Index: head/sys/coda/coda_vfsops.c =================================================================== --- head/sys/coda/coda_vfsops.c (revision 49534) +++ head/sys/coda/coda_vfsops.c (revision 49535) @@ -1,589 +1,587 @@ /* * * Coda: an Experimental Distributed File System * Release 3.1 * * Copyright (c) 1987-1998 Carnegie Mellon University * All Rights Reserved * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation, and * that credit is given to Carnegie Mellon University in all documents * and publicity pertaining to direct or indirect use of this code or its * derivatives. * * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF * ANY DERIVATIVE WORK. * * Carnegie Mellon encourages users of this software to return any * improvements or extensions that they make, and to grant Carnegie * Mellon the rights to redistribute these changes without encumbrance. * * @(#) src/sys/cfs/coda_vfsops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ - * $Id: coda_vfsops.c,v 1.15 1999/07/20 07:18:17 phk Exp $ + * $Id: coda_vfsops.c,v 1.16 1999/07/21 12:51:36 phk Exp $ * */ /* * Mach Operating System * Copyright (c) 1989 Carnegie-Mellon University * All rights reserved. The CMU software License Agreement specifies * the terms and conditions for use and redistribution. */ /* * This code was written for the Coda file system at Carnegie Mellon * University. Contributers include David Steere, James Kistler, and * M. Satyanarayanan. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include MALLOC_DEFINE(M_CODA, "CODA storage", "Various Coda Structures"); int codadebug = 0; int coda_vfsop_print_entry = 0; #define ENTRY if(coda_vfsop_print_entry) myprintf(("Entered %s\n",__FUNCTION__)) struct vnode *coda_ctlvp; struct coda_mntinfo coda_mnttbl[NVCODA]; /* indexed by minor device number */ /* structure to keep statistics of internally generated/satisfied calls */ struct coda_op_stats coda_vfsopstats[CODA_VFSOPS_SIZE]; #define MARK_ENTRY(op) (coda_vfsopstats[op].entries++) #define MARK_INT_SAT(op) (coda_vfsopstats[op].sat_intrn++) #define MARK_INT_FAIL(op) (coda_vfsopstats[op].unsat_intrn++) #define MRAK_INT_GEN(op) (coda_vfsopstats[op].gen_intrn++) extern int coda_nc_initialized; /* Set if cache has been initialized */ extern int vc_nb_open __P((dev_t, int, int, struct proc *)); int coda_vfsopstats_init(void) { register int i; for (i=0;ini_vp; if (error) { MARK_INT_FAIL(CODA_MOUNT_STATS); return (error); } if (dvp->v_type != VCHR) { MARK_INT_FAIL(CODA_MOUNT_STATS); vrele(dvp); return(ENXIO); } dev = dvp->v_rdev; vrele(dvp); /* * See if the device table matches our expectations. 
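 * The mount argument names the Coda kernel device that venus has
 * opened (conventionally something like /dev/cfs0; only the driver
 * identity and the minor number matter here), so a mount is roughly,
 * as a sketch:
 *
 *      mount("coda", "/coda", 0, "/dev/cfs0");
 *
 * The d_open comparison against vc_nb_open below confirms the device
 * really belongs to our driver, and minor(dev) then selects the
 * coda_mnttbl slot.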
*/ if (devsw(dev)->d_open != vc_nb_open) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENXIO); } if (minor(dev) >= NVCODA || minor(dev) < 0) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENXIO); } /* * Initialize the mount record and link it to the vfs struct */ mi = &coda_mnttbl[minor(dev)]; if (!VC_OPEN(&mi->mi_vcomm)) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENODEV); } /* No initialization (here) of mi_vcomm! */ vfsp->mnt_data = (qaddr_t)mi; vfs_getnewfsid (vfsp); mi->mi_vfsp = vfsp; /* * Make a root vnode to placate the Vnode interface, but don't * actually make the CODA_ROOT call to venus until the first call * to coda_root in case a server is down while venus is starting. */ rootfid.Volume = 0; rootfid.Vnode = 0; rootfid.Unique = 0; cp = make_coda_node(&rootfid, vfsp, VDIR); rootvp = CTOV(cp); rootvp->v_flag |= VROOT; ctlfid.Volume = CTL_VOL; ctlfid.Vnode = CTL_VNO; ctlfid.Unique = CTL_UNI; /* cp = make_coda_node(&ctlfid, vfsp, VCHR); The above code seems to cause a loop in the cnode links. I don't totally understand when it happens, it is caught when closing down the system. */ cp = make_coda_node(&ctlfid, 0, VCHR); coda_ctlvp = CTOV(cp); /* Add vfs and rootvp to chain of vfs hanging off mntinfo */ mi->mi_vfsp = vfsp; mi->mi_rootvp = rootvp; /* set filesystem block size */ vfsp->mnt_stat.f_bsize = 8192; /* XXX -JJK */ /* Set f_iosize. XXX -- inamura@isl.ntt.co.jp. For vnode_pager_haspage() references. The value should be obtained from underlying UFS. */ /* Checked UFS. iosize is set as 8192 */ vfsp->mnt_stat.f_iosize = 8192; /* error is currently guaranteed to be zero, but in case some code changes... */ CODADEBUG(1, myprintf(("coda_mount returned %d\n",error));); if (error) MARK_INT_FAIL(CODA_MOUNT_STATS); else MARK_INT_SAT(CODA_MOUNT_STATS); return(error); } int coda_start(vfsp, flags, p) struct mount *vfsp; int flags; struct proc *p; { ENTRY; return (0); } int coda_unmount(vfsp, mntflags, p) struct mount *vfsp; int mntflags; struct proc *p; { struct coda_mntinfo *mi = vftomi(vfsp); int active, error = 0; ENTRY; MARK_ENTRY(CODA_UMOUNT_STATS); if (!CODA_MOUNTED(vfsp)) { MARK_INT_FAIL(CODA_UMOUNT_STATS); return(EINVAL); } if (mi->mi_vfsp == vfsp) { /* We found the victim */ if (!IS_UNMOUNTING(VTOC(mi->mi_rootvp))) return (EBUSY); /* Venus is still running */ #ifdef DEBUG printf("coda_unmount: ROOT: vp %p, cp %p\n", mi->mi_rootvp, VTOC(mi->mi_rootvp)); #endif vrele(mi->mi_rootvp); active = coda_kill(vfsp, NOT_DOWNCALL); mi->mi_rootvp->v_flag &= ~VROOT; error = vflush(mi->mi_vfsp, NULLVP, FORCECLOSE); printf("coda_unmount: active = %d, vflush active %d\n", active, error); error = 0; /* I'm going to take this out to allow lookups to go through. I'm * not sure it's important anyway. -- DCS 2/2/94 */ /* vfsp->VFS_DATA = NULL; */ /* No more vfsp's to hold onto */ mi->mi_vfsp = NULL; mi->mi_rootvp = NULL; if (error) MARK_INT_FAIL(CODA_UMOUNT_STATS); else MARK_INT_SAT(CODA_UMOUNT_STATS); return(error); } return (EINVAL); } /* * find root of cfs */ int coda_root(vfsp, vpp) struct mount *vfsp; struct vnode **vpp; { struct coda_mntinfo *mi = vftomi(vfsp); struct vnode **result; int error; struct proc *p = curproc; /* XXX - bnoble */ ViceFid VFid; ENTRY; MARK_ENTRY(CODA_ROOT_STATS); result = NULL; if (vfsp == mi->mi_vfsp) { if ((VTOC(mi->mi_rootvp)->c_fid.Volume != 0) || (VTOC(mi->mi_rootvp)->c_fid.Vnode != 0) || (VTOC(mi->mi_rootvp)->c_fid.Unique != 0)) { /* Found valid root. */ *vpp = mi->mi_rootvp; /* On Mach, this is vref. 
On NetBSD, VOP_LOCK */ #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_SAT(CODA_ROOT_STATS); return(0); } } error = venus_root(vftomi(vfsp), p->p_cred->pc_ucred, p, &VFid); if (!error) { /* * Save the new rootfid in the cnode, and rehash the cnode into the * cnode hash with the new fid key. */ coda_unsave(VTOC(mi->mi_rootvp)); VTOC(mi->mi_rootvp)->c_fid = VFid; coda_save(VTOC(mi->mi_rootvp)); *vpp = mi->mi_rootvp; #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_SAT(CODA_ROOT_STATS); goto exit; } else if (error == ENODEV || error == EINTR) { /* Gross hack here! */ /* * If Venus fails to respond to the CODA_ROOT call, coda_call returns * ENODEV. Return the uninitialized root vnode to allow vfs * operations such as unmount to continue. Without this hack, * there is no way to do an unmount if Venus dies before a * successful CODA_ROOT call is done. All vnode operations * will fail. */ *vpp = mi->mi_rootvp; #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_FAIL(CODA_ROOT_STATS); error = 0; goto exit; } else { CODADEBUG( CODA_ROOT, myprintf(("error %d in CODA_ROOT\n", error)); ); MARK_INT_FAIL(CODA_ROOT_STATS); goto exit; } exit: return(error); } int coda_quotactl(vfsp, cmd, uid, arg, p) struct mount *vfsp; int cmd; uid_t uid; caddr_t arg; struct proc *p; { ENTRY; return (EOPNOTSUPP); } /* * Get file system statistics. */ int coda_nb_statfs(vfsp, sbp, p) register struct mount *vfsp; struct statfs *sbp; struct proc *p; { ENTRY; /* MARK_ENTRY(CODA_STATFS_STATS); */ if (!CODA_MOUNTED(vfsp)) { /* MARK_INT_FAIL(CODA_STATFS_STATS);*/ return(EINVAL); } bzero(sbp, sizeof(struct statfs)); /* XXX - what to do about f_flags, others? --bnoble */ /* Below This is what AFS does #define NB_SFS_SIZ 0x895440 */ /* Note: Normal fs's have a bsize of 0x400 == 1024 */ sbp->f_type = vfsp->mnt_vfc->vfc_typenum; sbp->f_bsize = 8192; /* XXX */ sbp->f_iosize = 8192; /* XXX */ #define NB_SFS_SIZ 0x8AB75D sbp->f_blocks = NB_SFS_SIZ; sbp->f_bfree = NB_SFS_SIZ; sbp->f_bavail = NB_SFS_SIZ; sbp->f_files = NB_SFS_SIZ; sbp->f_ffree = NB_SFS_SIZ; bcopy((caddr_t)&(vfsp->mnt_stat.f_fsid), (caddr_t)&(sbp->f_fsid), sizeof (fsid_t)); snprintf(sbp->f_mntonname, sizeof(sbp->f_mntonname), "/coda"); snprintf(sbp->f_mntfromname, sizeof(sbp->f_mntfromname), "CODA"); /* MARK_INT_SAT(CODA_STATFS_STATS); */ return(0); } /* * Flush any pending I/O. */ int coda_sync(vfsp, waitfor, cred, p) struct mount *vfsp; int waitfor; struct ucred *cred; struct proc *p; { ENTRY; MARK_ENTRY(CODA_SYNC_STATS); MARK_INT_SAT(CODA_SYNC_STATS); return(0); } int coda_vget(vfsp, ino, vpp) struct mount *vfsp; ino_t ino; struct vnode **vpp; { ENTRY; return (EOPNOTSUPP); } /* * fhtovp is now what vget used to be in 4.3-derived systems. For * some silly reason, vget is now keyed by a 32 bit ino_t, rather than * a type-specific fid. */ int coda_fhtovp(vfsp, fhp, nam, vpp, exflagsp, creadanonp) register struct mount *vfsp; struct fid *fhp; struct mbuf *nam; struct vnode **vpp; int *exflagsp; struct ucred **creadanonp; { struct cfid *cfid = (struct cfid *)fhp; struct cnode *cp = 0; int error; struct proc *p = curproc; /* XXX -mach */ ViceFid VFid; int vtype; ENTRY; MARK_ENTRY(CODA_VGET_STATS); /* Check for vget of control object. 
*/ if (IS_CTL_FID(&cfid->cfid_fid)) { *vpp = coda_ctlvp; vref(coda_ctlvp); MARK_INT_SAT(CODA_VGET_STATS); return(0); } error = venus_fhtovp(vftomi(vfsp), &cfid->cfid_fid, p->p_cred->pc_ucred, p, &VFid, &vtype); if (error) { CODADEBUG(CODA_VGET, myprintf(("vget error %d\n",error));) *vpp = (struct vnode *)0; } else { CODADEBUG(CODA_VGET, myprintf(("vget: vol %lx vno %lx uni %lx type %d result %d\n", VFid.Volume, VFid.Vnode, VFid.Unique, vtype, error)); ) cp = make_coda_node(&VFid, vfsp, vtype); *vpp = CTOV(cp); } return(error); } int coda_vptofh(vnp, fidp) struct vnode *vnp; struct fid *fidp; { ENTRY; return (EOPNOTSUPP); } int coda_init(struct vfsconf *vfsp) { ENTRY; return 0; } /* * To allow for greater ease of use, some vnodes may be orphaned when * Venus dies. Certain operations should still be allowed to go * through, but without propagating ophan-ness. So this function will * get a new vnode for the file from the current run of Venus. */ int getNewVnode(vpp) struct vnode **vpp; { struct cfid cfid; struct coda_mntinfo *mi = vftomi((*vpp)->v_mount); ENTRY; cfid.cfid_len = (short)sizeof(ViceFid); cfid.cfid_fid = VTOC(*vpp)->c_fid; /* Structure assignment. */ /* XXX ? */ /* We're guessing that if set, the 1st element on the list is a * valid vnode to use. If not, return ENODEV as venus is dead. */ if (mi->mi_vfsp == NULL) return ENODEV; return coda_fhtovp(mi->mi_vfsp, (struct fid*)&cfid, NULL, vpp, NULL, NULL); } #include #include /* get the mount structure corresponding to a given device. Assume * device corresponds to a UFS. Return NULL if no device is found. */ struct mount *devtomp(dev) dev_t dev; { struct mount *mp, *nmp; for (mp = mountlist.cqh_first; mp != (void*)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_next; if (((VFSTOUFS(mp))->um_dev == dev)) { /* mount corresponds to UFS and the device matches one we want */ return(mp); } } /* mount structure wasn't found */ return(NULL); } struct vfsops coda_vfsops = { coda_mount, coda_start, coda_unmount, coda_root, coda_quotactl, coda_nb_statfs, coda_sync, coda_vget, (int (*) (struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)) eopnotsupp, (int (*) (struct vnode *, struct fid *)) eopnotsupp, coda_init, }; VFS_SET(coda_vfsops, coda, VFCF_NETWORK); Index: head/sys/contrib/softupdates/ffs_softdep.c =================================================================== --- head/sys/contrib/softupdates/ffs_softdep.c (revision 49534) +++ head/sys/contrib/softupdates/ffs_softdep.c (revision 49535) @@ -1,4485 +1,4485 @@ /* * Copyright 1998 Marshall Kirk McKusick. All Rights Reserved. * * The soft updates code is derived from the appendix of a University * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, * "Soft Updates: A Solution to the Metadata Update Problem in File * Systems", CSE-TR-254-95, August 1995). * * The following are the copyrights and redistribution conditions that * apply to this copy of the soft update software. For a license * to use, redistribute or sell the soft update software under * conditions other than those described here, please contact the * author at one of the following addresses: * * Marshall Kirk McKusick mckusick@mckusick.com * 1614 Oxford Street +1-510-843-9542 * Berkeley, CA 94709-1608 * USA * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. None of the names of McKusick, Ganger, Patt, or the University of * Michigan may be used to endorse or promote products derived from * this software without specific prior written permission. * 4. Redistributions in any form must be accompanied by information on * how to obtain complete source code for any accompanying software * that uses this software. This source code must either be included * in the distribution or be available for no more than the cost of * distribution plus a nominal fee, and must be freely redistributable * under reasonable conditions. For an executable file, complete * source code means the source code for all modules it contains. * It does not mean source code for modules or files that typically * accompany the operating system on which the executable file runs, * e.g., standard library modules or system header files. * * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)ffs_softdep.c 9.40 (McKusick) 6/15/99 - * $Id: ffs_softdep.c,v 1.33 1999/06/27 13:26:23 peter Exp $ + * $Id: ffs_softdep.c,v 1.34 1999/06/29 15:57:40 mckusick Exp $ */ /* * For now we want the safety net that the DIAGNOSTIC and DEBUG flags provide. */ #ifndef DIAGNOSTIC #define DIAGNOSTIC #endif #ifndef DEBUG #define DEBUG #endif #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include #include /* * These definitions need to be adapted to the system to which * this file is being ported. */ /* * malloc types defined for the softdep system. 
*/ MALLOC_DEFINE(M_PAGEDEP, "pagedep","File page dependencies"); MALLOC_DEFINE(M_INODEDEP, "inodedep","Inode dependencies"); MALLOC_DEFINE(M_NEWBLK, "newblk","New block allocation"); MALLOC_DEFINE(M_BMSAFEMAP, "bmsafemap","Block or frag allocated from cyl group map"); MALLOC_DEFINE(M_ALLOCDIRECT, "allocdirect","Block or frag dependency for an inode"); MALLOC_DEFINE(M_INDIRDEP, "indirdep","Indirect block dependencies"); MALLOC_DEFINE(M_ALLOCINDIR, "allocindir","Block dependency for an indirect block"); MALLOC_DEFINE(M_FREEFRAG, "freefrag","Previously used frag for an inode"); MALLOC_DEFINE(M_FREEBLKS, "freeblks","Blocks freed from an inode"); MALLOC_DEFINE(M_FREEFILE, "freefile","Inode deallocated"); MALLOC_DEFINE(M_DIRADD, "diradd","New directory entry"); MALLOC_DEFINE(M_MKDIR, "mkdir","New directory"); MALLOC_DEFINE(M_DIRREM, "dirrem","Directory entry deleted"); #define D_PAGEDEP 0 #define D_INODEDEP 1 #define D_NEWBLK 2 #define D_BMSAFEMAP 3 #define D_ALLOCDIRECT 4 #define D_INDIRDEP 5 #define D_ALLOCINDIR 6 #define D_FREEFRAG 7 #define D_FREEBLKS 8 #define D_FREEFILE 9 #define D_DIRADD 10 #define D_MKDIR 11 #define D_DIRREM 12 #define D_LAST D_DIRREM /* * translate from workitem type to memory type * MUST match the defines above, such that memtype[D_XXX] == M_XXX */ static struct malloc_type *memtype[] = { M_PAGEDEP, M_INODEDEP, M_NEWBLK, M_BMSAFEMAP, M_ALLOCDIRECT, M_INDIRDEP, M_ALLOCINDIR, M_FREEFRAG, M_FREEBLKS, M_FREEFILE, M_DIRADD, M_MKDIR, M_DIRREM }; #define DtoM(type) (memtype[type]) /* * Names of malloc types. */ #define TYPENAME(type) \ ((unsigned)(type) < D_LAST ? memtype[type]->ks_shortdesc : "???") #define CURPROC curproc /* * End system adaptaion definitions. */ /* * Internal function prototypes. */ static void softdep_error __P((char *, int)); static void drain_output __P((struct vnode *, int)); static int getdirtybuf __P((struct buf **, int)); static void clear_remove __P((struct proc *)); static void clear_inodedeps __P((struct proc *)); static int flush_pagedep_deps __P((struct vnode *, struct mount *, struct diraddhd *)); static int flush_inodedep_deps __P((struct fs *, ino_t)); static int handle_written_filepage __P((struct pagedep *, struct buf *)); static void diradd_inode_written __P((struct diradd *, struct inodedep *)); static int handle_written_inodeblock __P((struct inodedep *, struct buf *)); static void handle_allocdirect_partdone __P((struct allocdirect *)); static void handle_allocindir_partdone __P((struct allocindir *)); static void initiate_write_filepage __P((struct pagedep *, struct buf *)); static void handle_written_mkdir __P((struct mkdir *, int)); static void initiate_write_inodeblock __P((struct inodedep *, struct buf *)); static void handle_workitem_freefile __P((struct freefile *)); static void handle_workitem_remove __P((struct dirrem *)); static struct dirrem *newdirrem __P((struct buf *, struct inode *, struct inode *, int)); static void free_diradd __P((struct diradd *)); static void free_allocindir __P((struct allocindir *, struct inodedep *)); static int indir_trunc __P((struct inode *, ufs_daddr_t, int, ufs_lbn_t, long *)); static void deallocate_dependencies __P((struct buf *, struct inodedep *)); static void free_allocdirect __P((struct allocdirectlst *, struct allocdirect *, int)); static int free_inodedep __P((struct inodedep *)); static void handle_workitem_freeblocks __P((struct freeblks *)); static void merge_inode_lists __P((struct inodedep *)); static void setup_allocindir_phase2 __P((struct buf *, struct inode *, struct 
allocindir *)); static struct allocindir *newallocindir __P((struct inode *, int, ufs_daddr_t, ufs_daddr_t)); static void handle_workitem_freefrag __P((struct freefrag *)); static struct freefrag *newfreefrag __P((struct inode *, ufs_daddr_t, long)); static void allocdirect_merge __P((struct allocdirectlst *, struct allocdirect *, struct allocdirect *)); static struct bmsafemap *bmsafemap_lookup __P((struct buf *)); static int newblk_lookup __P((struct fs *, ufs_daddr_t, int, struct newblk **)); static int inodedep_lookup __P((struct fs *, ino_t, int, struct inodedep **)); static int pagedep_lookup __P((struct inode *, ufs_lbn_t, int, struct pagedep **)); static void pause_timer __P((void *)); static int request_cleanup __P((int, int)); static void add_to_worklist __P((struct worklist *)); /* * Exported softdep operations. */ struct bio_ops bioops = { softdep_disk_io_initiation, /* io_start */ softdep_disk_write_complete, /* io_complete */ softdep_deallocate_dependencies, /* io_deallocate */ softdep_fsync, /* io_fsync */ softdep_process_worklist, /* io_sync */ }; /* * Locking primitives. * * For a uniprocessor, all we need to do is protect against disk * interrupts. For a multiprocessor, this lock would have to be * a mutex. A single mutex is used throughout this file, though * finer grain locking could be used if contention warranted it. * * For a multiprocessor, the sleep call would accept a lock and * release it after the sleep processing was complete. In a uniprocessor * implementation there is no such interlock, so we simple mark * the places where it needs to be done with the `interlocked' form * of the lock calls. Since the uniprocessor sleep already interlocks * the spl, there is nothing that really needs to be done. */ #ifndef /* NOT */ DEBUG static struct lockit { int lkt_spl; } lk = { 0 }; #define ACQUIRE_LOCK(lk) (lk)->lkt_spl = splbio() #define FREE_LOCK(lk) splx((lk)->lkt_spl) #define ACQUIRE_LOCK_INTERLOCKED(lk) #define FREE_LOCK_INTERLOCKED(lk) #else /* DEBUG */ static struct lockit { int lkt_spl; pid_t lkt_held; } lk = { 0, -1 }; static int lockcnt; static void acquire_lock __P((struct lockit *)); static void free_lock __P((struct lockit *)); static void acquire_lock_interlocked __P((struct lockit *)); static void free_lock_interlocked __P((struct lockit *)); #define ACQUIRE_LOCK(lk) acquire_lock(lk) #define FREE_LOCK(lk) free_lock(lk) #define ACQUIRE_LOCK_INTERLOCKED(lk) acquire_lock_interlocked(lk) #define FREE_LOCK_INTERLOCKED(lk) free_lock_interlocked(lk) static void acquire_lock(lk) struct lockit *lk; { if (lk->lkt_held != -1) { if (lk->lkt_held == CURPROC->p_pid) panic("softdep_lock: locking against myself"); else panic("softdep_lock: lock held by %d", lk->lkt_held); } lk->lkt_spl = splbio(); lk->lkt_held = CURPROC->p_pid; lockcnt++; } static void free_lock(lk) struct lockit *lk; { if (lk->lkt_held == -1) panic("softdep_unlock: lock not held"); lk->lkt_held = -1; splx(lk->lkt_spl); } static void acquire_lock_interlocked(lk) struct lockit *lk; { if (lk->lkt_held != -1) { if (lk->lkt_held == CURPROC->p_pid) panic("softdep_lock_interlocked: locking against self"); else panic("softdep_lock_interlocked: lock held by %d", lk->lkt_held); } lk->lkt_held = CURPROC->p_pid; lockcnt++; } static void free_lock_interlocked(lk) struct lockit *lk; { if (lk->lkt_held == -1) panic("softdep_unlock_interlocked: lock not held"); lk->lkt_held = -1; } #endif /* DEBUG */ /* * Place holder for real semaphores. 
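 * The hash lookup routines below use one of these as a simple
 * "allocation in progress" gate around the unlocked MALLOC of a new
 * entry.  The pattern, sketched here with the pagedep names that
 * appear later in this file, is:
 *
 *      if (sema_get(&pagedep_in_progress, &lk) == 0) {
 *              ACQUIRE_LOCK(&lk);
 *              goto top;               (lost the race; rescan the chain)
 *      }
 *      MALLOC(pagedep, struct pagedep *, sizeof(struct pagedep),
 *          M_PAGEDEP, M_WAITOK);
 *      (initialize the new pagedep)
 *      ACQUIRE_LOCK(&lk);
 *      LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash);
 *      sema_release(&pagedep_in_progress);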
*/ struct sema { int value; pid_t holder; char *name; int prio; int timo; }; static void sema_init __P((struct sema *, char *, int, int)); static int sema_get __P((struct sema *, struct lockit *)); static void sema_release __P((struct sema *)); static void sema_init(semap, name, prio, timo) struct sema *semap; char *name; int prio, timo; { semap->holder = -1; semap->value = 0; semap->name = name; semap->prio = prio; semap->timo = timo; } static int sema_get(semap, interlock) struct sema *semap; struct lockit *interlock; { if (semap->value++ > 0) { if (interlock != NULL) FREE_LOCK_INTERLOCKED(interlock); tsleep((caddr_t)semap, semap->prio, semap->name, semap->timo); if (interlock != NULL) { ACQUIRE_LOCK_INTERLOCKED(interlock); FREE_LOCK(interlock); } return (0); } semap->holder = CURPROC->p_pid; if (interlock != NULL) FREE_LOCK(interlock); return (1); } static void sema_release(semap) struct sema *semap; { if (semap->value <= 0 || semap->holder != CURPROC->p_pid) panic("sema_release: not held"); if (--semap->value > 0) { semap->value = 0; wakeup(semap); } semap->holder = -1; } /* * Worklist queue management. * These routines require that the lock be held. */ #ifndef /* NOT */ DEBUG #define WORKLIST_INSERT(head, item) do { \ (item)->wk_state |= ONWORKLIST; \ LIST_INSERT_HEAD(head, item, wk_list); \ } while (0) #define WORKLIST_REMOVE(item) do { \ (item)->wk_state &= ~ONWORKLIST; \ LIST_REMOVE(item, wk_list); \ } while (0) #define WORKITEM_FREE(item, type) FREE(item, DtoM(type)) #else /* DEBUG */ static void worklist_insert __P((struct workhead *, struct worklist *)); static void worklist_remove __P((struct worklist *)); static void workitem_free __P((struct worklist *, int)); #define WORKLIST_INSERT(head, item) worklist_insert(head, item) #define WORKLIST_REMOVE(item) worklist_remove(item) #define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item, type) static void worklist_insert(head, item) struct workhead *head; struct worklist *item; { if (lk.lkt_held == -1) panic("worklist_insert: lock not held"); if (item->wk_state & ONWORKLIST) panic("worklist_insert: already on list"); item->wk_state |= ONWORKLIST; LIST_INSERT_HEAD(head, item, wk_list); } static void worklist_remove(item) struct worklist *item; { if (lk.lkt_held == -1) panic("worklist_remove: lock not held"); if ((item->wk_state & ONWORKLIST) == 0) panic("worklist_remove: not on list"); item->wk_state &= ~ONWORKLIST; LIST_REMOVE(item, wk_list); } static void workitem_free(item, type) struct worklist *item; int type; { if (item->wk_state & ONWORKLIST) panic("workitem_free: still on list"); if (item->wk_type != type) panic("workitem_free: type mismatch"); FREE(item, DtoM(type)); } #endif /* DEBUG */ /* * Workitem queue management */ static struct workhead softdep_workitem_pending; static int softdep_worklist_busy; static int max_softdeps; /* maximum number of structs before slowdown */ static int tickdelay = 2; /* number of ticks to pause during slowdown */ static int proc_waiting; /* tracks whether we have a timeout posted */ static struct proc *filesys_syncer; /* proc of filesystem syncer process */ static int req_clear_inodedeps; /* syncer process flush some inodedeps */ #define FLUSH_INODES 1 static int req_clear_remove; /* syncer process flush some freeblks */ #define FLUSH_REMOVE 2 /* * runtime statistics */ static int stat_blk_limit_push; /* number of times block limit neared */ static int stat_ino_limit_push; /* number of times inode limit neared */ static int stat_blk_limit_hit; /* number of times block slowdown 
imposed */ static int stat_ino_limit_hit; /* number of times inode slowdown imposed */ static int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */ static int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */ static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */ static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */ #ifdef DEBUG #include #include #if defined(__FreeBSD__) SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, ""); SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,""); SYSCTL_INT(_debug, OID_AUTO, ino_limit_push, CTLFLAG_RW, &stat_ino_limit_push, 0,""); SYSCTL_INT(_debug, OID_AUTO, blk_limit_hit, CTLFLAG_RW, &stat_blk_limit_hit, 0, ""); SYSCTL_INT(_debug, OID_AUTO, ino_limit_hit, CTLFLAG_RW, &stat_ino_limit_hit, 0, ""); SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0, ""); SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, ""); SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW, &stat_direct_blk_ptrs, 0, ""); SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, ""); #else /* !__FreeBSD__ */ struct ctldebug debug20 = { "max_softdeps", &max_softdeps }; struct ctldebug debug21 = { "tickdelay", &tickdelay }; struct ctldebug debug23 = { "blk_limit_push", &stat_blk_limit_push }; struct ctldebug debug24 = { "ino_limit_push", &stat_ino_limit_push }; struct ctldebug debug25 = { "blk_limit_hit", &stat_blk_limit_hit }; struct ctldebug debug26 = { "ino_limit_hit", &stat_ino_limit_hit }; struct ctldebug debug27 = { "indir_blk_ptrs", &stat_indir_blk_ptrs }; struct ctldebug debug28 = { "inode_bitmap", &stat_inode_bitmap }; struct ctldebug debug29 = { "direct_blk_ptrs", &stat_direct_blk_ptrs }; struct ctldebug debug30 = { "dir_entry", &stat_dir_entry }; #endif /* !__FreeBSD__ */ #endif /* DEBUG */ /* * Add an item to the end of the work queue. * This routine requires that the lock be held. * This is the only routine that adds items to the list. * The following routine is the only one that removes items * and does so in order from first to last. */ static void add_to_worklist(wk) struct worklist *wk; { static struct worklist *worklist_tail; if (wk->wk_state & ONWORKLIST) panic("add_to_worklist: already on list"); wk->wk_state |= ONWORKLIST; if (LIST_FIRST(&softdep_workitem_pending) == NULL) LIST_INSERT_HEAD(&softdep_workitem_pending, wk, wk_list); else LIST_INSERT_AFTER(worklist_tail, wk, wk_list); worklist_tail = wk; } /* * Process that runs once per second to handle items in the background queue. * * Note that we ensure that everything is done in the order in which they * appear in the queue. The code below depends on this property to ensure * that blocks of a file are freed before the inode itself is freed. This * ordering ensures that no new triples will be generated * until all the old ones have been purged from the dependency lists. */ int softdep_process_worklist(matchmnt) struct mount *matchmnt; { struct proc *p = CURPROC; struct worklist *wk; struct fs *matchfs; int matchcnt; /* * Record the process identifier of our caller so that we can give * this process preferential treatment in request_cleanup below. */ filesys_syncer = p; matchcnt = 0; matchfs = NULL; if (matchmnt != NULL) matchfs = VFSTOUFS(matchmnt)->um_fs; /* * There is no danger of having multiple processes run this * code. 
It is single threaded solely so that softdep_flushfiles * (below) can get an accurate count of the number of items * related to its mount point that are in the list. */ if (softdep_worklist_busy && matchmnt == NULL) return (-1); /* * If requested, try removing inode or removal dependencies. */ if (req_clear_inodedeps) { clear_inodedeps(p); req_clear_inodedeps = 0; wakeup(&proc_waiting); } if (req_clear_remove) { clear_remove(p); req_clear_remove = 0; wakeup(&proc_waiting); } ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&softdep_workitem_pending)) != 0) { WORKLIST_REMOVE(wk); FREE_LOCK(&lk); switch (wk->wk_type) { case D_DIRREM: /* removal of a directory entry */ if (WK_DIRREM(wk)->dm_mnt == matchmnt) matchcnt += 1; handle_workitem_remove(WK_DIRREM(wk)); break; case D_FREEBLKS: /* releasing blocks and/or fragments from a file */ if (WK_FREEBLKS(wk)->fb_fs == matchfs) matchcnt += 1; handle_workitem_freeblocks(WK_FREEBLKS(wk)); break; case D_FREEFRAG: /* releasing a fragment when replaced as a file grows */ if (WK_FREEFRAG(wk)->ff_fs == matchfs) matchcnt += 1; handle_workitem_freefrag(WK_FREEFRAG(wk)); break; case D_FREEFILE: /* releasing an inode when its link count drops to 0 */ if (WK_FREEFILE(wk)->fx_fs == matchfs) matchcnt += 1; handle_workitem_freefile(WK_FREEFILE(wk)); break; default: panic("%s_process_worklist: Unknown type %s", "softdep", TYPENAME(wk->wk_type)); /* NOTREACHED */ } if (softdep_worklist_busy && matchmnt == NULL) return (-1); /* * If requested, try removing inode or removal dependencies. */ if (req_clear_inodedeps) { clear_inodedeps(p); req_clear_inodedeps = 0; wakeup(&proc_waiting); } if (req_clear_remove) { clear_remove(p); req_clear_remove = 0; wakeup(&proc_waiting); } ACQUIRE_LOCK(&lk); } FREE_LOCK(&lk); return (matchcnt); } /* * Purge the work list of all items associated with a particular mount point. */ int softdep_flushfiles(oldmnt, flags, p) struct mount *oldmnt; int flags; struct proc *p; { struct vnode *devvp; int error, loopcnt; /* * Await our turn to clear out the queue. */ while (softdep_worklist_busy) tsleep(&lbolt, PRIBIO, "softflush", 0); softdep_worklist_busy = 1; if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) { softdep_worklist_busy = 0; return (error); } /* * Alternately flush the block device associated with the mount * point and process any dependencies that the flushing * creates. In theory, this loop can happen at most twice, * but we give it a few extra just to be sure. */ devvp = VFSTOUFS(oldmnt)->um_devvp; for (loopcnt = 10; loopcnt > 0; loopcnt--) { if (softdep_process_worklist(oldmnt) == 0) { /* * Do another flush in case any vnodes were brought in * as part of the cleanup operations. */ if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) break; /* * If we still found nothing to do, we are really done. */ if (softdep_process_worklist(oldmnt) == 0) break; } vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p); VOP_UNLOCK(devvp, 0, p); if (error) break; } softdep_worklist_busy = 0; /* * If we are unmounting then it is an error to fail. If we * are simply trying to downgrade to read-only, then filesystem * activity can keep us busy forever, so we just fail with EBUSY. */ if (loopcnt == 0) { if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) panic("softdep_flushfiles: looping"); error = EBUSY; } return (error); } /* * Structure hashing. * * There are three types of structures that can be looked up: * 1) pagedep structures identified by mount point, inode number, * and logical block. 
* 2) inodedep structures identified by mount point and inode number. * 3) newblk structures identified by mount point and * physical block number. * * The "pagedep" and "inodedep" dependency structures are hashed * separately from the file blocks and inodes to which they correspond. * This separation helps when the in-memory copy of an inode or * file block must be replaced. It also obviates the need to access * an inode or file page when simply updating (or de-allocating) * dependency structures. Lookup of newblk structures is needed to * find newly allocated blocks when trying to associate them with * their allocdirect or allocindir structure. * * The lookup routines optionally create and hash a new instance when * an existing entry is not found. */ #define DEPALLOC 0x0001 /* allocate structure if lookup fails */ /* * Structures and routines associated with pagedep caching. */ LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl; u_long pagedep_hash; /* size of hash table - 1 */ #define PAGEDEP_HASH(mp, inum, lbn) \ (&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \ pagedep_hash]) static struct sema pagedep_in_progress; /* * Look up a pagedep. Return 1 if found, 0 if not found. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in pagedeppp. * This routine must be called with splbio interrupts blocked. */ static int pagedep_lookup(ip, lbn, flags, pagedeppp) struct inode *ip; ufs_lbn_t lbn; int flags; struct pagedep **pagedeppp; { struct pagedep *pagedep; struct pagedep_hashhead *pagedephd; struct mount *mp; int i; #ifdef DEBUG if (lk.lkt_held == -1) panic("pagedep_lookup: lock not held"); #endif mp = ITOV(ip)->v_mount; pagedephd = PAGEDEP_HASH(mp, ip->i_number, lbn); top: for (pagedep = LIST_FIRST(pagedephd); pagedep; pagedep = LIST_NEXT(pagedep, pd_hash)) if (ip->i_number == pagedep->pd_ino && lbn == pagedep->pd_lbn && mp == pagedep->pd_mnt) break; if (pagedep) { *pagedeppp = pagedep; return (1); } if ((flags & DEPALLOC) == 0) { *pagedeppp = NULL; return (0); } if (sema_get(&pagedep_in_progress, &lk) == 0) { ACQUIRE_LOCK(&lk); goto top; } MALLOC(pagedep, struct pagedep *, sizeof(struct pagedep), M_PAGEDEP, M_WAITOK); bzero(pagedep, sizeof(struct pagedep)); pagedep->pd_list.wk_type = D_PAGEDEP; pagedep->pd_mnt = mp; pagedep->pd_ino = ip->i_number; pagedep->pd_lbn = lbn; LIST_INIT(&pagedep->pd_dirremhd); LIST_INIT(&pagedep->pd_pendinghd); for (i = 0; i < DAHASHSZ; i++) LIST_INIT(&pagedep->pd_diraddhd[i]); ACQUIRE_LOCK(&lk); LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash); sema_release(&pagedep_in_progress); *pagedeppp = pagedep; return (0); } /* * Structures and routines associated with inodedep caching. */ LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl; static u_long inodedep_hash; /* size of hash table - 1 */ static long num_inodedep; /* number of inodedep allocated */ #define INODEDEP_HASH(fs, inum) \ (&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & inodedep_hash]) static struct sema inodedep_in_progress; /* * Look up a inodedep. Return 1 if found, 0 if not found. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in inodedeppp. * This routine must be called with splbio interrupts blocked. 
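 * One caller, softdep_setup_inomapdep below, takes the interlock
 * first and then asks for creation, in outline:
 *
 *      ACQUIRE_LOCK(&lk);
 *      if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC, &inodedep) != 0)
 *              panic("softdep_setup_inomapdep: found inode");
 *      inodedep->id_buf = bp;
 *      inodedep->id_state &= ~DEPCOMPLETE;
 *      FREE_LOCK(&lk);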
*/ static int inodedep_lookup(fs, inum, flags, inodedeppp) struct fs *fs; ino_t inum; int flags; struct inodedep **inodedeppp; { struct inodedep *inodedep; struct inodedep_hashhead *inodedephd; int firsttry; #ifdef DEBUG if (lk.lkt_held == -1) panic("inodedep_lookup: lock not held"); #endif firsttry = 1; inodedephd = INODEDEP_HASH(fs, inum); top: for (inodedep = LIST_FIRST(inodedephd); inodedep; inodedep = LIST_NEXT(inodedep, id_hash)) if (inum == inodedep->id_ino && fs == inodedep->id_fs) break; if (inodedep) { *inodedeppp = inodedep; return (1); } if ((flags & DEPALLOC) == 0) { *inodedeppp = NULL; return (0); } /* * If we are over our limit, try to improve the situation. */ if (num_inodedep > max_softdeps && firsttry && speedup_syncer() == 0 && request_cleanup(FLUSH_INODES, 1)) { firsttry = 0; goto top; } if (sema_get(&inodedep_in_progress, &lk) == 0) { ACQUIRE_LOCK(&lk); goto top; } num_inodedep += 1; MALLOC(inodedep, struct inodedep *, sizeof(struct inodedep), M_INODEDEP, M_WAITOK); inodedep->id_list.wk_type = D_INODEDEP; inodedep->id_fs = fs; inodedep->id_ino = inum; inodedep->id_state = ALLCOMPLETE; inodedep->id_nlinkdelta = 0; inodedep->id_savedino = NULL; inodedep->id_savedsize = -1; inodedep->id_buf = NULL; LIST_INIT(&inodedep->id_pendinghd); LIST_INIT(&inodedep->id_inowait); LIST_INIT(&inodedep->id_bufwait); TAILQ_INIT(&inodedep->id_inoupdt); TAILQ_INIT(&inodedep->id_newinoupdt); ACQUIRE_LOCK(&lk); LIST_INSERT_HEAD(inodedephd, inodedep, id_hash); sema_release(&inodedep_in_progress); *inodedeppp = inodedep; return (0); } /* * Structures and routines associated with newblk caching. */ LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl; u_long newblk_hash; /* size of hash table - 1 */ #define NEWBLK_HASH(fs, inum) \ (&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash]) static struct sema newblk_in_progress; /* * Look up a newblk. Return 1 if found, 0 if not found. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in newblkpp. */ static int newblk_lookup(fs, newblkno, flags, newblkpp) struct fs *fs; ufs_daddr_t newblkno; int flags; struct newblk **newblkpp; { struct newblk *newblk; struct newblk_hashhead *newblkhd; newblkhd = NEWBLK_HASH(fs, newblkno); top: for (newblk = LIST_FIRST(newblkhd); newblk; newblk = LIST_NEXT(newblk, nb_hash)) if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs) break; if (newblk) { *newblkpp = newblk; return (1); } if ((flags & DEPALLOC) == 0) { *newblkpp = NULL; return (0); } if (sema_get(&newblk_in_progress, 0) == 0) goto top; MALLOC(newblk, struct newblk *, sizeof(struct newblk), M_NEWBLK, M_WAITOK); newblk->nb_state = 0; newblk->nb_fs = fs; newblk->nb_newblkno = newblkno; LIST_INSERT_HEAD(newblkhd, newblk, nb_hash); sema_release(&newblk_in_progress); *newblkpp = newblk; return (0); } /* * Executed during filesystem system initialization before * mounting any file systems. */ void softdep_initialize() { LIST_INIT(&mkdirlisthd); LIST_INIT(&softdep_workitem_pending); max_softdeps = desiredvnodes * 8; pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, &pagedep_hash); sema_init(&pagedep_in_progress, "pagedep", PRIBIO, 0); inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash); sema_init(&inodedep_in_progress, "inodedep", PRIBIO, 0); newblk_hashtbl = hashinit(64, M_NEWBLK, &newblk_hash); sema_init(&newblk_in_progress, "newblk", PRIBIO, 0); } /* * Called at mount time to notify the dependency code that a * filesystem wishes to use it. 
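 * The hook is reached from ffs_mountfs() when the superblock has soft
 * updates enabled (set administratively with "tunefs -n enable"); the
 * call there is, in rough outline:
 *
 *      if ((fs->fs_flags & FS_DOSOFTDEP) != 0 &&
 *          (error = softdep_mount(devvp, mp, fs, cred)) != 0)
 *              return (error);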
*/ int softdep_mount(devvp, mp, fs, cred) struct vnode *devvp; struct mount *mp; struct fs *fs; struct ucred *cred; { struct csum cstotal; struct cg *cgp; struct buf *bp; int error, cyl; mp->mnt_flag &= ~MNT_ASYNC; mp->mnt_flag |= MNT_SOFTDEP; /* * When doing soft updates, the counters in the * superblock may have gotten out of sync, so we have * to scan the cylinder groups and recalculate them. */ if (fs->fs_clean != 0) return (0); bzero(&cstotal, sizeof cstotal); for (cyl = 0; cyl < fs->fs_ncg; cyl++) { if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)), fs->fs_cgsize, cred, &bp)) != 0) { brelse(bp); return (error); } cgp = (struct cg *)bp->b_data; cstotal.cs_nffree += cgp->cg_cs.cs_nffree; cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree; cstotal.cs_nifree += cgp->cg_cs.cs_nifree; cstotal.cs_ndir += cgp->cg_cs.cs_ndir; fs->fs_cs(fs, cyl) = cgp->cg_cs; brelse(bp); } #ifdef DEBUG if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal)) printf("ffs_mountfs: superblock updated for soft updates\n"); #endif bcopy(&cstotal, &fs->fs_cstotal, sizeof cstotal); return (0); } /* * Protecting the freemaps (or bitmaps). * * To eliminate the need to execute fsck before mounting a file system * after a power failure, one must (conservatively) guarantee that the * on-disk copy of the bitmaps never indicate that a live inode or block is * free. So, when a block or inode is allocated, the bitmap should be * updated (on disk) before any new pointers. When a block or inode is * freed, the bitmap should not be updated until all pointers have been * reset. The latter dependency is handled by the delayed de-allocation * approach described below for block and inode de-allocation. The former * dependency is handled by calling the following procedure when a block or * inode is allocated. When an inode is allocated an "inodedep" is created * with its DEPCOMPLETE flag cleared until its bitmap is written to disk. * Each "inodedep" is also inserted into the hash indexing structure so * that any additional link additions can be made dependent on the inode * allocation. * * The ufs file system maintains a number of free block counts (e.g., per * cylinder group, per cylinder and per pair) * in addition to the bitmaps. These counts are used to improve efficiency * during allocation and therefore must be consistent with the bitmaps. * There is no convenient way to guarantee post-crash consistency of these * counts with simple update ordering, for two main reasons: (1) The counts * and bitmaps for a single cylinder group block are not in the same disk * sector. If a disk write is interrupted (e.g., by power failure), one may * be written and the other not. (2) Some of the counts are located in the * superblock rather than the cylinder group block. So, we focus our soft * updates implementation on protecting the bitmaps. When mounting a * filesystem, we recompute the auxiliary counts from the bitmaps. */ /* * Called just after updating the cylinder group block to allocate an inode. */ void softdep_setup_inomapdep(bp, ip, newinum) struct buf *bp; /* buffer for cylgroup block with inode map */ struct inode *ip; /* inode related to allocation */ ino_t newinum; /* new inode number being allocated */ { struct inodedep *inodedep; struct bmsafemap *bmsafemap; /* * Create a dependency for the newly allocated inode. * Panic if it already exists as something is seriously wrong. * Otherwise add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. 
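 * (For reference, the caller is the cylinder group inode allocator in
 * ffs_alloc.c, which in rough outline does:
 *
 *      setbit(cg_inosused(cgp), ipref);        (mark the inode in use)
 *      if (DOINGSOFTDEP(ITOV(ip)))
 *              softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
 *      bdwrite(bp);                            (cg buffer, delayed write)
 *
 * so the dependency is recorded while the cylinder group buffer is
 * still locked and before it can reach the disk.)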
*/ ACQUIRE_LOCK(&lk); if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC, &inodedep) != 0) panic("softdep_setup_inomapdep: found inode"); inodedep->id_buf = bp; inodedep->id_state &= ~DEPCOMPLETE; bmsafemap = bmsafemap_lookup(bp); LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps); FREE_LOCK(&lk); } /* * Called just after updating the cylinder group block to * allocate block or fragment. */ void softdep_setup_blkmapdep(bp, fs, newblkno) struct buf *bp; /* buffer for cylgroup block with block map */ struct fs *fs; /* filesystem doing allocation */ ufs_daddr_t newblkno; /* number of newly allocated block */ { struct newblk *newblk; struct bmsafemap *bmsafemap; /* * Create a dependency for the newly allocated block. * Add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. */ if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0) panic("softdep_setup_blkmapdep: found block"); ACQUIRE_LOCK(&lk); newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(bp); LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); FREE_LOCK(&lk); } /* * Find the bmsafemap associated with a cylinder group buffer. * If none exists, create one. The buffer must be locked when * this routine is called and this routine must be called with * splbio interrupts blocked. */ static struct bmsafemap * bmsafemap_lookup(bp) struct buf *bp; { struct bmsafemap *bmsafemap; struct worklist *wk; #ifdef DEBUG if (lk.lkt_held == -1) panic("bmsafemap_lookup: lock not held"); #endif for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) if (wk->wk_type == D_BMSAFEMAP) return (WK_BMSAFEMAP(wk)); FREE_LOCK(&lk); MALLOC(bmsafemap, struct bmsafemap *, sizeof(struct bmsafemap), M_BMSAFEMAP, M_WAITOK); bmsafemap->sm_list.wk_type = D_BMSAFEMAP; bmsafemap->sm_list.wk_state = 0; bmsafemap->sm_buf = bp; LIST_INIT(&bmsafemap->sm_allocdirecthd); LIST_INIT(&bmsafemap->sm_allocindirhd); LIST_INIT(&bmsafemap->sm_inodedephd); LIST_INIT(&bmsafemap->sm_newblkhd); ACQUIRE_LOCK(&lk); WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); return (bmsafemap); } /* * Direct block allocation dependencies. * * When a new block is allocated, the corresponding disk locations must be * initialized (with zeros or new data) before the on-disk inode points to * them. Also, the freemap from which the block was allocated must be * updated (on disk) before the inode's pointer. These two dependencies are * independent of each other and are needed for all file blocks and indirect * blocks that are pointed to directly by the inode. Just before the * "in-core" version of the inode is updated with a newly allocated block * number, a procedure (below) is called to setup allocation dependency * structures. These structures are removed when the corresponding * dependencies are satisfied or when the block allocation becomes obsolete * (i.e., the file is deleted, the block is de-allocated, or the block is a * fragment that gets upgraded). All of these cases are handled in * procedures described later. * * When a file extension causes a fragment to be upgraded, either to a larger * fragment or to a full block, the on-disk location may change (if the * previous fragment could not simply be extended). In this case, the old * fragment must be de-allocated, but not until after the inode's pointer has * been updated. In most cases, this is handled by later procedures, which * will construct a "freefrag" structure to be added to the workitem queue * when the inode update is complete (or obsolete). 
The main exception to * this is when an allocation occurs while a pending allocation dependency * (for the same block pointer) remains. This case is handled in the main * allocation dependency setup procedure by immediately freeing the * unreferenced fragments. */ void softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) struct inode *ip; /* inode to which block is being added */ ufs_lbn_t lbn; /* block pointer within inode */ ufs_daddr_t newblkno; /* disk block number being added */ ufs_daddr_t oldblkno; /* previous block number, 0 unless frag */ long newsize; /* size of new block */ long oldsize; /* size of new block */ struct buf *bp; /* bp for allocated block */ { struct allocdirect *adp, *oldadp; struct allocdirectlst *adphead; struct bmsafemap *bmsafemap; struct inodedep *inodedep; struct pagedep *pagedep; struct newblk *newblk; MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect), M_ALLOCDIRECT, M_WAITOK); bzero(adp, sizeof(struct allocdirect)); adp->ad_list.wk_type = D_ALLOCDIRECT; adp->ad_lbn = lbn; adp->ad_newblkno = newblkno; adp->ad_oldblkno = oldblkno; adp->ad_newsize = newsize; adp->ad_oldsize = oldsize; adp->ad_state = ATTACHED; if (newblkno == oldblkno) adp->ad_freefrag = NULL; else adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize); if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0) panic("softdep_setup_allocdirect: lost block"); ACQUIRE_LOCK(&lk); (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); adp->ad_inodedep = inodedep; if (newblk->nb_state == DEPCOMPLETE) { adp->ad_state |= DEPCOMPLETE; adp->ad_buf = NULL; } else { bmsafemap = newblk->nb_bmsafemap; adp->ad_buf = bmsafemap->sm_buf; LIST_REMOVE(newblk, nb_deps); LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps); } LIST_REMOVE(newblk, nb_hash); FREE(newblk, M_NEWBLK); WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); if (lbn >= NDADDR) { /* allocating an indirect block */ if (oldblkno != 0) panic("softdep_setup_allocdirect: non-zero indir"); } else { /* * Allocating a direct block. * * If we are allocating a directory block, then we must * allocate an associated pagedep to track additions and * deletions. */ if ((ip->i_mode & IFMT) == IFDIR && pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); } /* * The list of allocdirects must be kept in sorted and ascending * order so that the rollback routines can quickly determine the * first uncommitted block (the size of the file stored on disk * ends at the end of the lowest committed fragment, or if there * are no fragments, at the end of the highest committed block). * Since files generally grow, the typical case is that the new * block is to be added at the end of the list. We speed this * special case by checking against the last allocdirect in the * list before laboriously traversing the list looking for the * insertion point. 
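 * For example, if everything through lbn 6 is committed but the list
 * still holds uncommitted allocdirects for lbns 7 and 9, the rollback
 * routines report the on-disk file as ending with lbn 6's block; the
 * sorted order is what lets them find that first uncommitted entry
 * cheaply, and the tail check below handles the common append case
 * without a scan.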
*/ adphead = &inodedep->id_newinoupdt; oldadp = TAILQ_LAST(adphead, allocdirectlst); if (oldadp == NULL || oldadp->ad_lbn <= lbn) { /* insert at end of list */ TAILQ_INSERT_TAIL(adphead, adp, ad_next); if (oldadp != NULL && oldadp->ad_lbn == lbn) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); return; } for (oldadp = TAILQ_FIRST(adphead); oldadp; oldadp = TAILQ_NEXT(oldadp, ad_next)) { if (oldadp->ad_lbn >= lbn) break; } if (oldadp == NULL) panic("softdep_setup_allocdirect: lost entry"); /* insert in middle of list */ TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); if (oldadp->ad_lbn == lbn) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); } /* * Replace an old allocdirect dependency with a newer one. * This routine must be called with splbio interrupts blocked. */ static void allocdirect_merge(adphead, newadp, oldadp) struct allocdirectlst *adphead; /* head of list holding allocdirects */ struct allocdirect *newadp; /* allocdirect being added */ struct allocdirect *oldadp; /* existing allocdirect being checked */ { struct freefrag *freefrag; #ifdef DEBUG if (lk.lkt_held == -1) panic("allocdirect_merge: lock not held"); #endif if (newadp->ad_oldblkno != oldadp->ad_newblkno || newadp->ad_oldsize != oldadp->ad_newsize || newadp->ad_lbn >= NDADDR) panic("allocdirect_check: old %d != new %d || lbn %ld >= %d", newadp->ad_oldblkno, oldadp->ad_newblkno, newadp->ad_lbn, NDADDR); newadp->ad_oldblkno = oldadp->ad_oldblkno; newadp->ad_oldsize = oldadp->ad_oldsize; /* * If the old dependency had a fragment to free or had never * previously had a block allocated, then the new dependency * can immediately post its freefrag and adopt the old freefrag. * This action is done by swapping the freefrag dependencies. * The new dependency gains the old one's freefrag, and the * old one gets the new one and then immediately puts it on * the worklist when it is freed by free_allocdirect. It is * not possible to do this swap when the old dependency had a * non-zero size but no previous fragment to free. This condition * arises when the new block is an extension of the old block. * Here, the first part of the fragment allocated to the new * dependency is part of the block currently claimed on disk by * the old dependency, so cannot legitimately be freed until the * conditions for the new dependency are fulfilled. */ if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) { freefrag = newadp->ad_freefrag; newadp->ad_freefrag = oldadp->ad_freefrag; oldadp->ad_freefrag = freefrag; } free_allocdirect(adphead, oldadp, 0); } /* * Allocate a new freefrag structure if needed. */ static struct freefrag * newfreefrag(ip, blkno, size) struct inode *ip; ufs_daddr_t blkno; long size; { struct freefrag *freefrag; struct fs *fs; if (blkno == 0) return (NULL); fs = ip->i_fs; if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag) panic("newfreefrag: frag size"); MALLOC(freefrag, struct freefrag *, sizeof(struct freefrag), M_FREEFRAG, M_WAITOK); freefrag->ff_list.wk_type = D_FREEFRAG; freefrag->ff_state = ip->i_uid & ~ONWORKLIST; /* XXX - used below */ freefrag->ff_inum = ip->i_number; freefrag->ff_fs = fs; freefrag->ff_devvp = ip->i_devvp; freefrag->ff_blkno = blkno; freefrag->ff_fragsize = size; return (freefrag); } /* * This workitem de-allocates fragments that were replaced during * file block allocation. 
*/ static void handle_workitem_freefrag(freefrag) struct freefrag *freefrag; { struct inode tip; tip.i_fs = freefrag->ff_fs; tip.i_devvp = freefrag->ff_devvp; tip.i_dev = freefrag->ff_devvp->v_rdev; tip.i_number = freefrag->ff_inum; tip.i_uid = freefrag->ff_state & ~ONWORKLIST; /* XXX - set above */ ffs_blkfree(&tip, freefrag->ff_blkno, freefrag->ff_fragsize); FREE(freefrag, M_FREEFRAG); } /* * Indirect block allocation dependencies. * * The same dependencies that exist for a direct block also exist when * a new block is allocated and pointed to by an entry in a block of * indirect pointers. The undo/redo states described above are also * used here. Because an indirect block contains many pointers that * may have dependencies, a second copy of the entire in-memory indirect * block is kept. The buffer cache copy is always completely up-to-date. * The second copy, which is used only as a source for disk writes, * contains only the safe pointers (i.e., those that have no remaining * update dependencies). The second copy is freed when all pointers * are safe. The cache is not allowed to replace indirect blocks with * pending update dependencies. If a buffer containing an indirect * block with dependencies is written, these routines will mark it * dirty again. It can only be successfully written once all the * dependencies are removed. The ffs_fsync routine in conjunction with * softdep_sync_metadata work together to get all the dependencies * removed so that a file can be successfully written to disk. Three * procedures are used when setting up indirect block pointer * dependencies. The division is necessary because of the organization * of the "balloc" routine and because of the distinction between file * pages and file metadata blocks. */ /* * Allocate a new allocindir structure. */ static struct allocindir * newallocindir(ip, ptrno, newblkno, oldblkno) struct inode *ip; /* inode for file being extended */ int ptrno; /* offset of pointer in indirect block */ ufs_daddr_t newblkno; /* disk block number being added */ ufs_daddr_t oldblkno; /* previous block number, 0 if none */ { struct allocindir *aip; MALLOC(aip, struct allocindir *, sizeof(struct allocindir), M_ALLOCINDIR, M_WAITOK); bzero(aip, sizeof(struct allocindir)); aip->ai_list.wk_type = D_ALLOCINDIR; aip->ai_state = ATTACHED; aip->ai_offset = ptrno; aip->ai_newblkno = newblkno; aip->ai_oldblkno = oldblkno; aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize); return (aip); } /* * Called just before setting an indirect block pointer * to a newly allocated file page. */ void softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) struct inode *ip; /* inode for file being extended */ ufs_lbn_t lbn; /* allocated block number within file */ struct buf *bp; /* buffer with indirect blk referencing page */ int ptrno; /* offset of pointer in indirect block */ ufs_daddr_t newblkno; /* disk block number being added */ ufs_daddr_t oldblkno; /* previous block number, 0 if none */ struct buf *nbp; /* buffer holding allocated page */ { struct allocindir *aip; struct pagedep *pagedep; aip = newallocindir(ip, ptrno, newblkno, oldblkno); ACQUIRE_LOCK(&lk); /* * If we are allocating a directory page, then we must * allocate an associated pagedep to track additions and * deletions. 
*/ if ((ip->i_mode & IFMT) == IFDIR && pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); FREE_LOCK(&lk); setup_allocindir_phase2(bp, ip, aip); } /* * Called just before setting an indirect block pointer to a * newly allocated indirect block. */ void softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) struct buf *nbp; /* newly allocated indirect block */ struct inode *ip; /* inode for file being extended */ struct buf *bp; /* indirect block referencing allocated block */ int ptrno; /* offset of pointer in indirect block */ ufs_daddr_t newblkno; /* disk block number being added */ { struct allocindir *aip; aip = newallocindir(ip, ptrno, newblkno, 0); ACQUIRE_LOCK(&lk); WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); FREE_LOCK(&lk); setup_allocindir_phase2(bp, ip, aip); } /* * Called to finish the allocation of the "aip" allocated * by one of the two routines above. */ static void setup_allocindir_phase2(bp, ip, aip) struct buf *bp; /* in-memory copy of the indirect block */ struct inode *ip; /* inode for file being extended */ struct allocindir *aip; /* allocindir allocated by the above routines */ { struct worklist *wk; struct indirdep *indirdep, *newindirdep; struct bmsafemap *bmsafemap; struct allocindir *oldaip; struct freefrag *freefrag; struct newblk *newblk; if (bp->b_lblkno >= 0) panic("setup_allocindir_phase2: not indir blk"); for (indirdep = NULL, newindirdep = NULL; ; ) { ACQUIRE_LOCK(&lk); for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) { if (wk->wk_type != D_INDIRDEP) continue; indirdep = WK_INDIRDEP(wk); break; } if (indirdep == NULL && newindirdep) { indirdep = newindirdep; WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); newindirdep = NULL; } FREE_LOCK(&lk); if (indirdep) { if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0, &newblk) == 0) panic("setup_allocindir: lost block"); ACQUIRE_LOCK(&lk); if (newblk->nb_state == DEPCOMPLETE) { aip->ai_state |= DEPCOMPLETE; aip->ai_buf = NULL; } else { bmsafemap = newblk->nb_bmsafemap; aip->ai_buf = bmsafemap->sm_buf; LIST_REMOVE(newblk, nb_deps); LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd, aip, ai_deps); } LIST_REMOVE(newblk, nb_hash); FREE(newblk, M_NEWBLK); aip->ai_indirdep = indirdep; /* * Check to see if there is an existing dependency * for this block. If there is, merge the old * dependency into the new one. 
*/ if (aip->ai_oldblkno == 0) oldaip = NULL; else for (oldaip=LIST_FIRST(&indirdep->ir_deplisthd); oldaip; oldaip = LIST_NEXT(oldaip, ai_next)) if (oldaip->ai_offset == aip->ai_offset) break; if (oldaip != NULL) { if (oldaip->ai_newblkno != aip->ai_oldblkno) panic("setup_allocindir_phase2: blkno"); aip->ai_oldblkno = oldaip->ai_oldblkno; freefrag = oldaip->ai_freefrag; oldaip->ai_freefrag = aip->ai_freefrag; aip->ai_freefrag = freefrag; free_allocindir(oldaip, NULL); } LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); ((ufs_daddr_t *)indirdep->ir_savebp->b_data) [aip->ai_offset] = aip->ai_oldblkno; FREE_LOCK(&lk); } if (newindirdep) { if (indirdep->ir_savebp != NULL) brelse(newindirdep->ir_savebp); WORKITEM_FREE((caddr_t)newindirdep, D_INDIRDEP); } if (indirdep) break; MALLOC(newindirdep, struct indirdep *, sizeof(struct indirdep), M_INDIRDEP, M_WAITOK); newindirdep->ir_list.wk_type = D_INDIRDEP; newindirdep->ir_state = ATTACHED; LIST_INIT(&newindirdep->ir_deplisthd); LIST_INIT(&newindirdep->ir_donehd); if (bp->b_blkno == bp->b_lblkno) { VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } newindirdep->ir_savebp = getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0); BUF_KERNPROC(newindirdep->ir_savebp); bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount); } } /* * Block de-allocation dependencies. * * When blocks are de-allocated, the on-disk pointers must be nullified before * the blocks are made available for use by other files. (The true * requirement is that old pointers must be nullified before new on-disk * pointers are set. We chose this slightly more stringent requirement to * reduce complexity.) Our implementation handles this dependency by updating * the inode (or indirect block) appropriately but delaying the actual block * de-allocation (i.e., freemap and free space count manipulation) until * after the updated versions reach stable storage. After the disk is * updated, the blocks can be safely de-allocated whenever it is convenient. * This implementation handles only the common case of reducing a file's * length to zero. Other cases are handled by the conventional synchronous * write approach. * * The ffs implementation with which we worked double-checks * the state of the block pointers and file size as it reduces * a file's length. Some of this code is replicated here in our * soft updates implementation. The freeblks->fb_chkcnt field is * used to transfer a part of this information to the procedure * that eventually de-allocates the blocks. * * This routine should be called from the routine that shortens * a file's length, before the inode's size or block pointers * are modified. It will save the block pointer information for * later release and zero the inode so that the calling routine * can release it. */ static long num_freeblks; /* number of freeblks allocated */ void softdep_setup_freeblocks(ip, length) struct inode *ip; /* The inode whose length is to be reduced */ off_t length; /* The new length for the file */ { struct freeblks *freeblks; struct inodedep *inodedep; struct allocdirect *adp; struct vnode *vp; struct buf *bp; struct fs *fs; int i, error; fs = ip->i_fs; if (length != 0) panic("softdep_setup_freeblocks: non-zero length"); /* * If we are over our limit, try to improve the situation. 
*/ if (num_freeblks > max_softdeps / 2 && speedup_syncer() == 0) (void) request_cleanup(FLUSH_REMOVE, 0); num_freeblks += 1; MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks), M_FREEBLKS, M_WAITOK); bzero(freeblks, sizeof(struct freeblks)); freeblks->fb_list.wk_type = D_FREEBLKS; freeblks->fb_uid = ip->i_uid; freeblks->fb_previousinum = ip->i_number; freeblks->fb_devvp = ip->i_devvp; freeblks->fb_fs = fs; freeblks->fb_oldsize = ip->i_size; freeblks->fb_newsize = length; freeblks->fb_chkcnt = ip->i_blocks; for (i = 0; i < NDADDR; i++) { freeblks->fb_dblks[i] = ip->i_db[i]; ip->i_db[i] = 0; } for (i = 0; i < NIADDR; i++) { freeblks->fb_iblks[i] = ip->i_ib[i]; ip->i_ib[i] = 0; } ip->i_blocks = 0; ip->i_size = 0; /* * Push the zero'ed inode to its disk buffer so that we are free * to delete its dependencies below. Once the dependencies are gone * the buffer can be safely released. */ if ((error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp)) != 0) softdep_error("softdep_setup_freeblocks", error); *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = ip->i_din; /* * Find and eliminate any inode dependencies. */ ACQUIRE_LOCK(&lk); (void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep); if ((inodedep->id_state & IOSTARTED) != 0) panic("softdep_setup_freeblocks: inode busy"); /* * Add the freeblks structure to the list of operations that * must await the zero'ed inode being written to disk. */ WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list); /* * Because the file length has been truncated to zero, any * pending block allocation dependency structures associated * with this inode are obsolete and can simply be de-allocated. * We must first merge the two dependency lists to get rid of * any duplicate freefrag structures, then purge the merged list. */ merge_inode_lists(inodedep); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) free_allocdirect(&inodedep->id_inoupdt, adp, 1); FREE_LOCK(&lk); bdwrite(bp); /* * We must wait for any I/O in progress to finish so that * all potential buffers on the dirty list will be visible. * Once they are all there, walk the list and get rid of * any dependencies. */ vp = ITOV(ip); ACQUIRE_LOCK(&lk); drain_output(vp, 1); while (getdirtybuf(&TAILQ_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) { bp = TAILQ_FIRST(&vp->v_dirtyblkhd); (void) inodedep_lookup(fs, ip->i_number, 0, &inodedep); deallocate_dependencies(bp, inodedep); bp->b_flags |= B_INVAL | B_NOCACHE; FREE_LOCK(&lk); brelse(bp); ACQUIRE_LOCK(&lk); } /* * Try freeing the inodedep in case that was the last dependency. */ if ((inodedep_lookup(fs, ip->i_number, 0, &inodedep)) != 0) (void) free_inodedep(inodedep); FREE_LOCK(&lk); } /* * Reclaim any dependency structures from a buffer that is about to * be reallocated to a new vnode. The buffer must be locked, thus, * no I/O completion operations can occur while we are manipulating * its associated dependencies. The mutex is held so that other I/O's * associated with related dependencies do not occur. */ static void deallocate_dependencies(bp, inodedep) struct buf *bp; struct inodedep *inodedep; { struct worklist *wk; struct indirdep *indirdep; struct allocindir *aip; struct pagedep *pagedep; struct dirrem *dirrem; struct diradd *dap; int i; while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { switch (wk->wk_type) { case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); /* * None of the indirect pointers will ever be visible, * so they can simply be tossed. 
GOINGAWAY ensures * that allocated pointers will be saved in the buffer * cache until they are freed. Note that they will * only be able to be found by their physical address * since the inode mapping the logical address will * be gone. The save buffer used for the safe copy * was allocated in setup_allocindir_phase2 using * the physical address so it could be used for this * purpose. Hence we swap the safe copy with the real * copy, allowing the safe copy to be freed and holding * on to the real copy for later use in indir_trunc. */ if (indirdep->ir_state & GOINGAWAY) panic("deallocate_dependencies: already gone"); indirdep->ir_state |= GOINGAWAY; while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0) free_allocindir(aip, inodedep); if (bp->b_lblkno >= 0 || bp->b_blkno != indirdep->ir_savebp->b_lblkno) panic("deallocate_dependencies: not indir"); bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); WORKLIST_REMOVE(wk); WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk); continue; case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); /* * None of the directory additions will ever be * visible, so they can simply be tossed. */ for (i = 0; i < DAHASHSZ; i++) while ((dap = LIST_FIRST(&pagedep->pd_diraddhd[i]))) free_diradd(dap); while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != 0) free_diradd(dap); /* * Copy any directory remove dependencies to the list * to be processed after the zero'ed inode is written. * If the inode has already been written, then they * can be dumped directly onto the work list. */ for (dirrem = LIST_FIRST(&pagedep->pd_dirremhd); dirrem; dirrem = LIST_NEXT(dirrem, dm_next)) { LIST_REMOVE(dirrem, dm_next); dirrem->dm_dirinum = pagedep->pd_ino; if (inodedep == NULL) add_to_worklist(&dirrem->dm_list); else WORKLIST_INSERT(&inodedep->id_bufwait, &dirrem->dm_list); } WORKLIST_REMOVE(&pagedep->pd_list); LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); continue; case D_ALLOCINDIR: free_allocindir(WK_ALLOCINDIR(wk), inodedep); continue; case D_ALLOCDIRECT: case D_INODEDEP: panic("deallocate_dependencies: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ default: panic("deallocate_dependencies: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } } /* * Free an allocdirect. Generate a new freefrag work request if appropriate. * This routine must be called with splbio interrupts blocked. */ static void free_allocdirect(adphead, adp, delay) struct allocdirectlst *adphead; struct allocdirect *adp; int delay; { #ifdef DEBUG if (lk.lkt_held == -1) panic("free_allocdirect: lock not held"); #endif if ((adp->ad_state & DEPCOMPLETE) == 0) LIST_REMOVE(adp, ad_deps); TAILQ_REMOVE(adphead, adp, ad_next); if ((adp->ad_state & COMPLETE) == 0) WORKLIST_REMOVE(&adp->ad_list); if (adp->ad_freefrag != NULL) { if (delay) WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, &adp->ad_freefrag->ff_list); else add_to_worklist(&adp->ad_freefrag->ff_list); } WORKITEM_FREE(adp, D_ALLOCDIRECT); } /* * Prepare an inode to be freed. The actual free operation is not * done until the zero'ed inode has been written to disk. */ static long num_freefile; /* number of freefile allocated */ void softdep_freefile(pvp, ino, mode) struct vnode *pvp; ino_t ino; int mode; { struct inode *ip = VTOI(pvp); struct inodedep *inodedep; struct freefile *freefile; /* * If we are over our limit, try to improve the situation. */ if (num_freefile > max_softdeps / 2 && speedup_syncer() == 0) (void) request_cleanup(FLUSH_REMOVE, 0); /* * This sets up the inode de-allocation dependency. 
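 *
 * (Editor's note, illustrative only: for an ordinary "remove last link"
 * the expected ordering is roughly
 *	softdep_setup_freeblocks()	- zero in-core pointers, queue freeblks
 *	softdep_freefile()		- queue this freefile
 *	... zero'ed inode block written ...
 *	handle_workitem_freeblocks()	- give the data blocks back
 *	handle_workitem_freefile()	- finally ffs_freefile() the inode
 * so the inode is returned to the map only after its blocks are freed.)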
*/ num_freefile += 1; MALLOC(freefile, struct freefile *, sizeof(struct freefile), M_FREEFILE, M_WAITOK); freefile->fx_list.wk_type = D_FREEFILE; freefile->fx_list.wk_state = 0; freefile->fx_mode = mode; freefile->fx_oldinum = ino; freefile->fx_devvp = ip->i_devvp; freefile->fx_fs = ip->i_fs; /* * If the inodedep does not exist, then the zero'ed inode has * been written to disk and we can free the file immediately. */ ACQUIRE_LOCK(&lk); if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0) { add_to_worklist(&freefile->fx_list); FREE_LOCK(&lk); return; } /* * If we still have a bitmap dependency, then the inode has never * been written to disk. Drop the dependency as it is no longer * necessary since the inode is being deallocated. We could process * the freefile immediately, but then we would have to clear the * id_inowait dependencies here and it is easier just to let the * zero'ed inode be written and let them be cleaned up in the * normal followup actions that follow the inode write. */ if ((inodedep->id_state & DEPCOMPLETE) == 0) { inodedep->id_state |= DEPCOMPLETE; LIST_REMOVE(inodedep, id_deps); inodedep->id_buf = NULL; } /* * If the inodedep has no dependencies associated with it, * then we must free it here and free the file immediately. * This case arises when an early allocation fails (for * example, the user is over their file quota). */ if (free_inodedep(inodedep) == 0) WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); else add_to_worklist(&freefile->fx_list); FREE_LOCK(&lk); } /* * Try to free an inodedep structure. Return 1 if it could be freed. */ static int free_inodedep(inodedep) struct inodedep *inodedep; { if ((inodedep->id_state & ONWORKLIST) != 0 || (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE || LIST_FIRST(&inodedep->id_pendinghd) != NULL || LIST_FIRST(&inodedep->id_bufwait) != NULL || LIST_FIRST(&inodedep->id_inowait) != NULL || TAILQ_FIRST(&inodedep->id_inoupdt) != NULL || TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL || inodedep->id_nlinkdelta != 0 || inodedep->id_savedino != NULL) return (0); LIST_REMOVE(inodedep, id_hash); WORKITEM_FREE(inodedep, D_INODEDEP); num_inodedep -= 1; return (1); } /* * This workitem routine performs the block de-allocation. * The workitem is added to the pending list after the updated * inode block has been written to disk. As mentioned above, * checks regarding the number of blocks de-allocated (compared * to the number of blocks allocated for the file) are also * performed in this function. */ static void handle_workitem_freeblocks(freeblks) struct freeblks *freeblks; { struct inode tip; ufs_daddr_t bn; struct fs *fs; int i, level, bsize; long nblocks, blocksreleased = 0; int error, allerror = 0; ufs_lbn_t baselbns[NIADDR], tmpval; tip.i_number = freeblks->fb_previousinum; tip.i_devvp = freeblks->fb_devvp; tip.i_dev = freeblks->fb_devvp->v_rdev; tip.i_fs = freeblks->fb_fs; tip.i_size = freeblks->fb_oldsize; tip.i_uid = freeblks->fb_uid; fs = freeblks->fb_fs; tmpval = 1; baselbns[0] = NDADDR; for (i = 1; i < NIADDR; i++) { tmpval *= NINDIR(fs); baselbns[i] = baselbns[i - 1] + tmpval; } nblocks = btodb(fs->fs_bsize); blocksreleased = 0; /* * Indirect blocks first. */ for (level = (NIADDR - 1); level >= 0; level--) { if ((bn = freeblks->fb_iblks[level]) == 0) continue; if ((error = indir_trunc(&tip, fsbtodb(fs, bn), level, baselbns[level], &blocksreleased)) == 0) allerror = error; ffs_blkfree(&tip, bn, fs->fs_bsize); blocksreleased += nblocks; } /* * All direct blocks or frags. 
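 *
 * (Editor's note: blksize() rather than fs_bsize is used below because,
 * on an 8K/1K filesystem for example, the block underlying the old end
 * of the file may be anything from one fragment up to a full block.)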
*/ for (i = (NDADDR - 1); i >= 0; i--) { if ((bn = freeblks->fb_dblks[i]) == 0) continue; bsize = blksize(fs, &tip, i); ffs_blkfree(&tip, bn, bsize); blocksreleased += btodb(bsize); } #ifdef DIAGNOSTIC if (freeblks->fb_chkcnt != blocksreleased) panic("handle_workitem_freeblocks: block count"); if (allerror) softdep_error("handle_workitem_freeblks", allerror); #endif /* DIAGNOSTIC */ WORKITEM_FREE(freeblks, D_FREEBLKS); num_freeblks -= 1; } /* * Release blocks associated with the inode ip and stored in the indirect * block dbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. */ static int indir_trunc(ip, dbn, level, lbn, countp) struct inode *ip; ufs_daddr_t dbn; int level; ufs_lbn_t lbn; long *countp; { struct buf *bp; ufs_daddr_t *bap; ufs_daddr_t nb; struct fs *fs; struct worklist *wk; struct indirdep *indirdep; int i, lbnadd, nblocks; int error, allerror = 0; fs = ip->i_fs; lbnadd = 1; for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); /* * Get buffer of block pointers to be freed. This routine is not * called until the zero'ed inode has been written, so it is safe * to free blocks as they are encountered. Because the inode has * been zero'ed, calls to bmap on these blocks will fail. So, we * have to use the on-disk address and the block device for the * filesystem to look them up. If the file was deleted before its * indirect blocks were all written to disk, the routine that set * us up (deallocate_dependencies) will have arranged to leave * a complete copy of the indirect block in memory for our use. * Otherwise we have to read the blocks in from the disk. */ ACQUIRE_LOCK(&lk); if ((bp = incore(ip->i_devvp, dbn)) != NULL && (wk = LIST_FIRST(&bp->b_dep)) != NULL) { if (wk->wk_type != D_INDIRDEP || (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp || (indirdep->ir_state & GOINGAWAY) == 0) panic("indir_trunc: lost indirdep"); WORKLIST_REMOVE(wk); WORKITEM_FREE(indirdep, D_INDIRDEP); if (LIST_FIRST(&bp->b_dep) != NULL) panic("indir_trunc: dangling dep"); FREE_LOCK(&lk); } else { FREE_LOCK(&lk); error = bread(ip->i_devvp, dbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) return (error); } /* * Recursively free indirect blocks. */ bap = (ufs_daddr_t *)bp->b_data; nblocks = btodb(fs->fs_bsize); for (i = NINDIR(fs) - 1; i >= 0; i--) { if ((nb = bap[i]) == 0) continue; if (level != 0) { if ((error = indir_trunc(ip, fsbtodb(fs, nb), level - 1, lbn + (i * lbnadd), countp)) != 0) allerror = error; } ffs_blkfree(ip, nb, fs->fs_bsize); *countp += nblocks; } bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); return (allerror); } /* * Free an allocindir. * This routine must be called with splbio interrupts blocked. */ static void free_allocindir(aip, inodedep) struct allocindir *aip; struct inodedep *inodedep; { struct freefrag *freefrag; #ifdef DEBUG if (lk.lkt_held == -1) panic("free_allocindir: lock not held"); #endif if ((aip->ai_state & DEPCOMPLETE) == 0) LIST_REMOVE(aip, ai_deps); if (aip->ai_state & ONWORKLIST) WORKLIST_REMOVE(&aip->ai_list); LIST_REMOVE(aip, ai_next); if ((freefrag = aip->ai_freefrag) != NULL) { if (inodedep == NULL) add_to_worklist(&freefrag->ff_list); else WORKLIST_INSERT(&inodedep->id_bufwait, &freefrag->ff_list); } WORKITEM_FREE(aip, D_ALLOCINDIR); } /* * Directory entry addition dependencies. * * When adding a new directory entry, the inode (with its incremented link * count) must be written to disk before the directory entry's pointer to it. 
* Also, if the inode is newly allocated, the corresponding freemap must be * updated (on disk) before the directory entry's pointer. These requirements * are met via undo/redo on the directory entry's pointer, which consists * simply of the inode number. * * As directory entries are added and deleted, the free space within a * directory block can become fragmented. The ufs file system will compact * a fragmented directory block to make space for a new entry. When this * occurs, the offsets of previously added entries change. Any "diradd" * dependency structures corresponding to these entries must be updated with * the new offsets. */ /* * This routine is called after the in-memory inode's link * count has been incremented, but before the directory entry's * pointer to the inode has been set. */ void softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for directory */ off_t diroffset; /* offset of new entry in directory */ long newinum; /* inode referenced by new directory entry */ struct buf *newdirbp; /* non-NULL => contents of new mkdir */ { int offset; /* offset of new entry within directory block */ ufs_lbn_t lbn; /* block in directory containing new entry */ struct fs *fs; struct diradd *dap; struct pagedep *pagedep; struct inodedep *inodedep; struct mkdir *mkdir1, *mkdir2; /* * Whiteouts have no dependencies. */ if (newinum == WINO) { if (newdirbp != NULL) bdwrite(newdirbp); return; } fs = dp->i_fs; lbn = lblkno(fs, diroffset); offset = blkoff(fs, diroffset); MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD, M_WAITOK); bzero(dap, sizeof(struct diradd)); dap->da_list.wk_type = D_DIRADD; dap->da_offset = offset; dap->da_newinum = newinum; dap->da_state = ATTACHED; if (newdirbp == NULL) { dap->da_state |= DEPCOMPLETE; ACQUIRE_LOCK(&lk); } else { dap->da_state |= MKDIR_BODY | MKDIR_PARENT; MALLOC(mkdir1, struct mkdir *, sizeof(struct mkdir), M_MKDIR, M_WAITOK); mkdir1->md_list.wk_type = D_MKDIR; mkdir1->md_state = MKDIR_BODY; mkdir1->md_diradd = dap; MALLOC(mkdir2, struct mkdir *, sizeof(struct mkdir), M_MKDIR, M_WAITOK); mkdir2->md_list.wk_type = D_MKDIR; mkdir2->md_state = MKDIR_PARENT; mkdir2->md_diradd = dap; /* * Dependency on "." and ".." being written to disk. */ mkdir1->md_buf = newdirbp; ACQUIRE_LOCK(&lk); LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs); WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list); FREE_LOCK(&lk); bdwrite(newdirbp); /* * Dependency on link count increase for parent directory */ ACQUIRE_LOCK(&lk); if (inodedep_lookup(dp->i_fs, dp->i_number, 0, &inodedep) == 0 || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state &= ~MKDIR_PARENT; WORKITEM_FREE(mkdir2, D_MKDIR); } else { LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); } } /* * Link into parent directory pagedep to await its being written. */ if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); dap->da_pagedep = pagedep; LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); /* * Link into its inodedep. Put it on the id_bufwait list if the inode * is not yet written. If it is written, do the post-inode write * processing to put it on the id_pendinghd list. 
*/ (void) inodedep_lookup(fs, newinum, DEPALLOC, &inodedep); if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) diradd_inode_written(dap, inodedep); else WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); FREE_LOCK(&lk); } /* * This procedure is called to change the offset of a directory * entry when compacting a directory block which must be owned * exclusively by the caller. Note that the actual entry movement * must be done in this procedure to ensure that no I/O completions * occur while the move is in progress. */ void softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize) struct inode *dp; /* inode for directory */ caddr_t base; /* address of dp->i_offset */ caddr_t oldloc; /* address of old directory location */ caddr_t newloc; /* address of new directory location */ int entrysize; /* size of directory entry */ { int offset, oldoffset, newoffset; struct pagedep *pagedep; struct diradd *dap; ufs_lbn_t lbn; ACQUIRE_LOCK(&lk); lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0) goto done; oldoffset = offset + (oldloc - base); newoffset = offset + (newloc - base); for (dap = LIST_FIRST(&pagedep->pd_diraddhd[DIRADDHASH(oldoffset)]); dap; dap = LIST_NEXT(dap, da_pdlist)) { if (dap->da_offset != oldoffset) continue; dap->da_offset = newoffset; if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset)) break; LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)], dap, da_pdlist); break; } if (dap == NULL) { for (dap = LIST_FIRST(&pagedep->pd_pendinghd); dap; dap = LIST_NEXT(dap, da_pdlist)) { if (dap->da_offset == oldoffset) { dap->da_offset = newoffset; break; } } } done: bcopy(oldloc, newloc, entrysize); FREE_LOCK(&lk); } /* * Free a diradd dependency structure. This routine must be called * with splbio interrupts blocked. */ static void free_diradd(dap) struct diradd *dap; { struct dirrem *dirrem; struct pagedep *pagedep; struct inodedep *inodedep; struct mkdir *mkdir, *nextmd; #ifdef DEBUG if (lk.lkt_held == -1) panic("free_diradd: lock not held"); #endif WORKLIST_REMOVE(&dap->da_list); LIST_REMOVE(dap, da_pdlist); if ((dap->da_state & DIRCHG) == 0) { pagedep = dap->da_pagedep; } else { dirrem = dap->da_previous; pagedep = dirrem->dm_pagedep; dirrem->dm_dirinum = pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } if (inodedep_lookup(VFSTOUFS(pagedep->pd_mnt)->um_fs, dap->da_newinum, 0, &inodedep) != 0) (void) free_inodedep(inodedep); if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) { nextmd = LIST_NEXT(mkdir, md_mkdirs); if (mkdir->md_diradd != dap) continue; dap->da_state &= ~mkdir->md_state; WORKLIST_REMOVE(&mkdir->md_list); LIST_REMOVE(mkdir, md_mkdirs); WORKITEM_FREE(mkdir, D_MKDIR); } if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) panic("free_diradd: unfound ref"); } WORKITEM_FREE(dap, D_DIRADD); } /* * Directory entry removal dependencies. * * When removing a directory entry, the entry's inode pointer must be * zero'ed on disk before the corresponding inode's link count is decremented * (possibly freeing the inode for re-use). This dependency is handled by * updating the directory entry but delaying the inode count reduction until * after the directory block has been written to disk. After this point, the * inode count can be decremented whenever it is convenient. */ /* * This routine should be called immediately after removing * a directory entry. 
The inode's link count should not be * decremented by the calling procedure -- the soft updates * code will do this task when it is safe. */ void softdep_setup_remove(bp, dp, ip, isrmdir) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for the directory being modified */ struct inode *ip; /* inode for directory entry being removed */ int isrmdir; /* indicates if doing RMDIR */ { struct dirrem *dirrem; /* * Allocate a new dirrem if appropriate and ACQUIRE_LOCK. */ dirrem = newdirrem(bp, dp, ip, isrmdir); if ((dirrem->dm_state & COMPLETE) == 0) { LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem, dm_next); } else { dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } FREE_LOCK(&lk); } /* * Allocate a new dirrem if appropriate and return it along with * its associated pagedep. Called without a lock, returns with lock. */ static struct dirrem * newdirrem(bp, dp, ip, isrmdir) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for the directory being modified */ struct inode *ip; /* inode for directory entry being removed */ int isrmdir; /* indicates if doing RMDIR */ { int offset; ufs_lbn_t lbn; struct diradd *dap; struct dirrem *dirrem; struct pagedep *pagedep; /* * Whiteouts have no deletion dependencies. */ if (ip == NULL) panic("newdirrem: whiteout"); MALLOC(dirrem, struct dirrem *, sizeof(struct dirrem), M_DIRREM, M_WAITOK); bzero(dirrem, sizeof(struct dirrem)); dirrem->dm_list.wk_type = D_DIRREM; dirrem->dm_state = isrmdir ? RMDIR : 0; dirrem->dm_mnt = ITOV(ip)->v_mount; dirrem->dm_oldinum = ip->i_number; ACQUIRE_LOCK(&lk); lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); dirrem->dm_pagedep = pagedep; /* * Check for a diradd dependency for the same directory entry. * If present, then both dependencies become obsolete and can * be de-allocated. Check for an entry on both the pd_dirraddhd * list and the pd_pendinghd list. */ for (dap = LIST_FIRST(&pagedep->pd_diraddhd[DIRADDHASH(offset)]); dap; dap = LIST_NEXT(dap, da_pdlist)) if (dap->da_offset == offset) break; if (dap == NULL) { for (dap = LIST_FIRST(&pagedep->pd_pendinghd); dap; dap = LIST_NEXT(dap, da_pdlist)) if (dap->da_offset == offset) break; if (dap == NULL) return (dirrem); } /* * Must be ATTACHED at this point, so just delete it. */ if ((dap->da_state & ATTACHED) == 0) panic("newdirrem: not ATTACHED"); if (dap->da_newinum != ip->i_number) panic("newdirrem: inum %d should be %d", ip->i_number, dap->da_newinum); free_diradd(dap); dirrem->dm_state |= COMPLETE; return (dirrem); } /* * Directory entry change dependencies. * * Changing an existing directory entry requires that an add operation * be completed first followed by a deletion. The semantics for the addition * are identical to the description of adding a new entry above except * that the rollback is to the old inode number rather than zero. Once * the addition dependency is completed, the removal is done as described * in the removal routine above. */ /* * This routine should be called immediately after changing * a directory entry. The inode's link count should not be * decremented by the calling procedure -- the soft updates * code will perform this task when it is safe. 
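 *
 * (Editor's illustration, not part of this change; the caller-side names
 * are assumptions rather than text from this file.  A directory-entry
 * rewrite in the ufs layer would look roughly like:
 *
 *	ep->d_ino = newinum;			rewrite entry in buffer bp
 *	if (DOINGSOFTDEP(vdp))
 *		softdep_setup_directory_change(bp, dp, oip,
 *		    newinum, isrmdir);
 *	else
 *		oip->i_nlink--;			old synchronous behaviour
 *
 * i.e. the displaced inode's link count is left for the soft updates
 * code to decrement, as required above.)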
*/ void softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for the directory being modified */ struct inode *ip; /* inode for directory entry being removed */ long newinum; /* new inode number for changed entry */ int isrmdir; /* indicates if doing RMDIR */ { int offset; struct diradd *dap = NULL; struct dirrem *dirrem; struct pagedep *pagedep; struct inodedep *inodedep; offset = blkoff(dp->i_fs, dp->i_offset); /* * Whiteouts do not need diradd dependencies. */ if (newinum != WINO) { MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD, M_WAITOK); bzero(dap, sizeof(struct diradd)); dap->da_list.wk_type = D_DIRADD; dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE; dap->da_offset = offset; dap->da_newinum = newinum; } /* * Allocate a new dirrem and ACQUIRE_LOCK. */ dirrem = newdirrem(bp, dp, ip, isrmdir); pagedep = dirrem->dm_pagedep; /* * The possible values for isrmdir: * 0 - non-directory file rename * 1 - directory rename within same directory * inum - directory rename to new directory of given inode number * When renaming to a new directory, we are both deleting and * creating a new directory entry, so the link count on the new * directory should not change. Thus we do not need the followup * dirrem which is usually done in handle_workitem_remove. We set * the DIRCHG flag to tell handle_workitem_remove to skip the * followup dirrem. */ if (isrmdir > 1) dirrem->dm_state |= DIRCHG; /* * Whiteouts have no additional dependencies, * so just put the dirrem on the correct list. */ if (newinum == WINO) { if ((dirrem->dm_state & COMPLETE) == 0) { LIST_INSERT_HEAD(&pagedep->pd_dirremhd, dirrem, dm_next); } else { dirrem->dm_dirinum = pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } FREE_LOCK(&lk); return; } /* * Link into its inodedep. Put it on the id_bufwait list if the inode * is not yet written. If it is written, do the post-inode write * processing to put it on the id_pendinghd list. */ dap->da_previous = dirrem; if (inodedep_lookup(dp->i_fs, newinum, DEPALLOC, &inodedep) == 0 || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state |= COMPLETE; LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); } else { LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); } /* * If the previous inode was never written or its previous directory * entry was never written, then we do not want to roll back to this * previous value. Instead we want to roll back to zero and immediately * free the unwritten or unreferenced inode. */ if (dirrem->dm_state & COMPLETE) { dap->da_state &= ~DIRCHG; dap->da_pagedep = pagedep; dirrem->dm_dirinum = pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } FREE_LOCK(&lk); } /* * Called whenever the link count on an inode is increased. * It creates an inode dependency so that the new reference(s) * to the inode cannot be committed to disk until the updated * inode has been written. */ void softdep_increase_linkcnt(ip) struct inode *ip; /* the inode with the increased link count */ { struct inodedep *inodedep; ACQUIRE_LOCK(&lk); (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); FREE_LOCK(&lk); } /* * This workitem decrements the inode's link count. * If the link count reaches zero, the file is removed. 
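 *
 * (Editor's worked example, not part of the original comment: for an
 * rmdir(2) of an empty directory, the parent's entry and the victim's
 * own "." entry account for the i_nlink -= 2 below; the truncation to
 * length zero then releases the block holding "." and ".."; and the
 * dirrem is re-queued with dm_oldinum rewritten to the parent so that
 * the parent's link count, the ".." reference, is dropped only after
 * the victim's updated inode has reached the disk.)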
*/ static void handle_workitem_remove(dirrem) struct dirrem *dirrem; { struct proc *p = CURPROC; /* XXX */ struct inodedep *inodedep; struct vnode *vp; struct inode *ip; int error; if ((error = VFS_VGET(dirrem->dm_mnt, dirrem->dm_oldinum, &vp)) != 0) { softdep_error("handle_workitem_remove: vget", error); return; } ip = VTOI(vp); /* * Normal file deletion. */ if ((dirrem->dm_state & RMDIR) == 0) { ip->i_nlink--; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad file delta"); ip->i_flag |= IN_CHANGE; vput(vp); WORKITEM_FREE(dirrem, D_DIRREM); return; } /* * Directory deletion. Decrement reference count for both the * just deleted parent directory entry and the reference for ".". * Next truncate the directory to length zero. When the * truncation completes, arrange to have the reference count on * the parent decremented to account for the loss of "..". */ ip->i_nlink -= 2; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad dir delta"); ip->i_flag |= IN_CHANGE; if ((error = UFS_TRUNCATE(vp, (off_t)0, 0, p->p_ucred, p)) != 0) softdep_error("handle_workitem_remove: truncate", error); /* * Rename a directory to a new parent. Since, we are both deleting * and creating a new directory entry, the link count on the new * directory should not change. Thus we skip the followup dirrem. */ if (dirrem->dm_state & DIRCHG) { vput(vp); WORKITEM_FREE(dirrem, D_DIRREM); return; } ACQUIRE_LOCK(&lk); (void) inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, DEPALLOC, &inodedep); dirrem->dm_state = 0; dirrem->dm_oldinum = dirrem->dm_dirinum; WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); FREE_LOCK(&lk); vput(vp); } /* * Inode de-allocation dependencies. * * When an inode's link count is reduced to zero, it can be de-allocated. We * found it convenient to postpone de-allocation until after the inode is * written to disk with its new link count (zero). At this point, all of the * on-disk inode's block pointers are nullified and, with careful dependency * list ordering, all dependencies related to the inode will be satisfied and * the corresponding dependency structures de-allocated. So, if/when the * inode is reused, there will be no mixing of old dependencies with new * ones. This artificial dependency is set up by the block de-allocation * procedure above (softdep_setup_freeblocks) and completed by the * following procedure. */ static void handle_workitem_freefile(freefile) struct freefile *freefile; { struct vnode vp; struct inode tip; struct inodedep *idp; int error; #ifdef DEBUG ACQUIRE_LOCK(&lk); if (inodedep_lookup(freefile->fx_fs, freefile->fx_oldinum, 0, &idp)) panic("handle_workitem_freefile: inodedep survived"); FREE_LOCK(&lk); #endif tip.i_devvp = freefile->fx_devvp; tip.i_dev = freefile->fx_devvp->v_rdev; tip.i_fs = freefile->fx_fs; vp.v_data = &tip; if ((error = ffs_freefile(&vp, freefile->fx_oldinum, freefile->fx_mode)) != 0) softdep_error("handle_workitem_freefile", error); WORKITEM_FREE(freefile, D_FREEFILE); num_freefile -= 1; } /* * Disk writes. * * The dependency structures constructed above are most actively used when file * system blocks are written to disk. No constraints are placed on when a * block can be written, but unsatisfied update dependencies are made safe by * modifying (or replacing) the source memory for the duration of the disk * write. When the disk write completes, the memory block is again brought * up-to-date. * * In-core inode structure reclamation. 
* * Because there are a finite number of "in-core" inode structures, they are * reused regularly. By transferring all inode-related dependencies to the * in-memory inode block and indexing them separately (via "inodedep"s), we * can allow "in-core" inode structures to be reused at any time and avoid * any increase in contention. * * Called just before entering the device driver to initiate a new disk I/O. * The buffer must be locked, thus, no I/O completion operations can occur * while we are manipulating its associated dependencies. */ void softdep_disk_io_initiation(bp) struct buf *bp; /* structure describing disk write to occur */ { struct worklist *wk, *nextwk; struct indirdep *indirdep; /* * We only care about write operations. There should never * be dependencies for reads. */ if (bp->b_flags & B_READ) panic("softdep_disk_io_initiation: read"); /* * Do any necessary pre-I/O processing. */ for (wk = LIST_FIRST(&bp->b_dep); wk; wk = nextwk) { nextwk = LIST_NEXT(wk, wk_list); switch (wk->wk_type) { case D_PAGEDEP: initiate_write_filepage(WK_PAGEDEP(wk), bp); continue; case D_INODEDEP: initiate_write_inodeblock(WK_INODEDEP(wk), bp); continue; case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); if (indirdep->ir_state & GOINGAWAY) panic("disk_io_initiation: indirdep gone"); /* * If there are no remaining dependencies, this * will be writing the real pointers, so the * dependency can be freed. */ if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) { indirdep->ir_savebp->b_flags |= B_INVAL | B_NOCACHE; brelse(indirdep->ir_savebp); /* inline expand WORKLIST_REMOVE(wk); */ wk->wk_state &= ~ONWORKLIST; LIST_REMOVE(wk, wk_list); WORKITEM_FREE(indirdep, D_INDIRDEP); continue; } /* * Replace up-to-date version with safe version. */ ACQUIRE_LOCK(&lk); indirdep->ir_state &= ~ATTACHED; indirdep->ir_state |= UNDONE; MALLOC(indirdep->ir_saveddata, caddr_t, bp->b_bcount, M_INDIRDEP, M_WAITOK); bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); bcopy(indirdep->ir_savebp->b_data, bp->b_data, bp->b_bcount); FREE_LOCK(&lk); continue; case D_MKDIR: case D_BMSAFEMAP: case D_ALLOCDIRECT: case D_ALLOCINDIR: continue; default: panic("handle_disk_io_initiation: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } } /* * Called from within the procedure above to deal with unsatisfied * allocation dependencies in a directory. The buffer must be locked, * thus, no I/O completion operations can occur while we are * manipulating its associated dependencies. */ static void initiate_write_filepage(pagedep, bp) struct pagedep *pagedep; struct buf *bp; { struct diradd *dap; struct direct *ep; int i; if (pagedep->pd_state & IOSTARTED) { /* * This can only happen if there is a driver that does not * understand chaining. Here biodone will reissue the call * to strategy for the incomplete buffers. */ printf("initiate_write_filepage: already started\n"); return; } pagedep->pd_state |= IOSTARTED; ACQUIRE_LOCK(&lk); for (i = 0; i < DAHASHSZ; i++) { for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap; dap = LIST_NEXT(dap, da_pdlist)) { ep = (struct direct *) ((char *)bp->b_data + dap->da_offset); if (ep->d_ino != dap->da_newinum) panic("%s: dir inum %d != new %d", "initiate_write_filepage", ep->d_ino, dap->da_newinum); if (dap->da_state & DIRCHG) ep->d_ino = dap->da_previous->dm_oldinum; else ep->d_ino = 0; dap->da_state &= ~ATTACHED; dap->da_state |= UNDONE; } } FREE_LOCK(&lk); } /* * Called from within the procedure above to deal with unsatisfied * allocation dependencies in an inodeblock. 
The buffer must be * locked, thus, no I/O completion operations can occur while we * are manipulating its associated dependencies. */ static void initiate_write_inodeblock(inodedep, bp) struct inodedep *inodedep; struct buf *bp; /* The inode block */ { struct allocdirect *adp, *lastadp; struct dinode *dp; struct fs *fs; ufs_lbn_t prevlbn = 0; int i, deplist; if (inodedep->id_state & IOSTARTED) panic("initiate_write_inodeblock: already started"); inodedep->id_state |= IOSTARTED; fs = inodedep->id_fs; dp = (struct dinode *)bp->b_data + ino_to_fsbo(fs, inodedep->id_ino); /* * If the bitmap is not yet written, then the allocated * inode cannot be written to disk. */ if ((inodedep->id_state & DEPCOMPLETE) == 0) { if (inodedep->id_savedino != NULL) panic("initiate_write_inodeblock: already doing I/O"); MALLOC(inodedep->id_savedino, struct dinode *, sizeof(struct dinode), M_INODEDEP, M_WAITOK); *inodedep->id_savedino = *dp; bzero((caddr_t)dp, sizeof(struct dinode)); return; } /* * If no dependencies, then there is nothing to roll back. */ inodedep->id_savedsize = dp->di_size; if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL) return; /* * Set the dependencies to busy. */ ACQUIRE_LOCK(&lk); for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef DIAGNOSTIC if (deplist != 0 && prevlbn >= adp->ad_lbn) panic("softdep_write_inodeblock: lbn order"); prevlbn = adp->ad_lbn; if (adp->ad_lbn < NDADDR && dp->di_db[adp->ad_lbn] != adp->ad_newblkno) panic("%s: direct pointer #%ld mismatch %d != %d", "softdep_write_inodeblock", adp->ad_lbn, dp->di_db[adp->ad_lbn], adp->ad_newblkno); if (adp->ad_lbn >= NDADDR && dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) panic("%s: indirect pointer #%ld mismatch %d != %d", "softdep_write_inodeblock", adp->ad_lbn - NDADDR, dp->di_ib[adp->ad_lbn - NDADDR], adp->ad_newblkno); deplist |= 1 << adp->ad_lbn; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); #endif /* DIAGNOSTIC */ adp->ad_state &= ~ATTACHED; adp->ad_state |= UNDONE; } /* * The on-disk inode cannot claim to be any larger than the last * fragment that has been written. Otherwise, the on-disk inode * might have fragments that were not the last block in the file * which would corrupt the filesystem. */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { if (adp->ad_lbn >= NDADDR) break; dp->di_db[adp->ad_lbn] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; for (i = adp->ad_lbn + 1; i < NDADDR; i++) { #ifdef DIAGNOSTIC if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep1"); #endif /* DIAGNOSTIC */ dp->di_db[i] = 0; } for (i = 0; i < NIADDR; i++) { #ifdef DIAGNOSTIC if (dp->di_ib[i] != 0 && (deplist & ((1 << NDADDR) << i)) == 0) panic("softdep_write_inodeblock: lost dep2"); #endif /* DIAGNOSTIC */ dp->di_ib[i] = 0; } FREE_LOCK(&lk); return; } /* * If we have zero'ed out the last allocated block of the file, * roll back the size to the last currently allocated block. * We know that this last allocated block is a full-sized as * we already checked for fragments in the loop above. 
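 *
 * (Editor's worked example with illustrative numbers: suppose an 8K-block
 * file just grew from three to five direct blocks, so allocdirects for
 * lbns 3 and 4 are pending and both di_db slots were rolled back to their
 * old value of zero above.  di_size still says 40960, which is <=
 * (4 + 1) * 8192, so the scan below stops at di_db[2] and di_size is
 * rolled back to 3 * 8192 = 24576 for the duration of this write.)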
*/ if (lastadp != NULL && dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { for (i = lastadp->ad_lbn; i >= 0; i--) if (dp->di_db[i] != 0) break; dp->di_size = (i + 1) * fs->fs_bsize; } /* * The only dependencies are for indirect blocks. * * The file size for indirect block additions is not guaranteed. * Such a guarantee would be non-trivial to achieve. The conventional * synchronous write implementation also does not make this guarantee. * Fsck should catch and fix discrepancies. Arguably, the file size * can be over-estimated without destroying integrity when the file * moves into the indirect blocks (i.e., is large). If we want to * postpone fsck, we are stuck with this argument. */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) dp->di_ib[adp->ad_lbn - NDADDR] = 0; FREE_LOCK(&lk); } /* * This routine is called during the completion interrupt * service routine for a disk write (from the procedure called * by the device driver to inform the file system caches of * a request completion). It should be called early in this * procedure, before the block is made available to other * processes or other routines are called. */ void softdep_disk_write_complete(bp) struct buf *bp; /* describes the completed disk write */ { struct worklist *wk; struct workhead reattach; struct newblk *newblk; struct allocindir *aip; struct allocdirect *adp; struct indirdep *indirdep; struct inodedep *inodedep; struct bmsafemap *bmsafemap; #ifdef DEBUG if (lk.lkt_held != -1) panic("softdep_disk_write_complete: lock is held"); lk.lkt_held = -2; #endif LIST_INIT(&reattach); while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { WORKLIST_REMOVE(wk); switch (wk->wk_type) { case D_PAGEDEP: if (handle_written_filepage(WK_PAGEDEP(wk), bp)) WORKLIST_INSERT(&reattach, wk); continue; case D_INODEDEP: if (handle_written_inodeblock(WK_INODEDEP(wk), bp)) WORKLIST_INSERT(&reattach, wk); continue; case D_BMSAFEMAP: bmsafemap = WK_BMSAFEMAP(wk); while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd))) { newblk->nb_state |= DEPCOMPLETE; newblk->nb_bmsafemap = NULL; LIST_REMOVE(newblk, nb_deps); } while ((adp = LIST_FIRST(&bmsafemap->sm_allocdirecthd))) { adp->ad_state |= DEPCOMPLETE; adp->ad_buf = NULL; LIST_REMOVE(adp, ad_deps); handle_allocdirect_partdone(adp); } while ((aip = LIST_FIRST(&bmsafemap->sm_allocindirhd))) { aip->ai_state |= DEPCOMPLETE; aip->ai_buf = NULL; LIST_REMOVE(aip, ai_deps); handle_allocindir_partdone(aip); } while ((inodedep = LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) { inodedep->id_state |= DEPCOMPLETE; LIST_REMOVE(inodedep, id_deps); inodedep->id_buf = NULL; } WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); continue; case D_MKDIR: handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY); continue; case D_ALLOCDIRECT: adp = WK_ALLOCDIRECT(wk); adp->ad_state |= COMPLETE; handle_allocdirect_partdone(adp); continue; case D_ALLOCINDIR: aip = WK_ALLOCINDIR(wk); aip->ai_state |= COMPLETE; handle_allocindir_partdone(aip); continue; case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); if (indirdep->ir_state & GOINGAWAY) panic("disk_write_complete: indirdep gone"); bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount); FREE(indirdep->ir_saveddata, M_INDIRDEP); indirdep->ir_saveddata = 0; indirdep->ir_state &= ~UNDONE; indirdep->ir_state |= ATTACHED; while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { handle_allocindir_partdone(aip); if (aip == LIST_FIRST(&indirdep->ir_donehd)) panic("disk_write_complete: not gone"); } WORKLIST_INSERT(&reattach, wk); if ((bp->b_flags & B_DELWRI) == 0) stat_indir_blk_ptrs++; bdirty(bp); continue; default: 
panic("handle_disk_write_complete: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } /* * Reattach any requests that must be redone. */ while ((wk = LIST_FIRST(&reattach)) != NULL) { WORKLIST_REMOVE(wk); WORKLIST_INSERT(&bp->b_dep, wk); } #ifdef DEBUG if (lk.lkt_held != -2) panic("softdep_disk_write_complete: lock lost"); lk.lkt_held = -1; #endif } /* * Called from within softdep_disk_write_complete above. Note that * this routine is always called from interrupt level with further * splbio interrupts blocked. */ static void handle_allocdirect_partdone(adp) struct allocdirect *adp; /* the completed allocdirect */ { struct allocdirect *listadp; struct inodedep *inodedep; long bsize; if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; if (adp->ad_buf != NULL) panic("handle_allocdirect_partdone: dangling dep"); /* * The on-disk inode cannot claim to be any larger than the last * fragment that has been written. Otherwise, the on-disk inode * might have fragments that were not the last block in the file * which would corrupt the filesystem. Thus, we cannot free any * allocdirects after one whose ad_oldblkno claims a fragment as * these blocks must be rolled back to zero before writing the inode. * We check the currently active set of allocdirects in id_inoupdt. */ inodedep = adp->ad_inodedep; bsize = inodedep->id_fs->fs_bsize; for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt); listadp; listadp = TAILQ_NEXT(listadp, ad_next)) { /* found our block */ if (listadp == adp) break; /* continue if ad_oldlbn is not a fragment */ if (listadp->ad_oldsize == 0 || listadp->ad_oldsize == bsize) continue; /* hit a fragment */ return; } /* * If we have reached the end of the current list without * finding the just finished dependency, then it must be * on the future dependency list. Future dependencies cannot * be freed until they are moved to the current list. */ if (listadp == NULL) { #ifdef DEBUG for (listadp = TAILQ_FIRST(&inodedep->id_newinoupdt); listadp; listadp = TAILQ_NEXT(listadp, ad_next)) /* found our block */ if (listadp == adp) break; if (listadp == NULL) panic("handle_allocdirect_partdone: lost dep"); #endif /* DEBUG */ return; } /* * If we have found the just finished dependency, then free * it along with anything that follows it that is complete. */ for (; adp; adp = listadp) { listadp = TAILQ_NEXT(adp, ad_next); if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; free_allocdirect(&inodedep->id_inoupdt, adp, 1); } } /* * Called from within softdep_disk_write_complete above. Note that * this routine is always called from interrupt level with further * splbio interrupts blocked. */ static void handle_allocindir_partdone(aip) struct allocindir *aip; /* the completed allocindir */ { struct indirdep *indirdep; if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE) return; if (aip->ai_buf != NULL) panic("handle_allocindir_partdone: dangling dependency"); indirdep = aip->ai_indirdep; if (indirdep->ir_state & UNDONE) { LIST_REMOVE(aip, ai_next); LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next); return; } ((ufs_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] = aip->ai_newblkno; LIST_REMOVE(aip, ai_next); if (aip->ai_freefrag != NULL) add_to_worklist(&aip->ai_freefrag->ff_list); WORKITEM_FREE(aip, D_ALLOCINDIR); } /* * Called from within softdep_disk_write_complete above to restore * in-memory inode block contents to their most up-to-date state. Note * that this routine is always called from interrupt level with further * splbio interrupts blocked. 
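 *
 * (Editor's note, illustrative: this is the roll-forward half of the
 * scheme.  initiate_write_inodeblock() replaced each still-dependent
 * block pointer with its safe value, the old block number for direct
 * blocks and zero otherwise, before the inode block went to disk; the
 * loop below puts the ad_newblkno values back and re-dirties the buffer,
 * so the real pointers reach the disk in a later, safe write.)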
*/ static int handle_written_inodeblock(inodedep, bp) struct inodedep *inodedep; struct buf *bp; /* buffer containing the inode block */ { struct worklist *wk, *filefree; struct allocdirect *adp, *nextadp; struct dinode *dp; int hadchanges; if ((inodedep->id_state & IOSTARTED) == 0) panic("handle_written_inodeblock: not started"); inodedep->id_state &= ~IOSTARTED; inodedep->id_state |= COMPLETE; dp = (struct dinode *)bp->b_data + ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); /* * If we had to rollback the inode allocation because of * bitmaps being incomplete, then simply restore it. * Keep the block dirty so that it will not be reclaimed until * all associated dependencies have been cleared and the * corresponding updates written to disk. */ if (inodedep->id_savedino != NULL) { *dp = *inodedep->id_savedino; FREE(inodedep->id_savedino, M_INODEDEP); inodedep->id_savedino = NULL; if ((bp->b_flags & B_DELWRI) == 0) stat_inode_bitmap++; bdirty(bp); return (1); } /* * Roll forward anything that had to be rolled back before * the inode could be updated. */ hadchanges = 0; for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) { nextadp = TAILQ_NEXT(adp, ad_next); if (adp->ad_state & ATTACHED) panic("handle_written_inodeblock: new entry"); if (adp->ad_lbn < NDADDR) { if (dp->di_db[adp->ad_lbn] != adp->ad_oldblkno) panic("%s: %s #%ld mismatch %d != %d", "handle_written_inodeblock", "direct pointer", adp->ad_lbn, dp->di_db[adp->ad_lbn], adp->ad_oldblkno); dp->di_db[adp->ad_lbn] = adp->ad_newblkno; } else { if (dp->di_ib[adp->ad_lbn - NDADDR] != 0) panic("%s: %s #%ld allocated as %d", "handle_written_inodeblock", "indirect pointer", adp->ad_lbn - NDADDR, dp->di_ib[adp->ad_lbn - NDADDR]); dp->di_ib[adp->ad_lbn - NDADDR] = adp->ad_newblkno; } adp->ad_state &= ~UNDONE; adp->ad_state |= ATTACHED; hadchanges = 1; } if (hadchanges && (bp->b_flags & B_DELWRI) == 0) stat_direct_blk_ptrs++; /* * Reset the file size to its most up-to-date value. */ if (inodedep->id_savedsize == -1) panic("handle_written_inodeblock: bad size"); if (dp->di_size != inodedep->id_savedsize) { dp->di_size = inodedep->id_savedsize; hadchanges = 1; } inodedep->id_savedsize = -1; /* * If there were any rollbacks in the inode block, then it must be * marked dirty so that its will eventually get written back in * its correct form. */ if (hadchanges) bdirty(bp); /* * Process any allocdirects that completed during the update. */ if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL) handle_allocdirect_partdone(adp); /* * Process deallocations that were held pending until the * inode had been written to disk. Freeing of the inode * is delayed until after all blocks have been freed to * avoid creation of new triples * before the old ones have been deleted. */ filefree = NULL; while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { WORKLIST_REMOVE(wk); switch (wk->wk_type) { case D_FREEFILE: /* * We defer adding filefree to the worklist until * all other additions have been made to ensure * that it will be done after all the old blocks * have been freed. 
*/ if (filefree != NULL) panic("handle_written_inodeblock: filefree"); filefree = wk; continue; case D_MKDIR: handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT); continue; case D_DIRADD: diradd_inode_written(WK_DIRADD(wk), inodedep); continue; case D_FREEBLKS: case D_FREEFRAG: case D_DIRREM: add_to_worklist(wk); continue; default: panic("handle_written_inodeblock: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } if (filefree != NULL) { if (free_inodedep(inodedep) == 0) panic("handle_written_inodeblock: live inodedep"); add_to_worklist(filefree); return (0); } /* * If no outstanding dependencies, free it. */ if (free_inodedep(inodedep) || TAILQ_FIRST(&inodedep->id_inoupdt) == 0) return (0); return (hadchanges); } /* * Process a diradd entry after its dependent inode has been written. * This routine must be called with splbio interrupts blocked. */ static void diradd_inode_written(dap, inodedep) struct diradd *dap; struct inodedep *inodedep; { struct pagedep *pagedep; dap->da_state |= COMPLETE; if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { if (dap->da_state & DIRCHG) pagedep = dap->da_previous->dm_pagedep; else pagedep = dap->da_pagedep; LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); } /* * Handle the completion of a mkdir dependency. */ static void handle_written_mkdir(mkdir, type) struct mkdir *mkdir; int type; { struct diradd *dap; struct pagedep *pagedep; if (mkdir->md_state != type) panic("handle_written_mkdir: bad type"); dap = mkdir->md_diradd; dap->da_state &= ~type; if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) dap->da_state |= DEPCOMPLETE; if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { if (dap->da_state & DIRCHG) pagedep = dap->da_previous->dm_pagedep; else pagedep = dap->da_pagedep; LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } LIST_REMOVE(mkdir, md_mkdirs); WORKITEM_FREE(mkdir, D_MKDIR); } /* * Called from within softdep_disk_write_complete above. * A write operation was just completed. Removed inodes can * now be freed and associated block pointers may be committed. * Note that this routine is always called from interrupt level * with further splbio interrupts blocked. */ static int handle_written_filepage(pagedep, bp) struct pagedep *pagedep; struct buf *bp; /* buffer containing the written page */ { struct dirrem *dirrem; struct diradd *dap, *nextdap; struct direct *ep; int i, chgs; if ((pagedep->pd_state & IOSTARTED) == 0) panic("handle_written_filepage: not started"); pagedep->pd_state &= ~IOSTARTED; /* * Process any directory removals that have been committed. */ while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) { LIST_REMOVE(dirrem, dm_next); dirrem->dm_dirinum = pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } /* * Free any directory additions that have been committed. */ while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) free_diradd(dap); /* * Uncommitted directory entries must be restored. 
*/ for (chgs = 0, i = 0; i < DAHASHSZ; i++) { for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap; dap = nextdap) { nextdap = LIST_NEXT(dap, da_pdlist); if (dap->da_state & ATTACHED) panic("handle_written_filepage: attached"); ep = (struct direct *) ((char *)bp->b_data + dap->da_offset); ep->d_ino = dap->da_newinum; dap->da_state &= ~UNDONE; dap->da_state |= ATTACHED; chgs = 1; /* * If the inode referenced by the directory has * been written out, then the dependency can be * moved to the pending list. */ if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } } } /* * If there were any rollbacks in the directory, then it must be * marked dirty so that its will eventually get written back in * its correct form. */ if (chgs) { if ((bp->b_flags & B_DELWRI) == 0) stat_dir_entry++; bdirty(bp); } /* * If no dependencies remain, the pagedep will be freed. * Otherwise it will remain to update the page before it * is written back to disk. */ if (LIST_FIRST(&pagedep->pd_pendinghd) == 0) { for (i = 0; i < DAHASHSZ; i++) if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL) break; if (i == DAHASHSZ) { LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); return (0); } } return (1); } /* * Writing back in-core inode structures. * * The file system only accesses an inode's contents when it occupies an * "in-core" inode structure. These "in-core" structures are separate from * the page frames used to cache inode blocks. Only the latter are * transferred to/from the disk. So, when the updated contents of the * "in-core" inode structure are copied to the corresponding in-memory inode * block, the dependencies are also transferred. The following procedure is * called when copying a dirty "in-core" inode to a cached inode block. */ /* * Called when an inode is loaded from disk. If the effective link count * differed from the actual link count when it was last flushed, then we * need to ensure that the correct effective link count is put back. */ void softdep_load_inodeblock(ip) struct inode *ip; /* the "in_core" copy of the inode */ { struct inodedep *inodedep; /* * Check for alternate nlink count. */ ip->i_effnlink = ip->i_nlink; ACQUIRE_LOCK(&lk); if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) { FREE_LOCK(&lk); return; } if (inodedep->id_nlinkdelta != 0) { ip->i_effnlink -= inodedep->id_nlinkdelta; ip->i_flag |= IN_MODIFIED; inodedep->id_nlinkdelta = 0; (void) free_inodedep(inodedep); } FREE_LOCK(&lk); } /* * This routine is called just before the "in-core" inode * information is to be copied to the in-memory inode block. * Recall that an inode block contains several inodes. If * the force flag is set, then the dependencies will be * cleared so that the update can always be made. Note that * the buffer is locked when this routine is called, so we * will never be in the middle of writing the inode block * to disk. */ void softdep_update_inodeblock(ip, bp, waitfor) struct inode *ip; /* the "in_core" copy of the inode */ struct buf *bp; /* the buffer containing the inode block */ int waitfor; /* nonzero => update must be allowed */ { struct inodedep *inodedep; struct worklist *wk; int error, gotit; /* * If the effective link count is not equal to the actual link * count, then we must track the difference in an inodedep while * the inode is (potentially) tossed out of the cache. Otherwise, * if there is no existing inodedep, then there are no dependencies * to track. 
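 *
 * The bookkeeping is a round trip: the difference between the on-disk
 * link count and the effective link count is parked in the inodedep
 * here, and subtracted again by softdep_load_inodeblock() when the
 * inode is read back in.  A worked example (the sk_* names are local
 * stand-ins for i_nlink, i_effnlink and id_nlinkdelta, not the real
 * structures):
 */
#if 0	/* illustrative sketch only, not part of this file */
#include <assert.h>

struct sk_inode    { int nlink, effnlink; };
struct sk_inodedep { int nlinkdelta; };

static void
sk_update(struct sk_inode *ip, struct sk_inodedep *dep)
{
	dep->nlinkdelta = ip->nlink - ip->effnlink;	/* record at flush */
}

static void
sk_load(struct sk_inode *ip, struct sk_inodedep *dep)
{
	ip->effnlink = ip->nlink - dep->nlinkdelta;	/* recover on reload */
	dep->nlinkdelta = 0;
}

static void
sk_example(void)
{
	/* One unlink still pending: 2 links on disk, effectively 1. */
	struct sk_inode ino = { 2, 1 };
	struct sk_inodedep dep = { 0 };

	sk_update(&ino, &dep);		/* a delta of 1 is remembered */
	ino.effnlink = ino.nlink;	/* inode later reloaded from disk */
	sk_load(&ino, &dep);
	assert(ino.effnlink == 1);	/* effective count restored */
}
#endif
/*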
*/ ACQUIRE_LOCK(&lk); if (ip->i_effnlink != ip->i_nlink) { (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); } else if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) { FREE_LOCK(&lk); return; } if (ip->i_nlink < ip->i_effnlink) panic("softdep_update_inodeblock: bad delta"); inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink; /* * Changes have been initiated. Anything depending on these * changes cannot occur until this inode has been written. */ inodedep->id_state &= ~COMPLETE; if ((inodedep->id_state & ONWORKLIST) == 0) WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list); /* * Any new dependencies associated with the incore inode must * now be moved to the list associated with the buffer holding * the in-memory copy of the inode. Once merged process any * allocdirects that are completed by the merger. */ merge_inode_lists(inodedep); if (TAILQ_FIRST(&inodedep->id_inoupdt) != NULL) handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt)); /* * Now that the inode has been pushed into the buffer, the * operations dependent on the inode being written to disk * can be moved to the id_bufwait so that they will be * processed when the buffer I/O completes. */ while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) { WORKLIST_REMOVE(wk); WORKLIST_INSERT(&inodedep->id_bufwait, wk); } /* * Newly allocated inodes cannot be written until the bitmap * that allocates them have been written (indicated by * DEPCOMPLETE being set in id_state). If we are doing a * forced sync (e.g., an fsync on a file), we force the bitmap * to be written so that the update can be done. */ if ((inodedep->id_state & DEPCOMPLETE) != 0 || waitfor == 0) { FREE_LOCK(&lk); return; } gotit = getdirtybuf(&inodedep->id_buf, MNT_WAIT); FREE_LOCK(&lk); if (gotit && (error = VOP_BWRITE(inodedep->id_buf->b_vp, inodedep->id_buf)) != 0) softdep_error("softdep_update_inodeblock: bwrite", error); if ((inodedep->id_state & DEPCOMPLETE) == 0) panic("softdep_update_inodeblock: update failed"); } /* * Merge the new inode dependency list (id_newinoupdt) into the old * inode dependency list (id_inoupdt). This routine must be called * with splbio interrupts blocked. */ static void merge_inode_lists(inodedep) struct inodedep *inodedep; { struct allocdirect *listadp, *newadp; newadp = TAILQ_FIRST(&inodedep->id_newinoupdt); for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt); listadp && newadp;) { if (listadp->ad_lbn < newadp->ad_lbn) { listadp = TAILQ_NEXT(listadp, ad_next); continue; } TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next); TAILQ_INSERT_BEFORE(listadp, newadp, ad_next); if (listadp->ad_lbn == newadp->ad_lbn) { allocdirect_merge(&inodedep->id_inoupdt, newadp, listadp); listadp = newadp; } newadp = TAILQ_FIRST(&inodedep->id_newinoupdt); } while ((newadp = TAILQ_FIRST(&inodedep->id_newinoupdt)) != NULL) { TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next); TAILQ_INSERT_TAIL(&inodedep->id_inoupdt, newadp, ad_next); } } /* * If we are doing an fsync, then we must ensure that any directory * entries for the inode have been written after the inode gets to disk. 
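 *
 * (An aside on merge_inode_lists() above: it is the classic merge of
 * two lists kept sorted by logical block number, where an entry present
 * in both lists is resolved in favour of the newer one via
 * allocdirect_merge().  The same shape on plain singly linked lists,
 * hypothetical sk_* names only:)
 */
#if 0	/* illustrative sketch only, not part of this file */
#include <stddef.h>

struct sk_ad {
	struct sk_ad	*next;
	long		 lbn;			/* logical block number */
};

static struct sk_ad *
sk_merge_sorted(struct sk_ad *oldl, struct sk_ad *newl)
{
	struct sk_ad head, *tail;

	head.next = NULL;
	tail = &head;
	while (oldl != NULL && newl != NULL) {
		if (oldl->lbn < newl->lbn) {
			tail->next = oldl;
			oldl = oldl->next;
		} else if (newl->lbn < oldl->lbn) {
			tail->next = newl;
			newl = newl->next;
		} else {
			/* Same block: the newer entry supersedes the old. */
			tail->next = newl;
			newl = newl->next;
			oldl = oldl->next;
		}
		tail = tail->next;
	}
	tail->next = (oldl != NULL) ? oldl : newl;
	return (head.next);
}
#endif
/*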
*/ int softdep_fsync(vp) struct vnode *vp; /* the "in_core" copy of the inode */ { struct diradd *dap, *olddap; struct inodedep *inodedep; struct pagedep *pagedep; struct worklist *wk; struct mount *mnt; struct vnode *pvp; struct inode *ip; struct buf *bp; struct fs *fs; struct proc *p = CURPROC; /* XXX */ int error, ret, flushparent; ino_t parentino; ufs_lbn_t lbn; ip = VTOI(vp); fs = ip->i_fs; for (error = 0, flushparent = 0, olddap = NULL; ; ) { ACQUIRE_LOCK(&lk); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) break; if (LIST_FIRST(&inodedep->id_inowait) != NULL || LIST_FIRST(&inodedep->id_bufwait) != NULL || TAILQ_FIRST(&inodedep->id_inoupdt) != NULL || TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL) panic("softdep_fsync: pending ops"); if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL) break; if (wk->wk_type != D_DIRADD) panic("softdep_fsync: Unexpected type %s", TYPENAME(wk->wk_type)); dap = WK_DIRADD(wk); /* * If we have failed to get rid of all the dependencies * then something is seriously wrong. */ if (dap == olddap) panic("softdep_fsync: flush failed"); olddap = dap; /* * Flush our parent if this directory entry * has a MKDIR_PARENT dependency. */ if (dap->da_state & DIRCHG) pagedep = dap->da_previous->dm_pagedep; else pagedep = dap->da_pagedep; mnt = pagedep->pd_mnt; parentino = pagedep->pd_ino; lbn = pagedep->pd_lbn; if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE) panic("softdep_fsync: dirty"); flushparent = dap->da_state & MKDIR_PARENT; /* * If we are being fsync'ed as part of vgone'ing this vnode, * then we will not be able to release and recover the * vnode below, so we just have to give up on writing its * directory entry out. It will eventually be written, just * not now, but then the user was not asking to have it * written, so we are not breaking any promises. */ if (vp->v_flag & VXLOCK) break; /* * We prevent deadlock by always fetching inodes from the * root, moving down the directory tree. Thus, when fetching * our parent directory, we must unlock ourselves before * requesting the lock on our parent. See the comment in * ufs_lookup for details on possible races. */ FREE_LOCK(&lk); VOP_UNLOCK(vp, 0, p); if ((error = VFS_VGET(mnt, parentino, &pvp)) != 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (flushparent) { if ((error = UFS_UPDATE(pvp, 1)) != 0) { vput(pvp); return (error); } } /* * Flush directory page containing the inode's name. */ error = bread(pvp, lbn, blksize(fs, VTOI(pvp), lbn), p->p_ucred, &bp); ret = VOP_BWRITE(bp->b_vp, bp); vput(pvp); if (error != 0) return (error); if (ret != 0) return (ret); } FREE_LOCK(&lk); return (0); } /* * Flush all the dirty bitmaps associated with the block device * before flushing the rest of the dirty blocks so as to reduce * the number of dependencies that will have to be rolled back. */ void softdep_fsync_mountdev(vp) struct vnode *vp; { struct buf *bp, *nbp; struct worklist *wk; if (vp->v_type != VBLK) panic("softdep_fsync_mountdev: vnode not VBLK"); ACQUIRE_LOCK(&lk); for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); /* * If it is already scheduled, skip to the next buffer. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("softdep_fsync_mountdev: not dirty"); /* * We are only interested in bitmaps with outstanding * dependencies. 
*/ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL || wk->wk_type != D_BMSAFEMAP) { BUF_UNLOCK(bp); continue; } bremfree(bp); FREE_LOCK(&lk); (void) bawrite(bp); ACQUIRE_LOCK(&lk); /* * Since we may have slept during the I/O, we need * to start from a known point. */ nbp = TAILQ_FIRST(&vp->v_dirtyblkhd); } drain_output(vp, 1); FREE_LOCK(&lk); } /* * This routine is called when we are trying to synchronously flush a * file. This routine must eliminate any filesystem metadata dependencies * so that the syncing routine can succeed by pushing the dirty blocks * associated with the file. If any I/O errors occur, they are returned. */ int softdep_sync_metadata(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct pagedep *pagedep; struct allocdirect *adp; struct allocindir *aip; struct buf *bp, *nbp; struct worklist *wk; int i, error, waitfor; /* * Check whether this vnode is involved in a filesystem * that is doing soft dependency processing. */ if (vp->v_type != VBLK) { if (!DOINGSOFTDEP(vp)) return (0); } else if (vp->v_specmountpoint == NULL || (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP) == 0) return (0); /* * Ensure that any direct block dependencies have been cleared. */ ACQUIRE_LOCK(&lk); if ((error = flush_inodedep_deps(VTOI(vp)->i_fs, VTOI(vp)->i_number))) { FREE_LOCK(&lk); return (error); } /* * For most files, the only metadata dependencies are the * cylinder group maps that allocate their inode or blocks. * The block allocation dependencies can be found by traversing * the dependency lists for any buffers that remain on their * dirty buffer list. The inode allocation dependency will * be resolved when the inode is updated with MNT_WAIT. * This work is done in two passes. The first pass grabs most * of the buffers and begins asynchronously writing them. The * only way to wait for these asynchronous writes is to sleep * on the filesystem vnode which may stay busy for a long time * if the filesystem is active. So, instead, we make a second * pass over the dependencies blocking on each write. In the * usual case we will be blocking against a write that we * initiated, so when it is done the dependency will have been * resolved. Thus the second pass is expected to end quickly. */ waitfor = MNT_NOWAIT; top: if (getdirtybuf(&TAILQ_FIRST(&vp->v_dirtyblkhd), MNT_WAIT) == 0) { FREE_LOCK(&lk); return (0); } bp = TAILQ_FIRST(&vp->v_dirtyblkhd); loop: /* * As we hold the buffer locked, none of its dependencies * will disappear. 
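 *
 * (The two-pass strategy described above boils down to a small control
 * structure: the same walk over the dirty buffers is made twice, first
 * issuing asynchronous writes, then waiting on and error-checking each
 * write.  A compressed sketch, with sk_write_buf() as a hypothetical
 * stand-in for the bawrite()/VOP_BWRITE() choice:)
 */
#if 0	/* illustrative sketch only, not part of this file */
static int
sk_two_pass_flush(int nbufs, int (*sk_write_buf)(int idx, int wait))
{
	int wait = 0;			/* pass 1: like MNT_NOWAIT */
	int i, error;

	for (;;) {
		for (i = 0; i < nbufs; i++)
			if ((error = sk_write_buf(i, wait)) != 0)
				return (error);
		if (wait)
			return (0);	/* pass 2 complete */
		wait = 1;		/* pass 2: like MNT_WAIT */
	}
}
#endif
/*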
*/ for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) { switch (wk->wk_type) { case D_ALLOCDIRECT: adp = WK_ALLOCDIRECT(wk); if (adp->ad_state & DEPCOMPLETE) break; nbp = adp->ad_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp->b_vp, nbp)) != 0) { bawrite(bp); return (error); } ACQUIRE_LOCK(&lk); break; case D_ALLOCINDIR: aip = WK_ALLOCINDIR(wk); if (aip->ai_state & DEPCOMPLETE) break; nbp = aip->ai_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp->b_vp, nbp)) != 0) { bawrite(bp); return (error); } ACQUIRE_LOCK(&lk); break; case D_INDIRDEP: restart: for (aip = LIST_FIRST(&WK_INDIRDEP(wk)->ir_deplisthd); aip; aip = LIST_NEXT(aip, ai_next)) { if (aip->ai_state & DEPCOMPLETE) continue; nbp = aip->ai_buf; if (getdirtybuf(&nbp, MNT_WAIT) == 0) goto restart; FREE_LOCK(&lk); if ((error = VOP_BWRITE(nbp->b_vp, nbp)) != 0) { bawrite(bp); return (error); } ACQUIRE_LOCK(&lk); goto restart; } break; case D_INODEDEP: if ((error = flush_inodedep_deps(WK_INODEDEP(wk)->id_fs, WK_INODEDEP(wk)->id_ino)) != 0) { FREE_LOCK(&lk); bawrite(bp); return (error); } break; case D_PAGEDEP: /* * We are trying to sync a directory that may * have dependencies on both its own metadata * and/or dependencies on the inodes of any * recently allocated files. We walk its diradd * lists pushing out the associated inode. */ pagedep = WK_PAGEDEP(wk); for (i = 0; i < DAHASHSZ; i++) { if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0) continue; if ((error = flush_pagedep_deps(vp, pagedep->pd_mnt, &pagedep->pd_diraddhd[i]))) { FREE_LOCK(&lk); bawrite(bp); return (error); } } break; case D_MKDIR: /* * This case should never happen if the vnode has * been properly sync'ed. However, if this function * is used at a place where the vnode has not yet * been sync'ed, this dependency can show up. So, * rather than panic, just flush it. */ nbp = WK_MKDIR(wk)->md_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp->b_vp, nbp)) != 0) { bawrite(bp); return (error); } ACQUIRE_LOCK(&lk); break; case D_BMSAFEMAP: /* * This case should never happen if the vnode has * been properly sync'ed. However, if this function * is used at a place where the vnode has not yet * been sync'ed, this dependency can show up. So, * rather than panic, just flush it. */ nbp = WK_BMSAFEMAP(wk)->sm_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp->b_vp, nbp)) != 0) { bawrite(bp); return (error); } ACQUIRE_LOCK(&lk); break; default: panic("softdep_sync_metadata: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } (void) getdirtybuf(&TAILQ_NEXT(bp, b_vnbufs), MNT_WAIT); nbp = TAILQ_NEXT(bp, b_vnbufs); FREE_LOCK(&lk); bawrite(bp); ACQUIRE_LOCK(&lk); if (nbp != NULL) { bp = nbp; goto loop; } /* * We must wait for any I/O in progress to finish so that * all potential buffers on the dirty list will be visible. * Once they are all there, proceed with the second pass * which will wait for the I/O as per above. */ drain_output(vp, 1); /* * The brief unlock is to allow any pent up dependency * processing to be done. 
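 *
 * (Each D_ALLOCDIRECT/D_ALLOCINDIR/D_MKDIR/D_BMSAFEMAP arm of the switch
 * above repeats one idiom: try to grab the buffer the dependency is
 * waiting on, write it asynchronously on the MNT_NOWAIT pass, write it
 * synchronously and propagate errors on the MNT_WAIT pass.  Sketch of
 * that idiom with hypothetical callbacks in place of getdirtybuf(),
 * bawrite() and VOP_BWRITE():)
 */
#if 0	/* illustrative sketch only, not part of this file */
static int
sk_push_dep_buf(void *buf, int wait,
    int (*sk_trylock)(void *),		/* like getdirtybuf() */
    void (*sk_awrite)(void *),		/* like bawrite() */
    int (*sk_bwrite)(void *))		/* like VOP_BWRITE() */
{
	if (!sk_trylock(buf))
		return (0);		/* clean, or not obtainable now */
	if (!wait) {
		sk_awrite(buf);		/* fire and forget */
		return (0);
	}
	return (sk_bwrite(buf));	/* wait and report errors */
}
#endif
/*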
*/ if (waitfor == MNT_NOWAIT) { waitfor = MNT_WAIT; FREE_LOCK(&lk); ACQUIRE_LOCK(&lk); goto top; } /* * If we have managed to get rid of all the dirty buffers, * then we are done. For certain directories and block * devices, we may need to do further work. */ if (TAILQ_FIRST(&vp->v_dirtyblkhd) == NULL) { FREE_LOCK(&lk); return (0); } FREE_LOCK(&lk); /* * If we are trying to sync a block device, some of its buffers may * contain metadata that cannot be written until the contents of some * partially written files have been written to disk. The only easy * way to accomplish this is to sync the entire filesystem (luckily * this happens rarely). */ if (vp->v_type == VBLK && vp->v_specmountpoint && !VOP_ISLOCKED(vp) && (error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, ap->a_cred, ap->a_p)) != 0) return (error); return (0); } /* * Flush the dependencies associated with an inodedep. * Called with splbio blocked. */ static int flush_inodedep_deps(fs, ino) struct fs *fs; ino_t ino; { struct inodedep *inodedep; struct allocdirect *adp; int error, waitfor; struct buf *bp; /* * This work is done in two passes. The first pass grabs most * of the buffers and begins asynchronously writing them. The * only way to wait for these asynchronous writes is to sleep * on the filesystem vnode which may stay busy for a long time * if the filesystem is active. So, instead, we make a second * pass over the dependencies blocking on each write. In the * usual case we will be blocking against a write that we * initiated, so when it is done the dependency will have been * resolved. Thus the second pass is expected to end quickly. * We give a brief window at the top of the loop to allow * any pending I/O to complete. */ for (waitfor = MNT_NOWAIT; ; ) { FREE_LOCK(&lk); ACQUIRE_LOCK(&lk); if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) return (0); for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { if (adp->ad_state & DEPCOMPLETE) continue; bp = adp->ad_buf; if (getdirtybuf(&bp, waitfor) == 0) { if (waitfor == MNT_NOWAIT) continue; break; } FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(bp); } else if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0) { ACQUIRE_LOCK(&lk); return (error); } ACQUIRE_LOCK(&lk); break; } if (adp != NULL) continue; for (adp = TAILQ_FIRST(&inodedep->id_newinoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { if (adp->ad_state & DEPCOMPLETE) continue; bp = adp->ad_buf; if (getdirtybuf(&bp, waitfor) == 0) { if (waitfor == MNT_NOWAIT) continue; break; } FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(bp); } else if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0) { ACQUIRE_LOCK(&lk); return (error); } ACQUIRE_LOCK(&lk); break; } if (adp != NULL) continue; /* * If pass2, we are done, otherwise do pass 2. */ if (waitfor == MNT_WAIT) break; waitfor = MNT_WAIT; } /* * Try freeing inodedep in case all dependencies have been removed. */ if (inodedep_lookup(fs, ino, 0, &inodedep) != 0) (void) free_inodedep(inodedep); return (0); } /* * Eliminate a pagedep dependency by flushing out all its diradd dependencies. * Called with splbio blocked. */ static int flush_pagedep_deps(pvp, mp, diraddhdp) struct vnode *pvp; struct mount *mp; struct diraddhd *diraddhdp; { struct proc *p = CURPROC; /* XXX */ struct inodedep *inodedep; struct ufsmount *ump; struct diradd *dap; struct vnode *vp; int gotit, error = 0; struct buf *bp; ino_t inum; ump = VFSTOUFS(mp); while ((dap = LIST_FIRST(diraddhdp)) != NULL) { /* * Flush ourselves if this directory entry * has a MKDIR_PARENT dependency. 
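 *
 * (Progress through this loop is detected by watching the head of the
 * diradd list: flushing steps may retire entries as a side effect, so
 * "dap != LIST_FIRST(diraddhdp)" means work was done and the loop is
 * restarted, while an unchanged head after a full flush means the
 * dependency graph is wedged and the kernel panics.  Sketched control
 * flow, hypothetical sk_* names:)
 */
#if 0	/* illustrative sketch only, not part of this file */
struct sk_work { struct sk_work *next; };

static int
sk_drain_with_progress_check(struct sk_work **headp,
    void (*sk_flush_one)(struct sk_work *))
{
	struct sk_work *first;

	while ((first = *headp) != NULL) {
		sk_flush_one(first);	/* may retire one or more entries */
		if (first == *headp)
			return (-1);	/* no progress; kernel would panic */
	}
	return (0);
}
#endif
/*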
*/ if (dap->da_state & MKDIR_PARENT) { FREE_LOCK(&lk); if ((error = UFS_UPDATE(pvp, 1)) != 0) break; ACQUIRE_LOCK(&lk); /* * If that cleared dependencies, go on to next. */ if (dap != LIST_FIRST(diraddhdp)) continue; if (dap->da_state & MKDIR_PARENT) panic("flush_pagedep_deps: MKDIR"); } /* * Flush the file on which the directory entry depends. * If the inode has already been pushed out of the cache, * then all the block dependencies will have been flushed * leaving only inode dependencies (e.g., bitmaps). Thus, * we do a ufs_ihashget to check for the vnode in the cache. * If it is there, we do a full flush. If it is no longer * there we need only dispose of any remaining bitmap * dependencies and write the inode to disk. */ inum = dap->da_newinum; FREE_LOCK(&lk); if ((vp = ufs_ihashget(ump->um_dev, inum)) == NULL) { ACQUIRE_LOCK(&lk); if (inodedep_lookup(ump->um_fs, inum, 0, &inodedep) == 0 && dap == LIST_FIRST(diraddhdp)) panic("flush_pagedep_deps: flush 1 failed"); /* * If the inode still has bitmap dependencies, * push them to disk. */ if ((inodedep->id_state & DEPCOMPLETE) == 0) { gotit = getdirtybuf(&inodedep->id_buf,MNT_WAIT); FREE_LOCK(&lk); if (gotit && (error = VOP_BWRITE(inodedep->id_buf->b_vp, inodedep->id_buf)) != 0) break; ACQUIRE_LOCK(&lk); } if (dap != LIST_FIRST(diraddhdp)) continue; /* * If the inode is still sitting in a buffer waiting * to be written, push it to disk. */ FREE_LOCK(&lk); if ((error = bread(ump->um_devvp, fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)), (int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0) break; if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0) break; ACQUIRE_LOCK(&lk); if (dap == LIST_FIRST(diraddhdp)) panic("flush_pagedep_deps: flush 2 failed"); continue; } if (vp->v_type == VDIR) { /* * A newly allocated directory must have its "." and * ".." entries written out before its name can be * committed in its parent. We do not want or need * the full semantics of a synchronous VOP_FSYNC as * that may end up here again, once for each directory * level in the filesystem. Instead, we push the blocks * and wait for them to clear. */ if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) { vput(vp); break; } drain_output(vp, 0); } error = UFS_UPDATE(vp, 1); vput(vp); if (error) break; /* * If we have failed to get rid of all the dependencies * then something is seriously wrong. */ if (dap == LIST_FIRST(diraddhdp)) panic("flush_pagedep_deps: flush 3 failed"); ACQUIRE_LOCK(&lk); } if (error) ACQUIRE_LOCK(&lk); return (error); } /* * A large burst of file addition or deletion activity can drive the * memory load excessively high. Therefore we deliberately slow things * down and speed up the I/O processing if we find ourselves with too * many dependencies in progress. */ static int request_cleanup(resource, islocked) int resource; int islocked; { struct callout_handle handle; struct proc *p = CURPROC; /* * We never hold up the filesystem syncer process. */ if (p == filesys_syncer) return (0); /* * If we are resource constrained on inode dependencies, try * flushing some dirty inodes. Otherwise, we are constrained * by file deletions, so try accelerating flushes of directories * with removal dependencies. We would like to do the cleanup * here, but we probably hold an inode locked at this point and * that might deadlock against one that we try to clean. So, * the best that we can do is request the syncer daemon to do * the cleanup for us. 
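 *
 * The mechanism used below is a bounded sleep: the requesting process
 * arms a one-shot timer (pause_timer) and sleeps on proc_waiting, so it
 * is released either by the timer firing or by an explicit wakeup.  The
 * same idea, sketched in userland C with a condition variable and a
 * timed wait standing in for timeout()/tsleep()/untimeout():
 */
#if 0	/* illustrative sketch only, not part of this file */
#include <pthread.h>
#include <time.h>

static pthread_mutex_t sk_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  sk_cv = PTHREAD_COND_INITIALIZER;
static int sk_waiting;

static void
sk_wait_for_cleanup(int max_wait_sec)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += max_wait_sec;		/* upper bound on the wait */

	pthread_mutex_lock(&sk_mtx);
	sk_waiting = 1;
	while (sk_waiting)
		if (pthread_cond_timedwait(&sk_cv, &sk_mtx, &ts) != 0)
			break;			/* timed out: proceed anyway */
	sk_waiting = 0;
	pthread_mutex_unlock(&sk_mtx);
}

static void
sk_cleanup_done(void)				/* cf. pause_timer()/wakeup() */
{
	pthread_mutex_lock(&sk_mtx);
	sk_waiting = 0;
	pthread_cond_broadcast(&sk_cv);
	pthread_mutex_unlock(&sk_mtx);
}
#endif
/*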
*/ switch (resource) { case FLUSH_INODES: stat_ino_limit_push += 1; req_clear_inodedeps = 1; break; case FLUSH_REMOVE: stat_blk_limit_push += 1; req_clear_remove = 1; break; default: panic("request_cleanup: unknown type"); } /* * Hopefully the syncer daemon will catch up and awaken us. * We wait at most tickdelay before proceeding in any case. */ if (islocked == 0) ACQUIRE_LOCK(&lk); if (proc_waiting == 0) { proc_waiting = 1; handle = timeout(pause_timer, NULL, tickdelay > 2 ? tickdelay : 2); } FREE_LOCK_INTERLOCKED(&lk); (void) tsleep((caddr_t)&proc_waiting, PPAUSE | PCATCH, "softupdate", 0); ACQUIRE_LOCK_INTERLOCKED(&lk); if (proc_waiting) { untimeout(pause_timer, NULL, handle); proc_waiting = 0; } else { switch (resource) { case FLUSH_INODES: stat_ino_limit_hit += 1; break; case FLUSH_REMOVE: stat_blk_limit_hit += 1; break; } } if (islocked == 0) FREE_LOCK(&lk); return (1); } /* * Awaken processes pausing in request_cleanup and clear proc_waiting * to indicate that there is no longer a timer running. */ void pause_timer(arg) void *arg; { proc_waiting = 0; wakeup(&proc_waiting); } /* * Flush out a directory with at least one removal dependency in an effort * to reduce the number of freefile and freeblks dependency structures. */ static void clear_remove(p) struct proc *p; { struct pagedep_hashhead *pagedephd; struct pagedep *pagedep; static int next = 0; struct mount *mp; struct vnode *vp; int error, cnt; ino_t ino; ACQUIRE_LOCK(&lk); for (cnt = 0; cnt < pagedep_hash; cnt++) { pagedephd = &pagedep_hashtbl[next++]; if (next >= pagedep_hash) next = 0; for (pagedep = LIST_FIRST(pagedephd); pagedep; pagedep = LIST_NEXT(pagedep, pd_hash)) { if (LIST_FIRST(&pagedep->pd_dirremhd) == NULL) continue; mp = pagedep->pd_mnt; ino = pagedep->pd_ino; FREE_LOCK(&lk); if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_remove: vget", error); return; } if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) softdep_error("clear_remove: fsync", error); drain_output(vp, 0); vput(vp); return; } } FREE_LOCK(&lk); } /* * Clear out a block of dirty inodes in an effort to reduce * the number of inodedep dependency structures. */ static void clear_inodedeps(p) struct proc *p; { struct inodedep_hashhead *inodedephd; struct inodedep *inodedep; static int next = 0; struct mount *mp; struct vnode *vp; struct fs *fs; int error, cnt; ino_t firstino, lastino, ino; ACQUIRE_LOCK(&lk); /* * Pick a random inode dependency to be cleared. * We will then gather up all the inodes in its block * that have dependencies and flush them out. */ for (cnt = 0; cnt < inodedep_hash; cnt++) { inodedephd = &inodedep_hashtbl[next++]; if (next >= inodedep_hash) next = 0; if ((inodedep = LIST_FIRST(inodedephd)) != NULL) break; } /* * Ugly code to find mount point given pointer to superblock. */ fs = inodedep->id_fs; for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; mp = CIRCLEQ_NEXT(mp, mnt_list)) if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs) break; /* * Find the last inode in the block with dependencies. */ firstino = inodedep->id_ino & ~(INOPB(fs) - 1); for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--) if (inodedep_lookup(fs, lastino, 0, &inodedep) != 0) break; /* * Asynchronously push all but the last inode with dependencies. * Synchronously push the last inode with dependencies to ensure * that the inode block gets written to free up the inodedeps. 
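 *
 * (The firstino/lastino computation above relies on INOPB(fs) being a
 * power of two: masking with ~(INOPB(fs) - 1) rounds an inode number
 * down to the first inode of its inode block.  A worked example,
 * assuming 64 inodes per block:)
 */
#if 0	/* illustrative sketch only, not part of this file */
#include <assert.h>

static void
sk_inode_block_bounds(void)
{
	unsigned long inopb = 64;			/* assumed INOPB(fs) */
	unsigned long ino = 1000;
	unsigned long firstino = ino & ~(inopb - 1);	/* 1000 -> 960 */
	unsigned long lastino = firstino + inopb - 1;	/* 960 + 63 */

	assert(firstino == 960);
	assert(lastino == 1023);
}
#endif
/*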
*/ for (ino = firstino; ino <= lastino; ino++) { if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) continue; FREE_LOCK(&lk); if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_inodedeps: vget", error); return; } if (ino == lastino) { if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p))) softdep_error("clear_inodedeps: fsync1", error); } else { if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) softdep_error("clear_inodedeps: fsync2", error); drain_output(vp, 0); } vput(vp); ACQUIRE_LOCK(&lk); } FREE_LOCK(&lk); } /* * Acquire exclusive access to a buffer. * Must be called with splbio blocked. * Return 1 if buffer was acquired. */ static int getdirtybuf(bpp, waitfor) struct buf **bpp; int waitfor; { struct buf *bp; for (;;) { if ((bp = *bpp) == NULL) return (0); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) == 0) break; if (waitfor != MNT_WAIT) return (0); FREE_LOCK_INTERLOCKED(&lk); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL) != ENOLCK) panic("getdirtybuf: inconsistent lock"); ACQUIRE_LOCK_INTERLOCKED(&lk); } if ((bp->b_flags & B_DELWRI) == 0) { BUF_UNLOCK(bp); return (0); } bremfree(bp); return (1); } /* * Wait for pending output on a vnode to complete. * Must be called with vnode locked. */ static void drain_output(vp, islocked) struct vnode *vp; int islocked; { if (!islocked) ACQUIRE_LOCK(&lk); while (vp->v_numoutput) { vp->v_flag |= VBWAIT; FREE_LOCK_INTERLOCKED(&lk); tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "drainvp", 0); ACQUIRE_LOCK_INTERLOCKED(&lk); } if (!islocked) FREE_LOCK(&lk); } /* * Called whenever a buffer that is being invalidated or reallocated * contains dependencies. This should only happen if an I/O error has * occurred. The routine is called with the buffer locked. */ void softdep_deallocate_dependencies(bp) struct buf *bp; { if ((bp->b_flags & B_ERROR) == 0) panic("softdep_deallocate_dependencies: dangling deps"); softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error); panic("softdep_deallocate_dependencies: unrecovered I/O error"); } /* * Function to handle asynchronous write errors in the filesystem. */ void softdep_error(func, error) char *func; int error; { /* XXX should do something better! */ printf("%s: got error %d while accessing filesystem\n", func, error); } Index: head/sys/dev/pccard/if_xe.c =================================================================== --- head/sys/dev/pccard/if_xe.c (revision 49534) +++ head/sys/dev/pccard/if_xe.c (revision 49535) @@ -1,2507 +1,2507 @@ /*- * Copyright (c) 1998, 1999 Scott Mitchell * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: if_xe.c,v 1.20 1999/06/13 19:17:40 scott Exp $ * $FreeBSD$ */ /* * Portions of this software were derived from Werner Koch's xirc2ps driver * for Linux under the terms of the following license (from v1.30 of the * xirc2ps driver): * * Copyright (c) 1997 by Werner Koch (dd9jn) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, and the entire permission notice in its entirety, * including the disclaimer of warranties. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * FreeBSD device driver for Xircom CreditCard PCMCIA Ethernet adapters. The * following cards are currently known to work with the driver: * Xircom CreditCard 10/100 (CE3) * Xircom CreditCard Ethernet + Modem 28 (CEM28) * Xircom CreditCard Ethernet 10/100 + Modem 56 (CEM56) * Xircom RealPort Ethernet 10 * Xircom RealPort Ethernet 10/100 * Xircom RealPort Ethernet 10/100 + Modem 56 (REM56, REM56G) * Intel EtherExpress Pro/100 PC Card Mobile Adapter 16 (Pro/100 M16A) * Compaq Netelligent 10/100 PC Card (CPQ-10/100) * * Some other cards *should* work, but support for them is either broken or in * an unknown state at the moment. I'm always interested in hearing from * people who own any of these cards: * Xircom CreditCard 10Base-T (PS-CE2-10) * Xircom CreditCard Ethernet + ModemII (CEM2) * Xircom CEM28 and CEM33 Ethernet/Modem cards (may be variants of CEM2?) * * Thanks to all who assisted with the development and testing of the driver, * especially: Werner Koch, Duke Kamstra, Duncan Barclay, Jason George, Dru * Nelson, Mike Kephart, Bill Rainey and Douglas Rand. Apologies if I've left * out anyone who deserves a mention here. * * Special thanks to Ade Lovett for both hosting the mailing list and doing * the CEM56/REM56 support code; and the FreeBSD UK Users' Group for hosting * the web pages. 
* * Contact points: * * Driver web page: http://ukug.uk.freebsd.org/~scott/xe_drv/ * * Mailing list: http://www.lovett.com/lists/freebsd-xircom/ * or send "subscribe freebsd-xircom" to * * Author email: */ #ifndef XE_DEBUG #define XE_DEBUG 1 /* Increase for more voluminous output! */ #endif #include "xe.h" #include "card.h" #include "apm.h" #include "bpf.h" #if NXE > 0 #if NCARD > 0 #include #include -#include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #if NBPF > 0 #include #endif /* NBPF > 0 */ #include #include #include #include #if NAPM > 0 #include #endif /* NAPM > 0 */ #include #include #include #include /* * One of these structures per allocated device */ struct xe_softc { struct arpcom arpcom; struct ifmedia ifmedia; struct ifmib_iso_8802_3 mibdata; struct callout_handle chand; struct isa_device *dev; struct pccard_devinfo *crd; struct ifnet *ifp; struct ifmedia *ifm; char *card_type; /* Card model name */ char *vendor; /* Card manufacturer */ int unit; /* Unit number, from dev->id_unit */ int srev; /* Silicon revision */ int tx_queued; /* Packets currently waiting to transmit */ int tx_tpr; /* Last value of TPR reg on card */ int tx_collisions; /* Collisions since last successful send */ int tx_timeouts; /* Count of transmit timeouts */ int autoneg_status; /* Autonegotiation progress state */ int media; /* Private media word */ u_char version; /* Bonding Version register from card */ u_char modem; /* 1 = Card has a modem */ u_char ce2; /* 1 = Card has CE2 silicon */ u_char mohawk; /* 1 = Card has Mohawk (CE3) silicon */ u_char dingo; /* 1 = Card has Dingo (CEM56) silicon */ u_char phy_ok; /* 1 = MII-compliant PHY found and initialised */ u_char gone; /* 1 = Card bailed out */ #if NAPM > 0 struct apmhook suspend_hook; struct apmhook resume_hook; #endif /* NAPM > 0 */ }; static struct xe_softc *sca[MAXSLOT]; /* * MII command structure */ struct xe_mii_frame { u_int8_t mii_stdelim; u_int8_t mii_opcode; u_int8_t mii_phyaddr; u_int8_t mii_regaddr; u_int8_t mii_turnaround; u_int16_t mii_data; }; /* * For accessing card registers */ #define XE_INB(r) inb(scp->dev->id_iobase+(r)) #define XE_INW(r) inw(scp->dev->id_iobase+(r)) #define XE_OUTB(r, b) outb(scp->dev->id_iobase+(r), (b)) #define XE_OUTW(r, w) outw(scp->dev->id_iobase+(r), (w)) #define XE_SELECT_PAGE(p) XE_OUTB(XE_PR, (p)) /* * Horrid stuff for accessing CIS tuples */ #define CARD_MAJOR 50 #define CISTPL_BUFSIZE 512 #define CISTPL_TYPE(tpl) tpl[0] #define CISTPL_LEN(tpl) tpl[2] #define CISTPL_DATA(tpl,pos) tpl[4 + ((pos)<<1)] /* * Media autonegotiation progress constants */ #define XE_AUTONEG_NONE 0 /* No autonegotiation in progress */ #define XE_AUTONEG_WAITING 1 /* Waiting for transmitter to go idle */ #define XE_AUTONEG_STARTED 2 /* Waiting for autonegotiation to complete */ #define XE_AUTONEG_100TX 3 /* Trying to force 100baseTX link */ #define XE_AUTONEG_FAIL 4 /* Autonegotiation failed */ /* * Prototypes start here */ static int xe_probe (struct isa_device *dev); static int xe_card_init (struct pccard_devinfo *devi); static int xe_attach (struct isa_device *dev); static void xe_init (void *xscp); static void xe_start (struct ifnet *ifp); static int xe_ioctl (struct ifnet *ifp, u_long command, caddr_t data); static int xe_card_intr (struct pccard_devinfo *devi); static void xe_watchdog (struct ifnet *ifp); static int xe_media_change (struct ifnet *ifp); static void xe_media_status (struct ifnet *ifp, struct ifmediareq *mrp); 
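/*
 * A note on the CISTPL_* macros above: tuple data read from attribute
 * memory is only meaningful in every other byte, which is why
 * CISTPL_DATA() doubles its index and why xe_card_init() steps through
 * the CIS by ((CISTPL_LEN(buf) + 2) << 1).  A stand-alone sketch of
 * that walk over a raw attribute-memory image (hypothetical sk_* name,
 * print-only, no driver state involved):
 */
#if 0	/* illustrative sketch only, not part of this driver */
#include <stdio.h>
#include <stdint.h>

static void
sk_walk_cis(const uint8_t *buf, size_t bufsize)
{
	size_t off = 0;

	while (off + 4 <= bufsize) {
		uint8_t type = buf[off];		/* CISTPL_TYPE() */
		uint8_t len = buf[off + 2];		/* CISTPL_LEN() */

		if (type == 0xff || len == 0xff)
			break;				/* end of chain */
		printf("tuple 0x%02x, %u data bytes\n", type, (unsigned)len);
		off += ((size_t)len + 2) << 1;		/* type, len, data */
	}
}
#endif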
static timeout_t xe_setmedia; static void xe_hard_reset (struct xe_softc *scp); static void xe_soft_reset (struct xe_softc *scp); static void xe_stop (struct xe_softc *scp); static void xe_enable_intr (struct xe_softc *scp); static void xe_disable_intr (struct xe_softc *scp); static void xe_setmulti (struct xe_softc *scp); static void xe_setaddrs (struct xe_softc *scp); static int xe_pio_write_packet (struct xe_softc *scp, struct mbuf *mbp); static void xe_card_unload (struct pccard_devinfo *devi); static u_int32_t xe_compute_crc (u_int8_t *data, int len); static int xe_compute_hashbit (u_int32_t crc); /* * MII functions */ static void xe_mii_sync (struct xe_softc *scp); static int xe_mii_init (struct xe_softc *scp); static void xe_mii_send (struct xe_softc *scp, u_int32_t bits, int cnt); static int xe_mii_readreg (struct xe_softc *scp, struct xe_mii_frame *frame); static int xe_mii_writereg (struct xe_softc *scp, struct xe_mii_frame *frame); static u_int16_t xe_phy_readreg (struct xe_softc *scp, u_int16_t reg); static void xe_phy_writereg (struct xe_softc *scp, u_int16_t reg, u_int16_t data); /* * Debug functions */ #ifdef XE_DEBUG #define XE_REG_DUMP(scp) xe_reg_dump((scp)) #define XE_MII_DUMP(scp) xe_mii_dump((scp)) static void xe_reg_dump (struct xe_softc *scp); static void xe_mii_dump (struct xe_softc *scp); #else #define XE_REG_DUMP(scp) #define XE_MII_DUMP(scp) #endif #if NAPM > 0 /* * APM hook functions */ static int xe_suspend (void *xunit); static int xe_resume (void *xunit); #endif /* NAPM > 0 */ /* * PCMCIA driver hooks */ #ifdef PCCARD_MODULE PCCARD_MODULE(xe, xe_card_init, xe_card_unload, xe_card_intr, 0, net_imask); #else static struct pccard_device xe_info = { /* For pre 3.1-STABLE code */ "xe", xe_card_init, xe_card_unload, xe_card_intr, 0, &net_imask }; DATA_SET(pccarddrv_set, xe_info); #endif /* PCCARD_MODULE */ /* * ISA driver hooks. I'd like to do without these but the kernel config stuff * seems to require them. */ struct isa_driver xedriver = { xe_probe, xe_attach, "xe" }; /* * ISA probe routine. * All of the supported devices are PCMCIA cards. I have no idea if it's even * possible to successfully probe/attach these at boot time (pccardd normally * does a lot of setup work) so I don't even bother trying. 
*/ static int xe_probe (struct isa_device *dev) { #ifdef XE_DEBUG printf("xe%d: probe\n", dev->id_unit); #endif bzero(sca, MAXSLOT * sizeof(sca[0])); return 0; } /* * Two routines to read from/write to the attribute memory * the write portion is used only for fixing up the RealPort cards, * the reader portion was needed for debugging info, and duplicated some * code in xe_card_init(), so it appears here instead with suitable * modifications to xe_card_init() * -aDe Lovett */ static int xe_memwrite(struct pccard_devinfo *devi, off_t offset, u_char byte) { struct iovec iov; struct uio uios; iov.iov_base = &byte; iov.iov_len = sizeof(byte); uios.uio_iov = &iov; uios.uio_iovcnt = 1; uios.uio_offset = offset; uios.uio_resid = sizeof(byte); uios.uio_segflg = UIO_SYSSPACE; uios.uio_rw = UIO_WRITE; uios.uio_procp = 0; #if 0 /* THIS IS BOGUS */ return cdevsw[CARD_MAJOR]->d_write(makedev(CARD_MAJOR, devi->slt->slotnum), &uios, 0); #else return (-1); #endif } static int xe_memread(struct pccard_devinfo *devi, off_t offset, u_char *buf, int size) { struct iovec iov; struct uio uios; iov.iov_base = buf; iov.iov_len = size; uios.uio_iov = &iov; uios.uio_iovcnt = 1; uios.uio_offset = offset; uios.uio_resid = size; uios.uio_segflg = UIO_SYSSPACE; uios.uio_rw = UIO_READ; uios.uio_procp = 0; #if 0 /* THIS IS BOGUS */ return cdevsw[CARD_MAJOR]->d_read(makedev(CARD_MAJOR, devi->slt->slotnum), &uios, 0); #else return (-1); #endif } /* * Hacking for RealPort cards */ static int xe_cem56fix(struct xe_softc *scp) { struct pccard_devinfo *devi; struct slot *slt; struct slot_ctrl *ctrl; int ioport, fail; /* initialise a few variables */ devi = scp->crd; slt = devi->slt; ctrl = slt->ctrl; /* allocate a new I/O slot for the ethernet */ /* XXX: ctrl->mapio() always appears to return 0 (success), so * this may cause problems if another device is listening * on 0x300 already. In this case, you should choose a * known free I/O port address in the kernel config line * for the driver. It will be picked up here and used * instead of the autodetected value. */ slt->io[1].window = 1; slt->io[1].flags = IODF_WS|IODF_16BIT|IODF_ZEROWS|IODF_ACTIVE; slt->io[1].size = 0x10; #ifdef XE_IOBASE printf( "xe%d: user requested ioport 0x%x\n", scp->unit, XE_IOBASE ); ioport = XE_IOBASE; slt->io[1].start = ioport; fail = ctrl->mapio(slt, 1); #else for (ioport = 0x300; ioport < 0x400; ioport += 0x10) { slt->io[1].start = ioport; if ((fail = ctrl->mapio( slt, 1 )) == 0) break; } #endif /* did we find one? */ if (fail) { printf( "xe%d: xe_cem56fix: no free address space\n", scp->unit ); return -1; } /* munge the id_iobase entry for use by the rest of the driver */ #if XE_DEBUG > 1 printf( "xe%d: using 0x%x for RealPort ethernet\n", scp->unit, ioport ); #endif scp->dev->id_iobase = ioport; scp->dev->id_alive = 0x10; /* magic to set up the ethernet */ xe_memwrite( devi, DINGO_ECOR, DINGO_ECOR_IRQ_LEVEL|DINGO_ECOR_INT_ENABLE| DINGO_ECOR_IOB_ENABLE|DINGO_ECOR_ETH_ENABLE ); xe_memwrite( devi, DINGO_EBAR0, ioport & 0xff ); xe_memwrite( devi, DINGO_EBAR1, (ioport >> 8) & 0xff ); xe_memwrite( devi, DINGO_DCOR0, DINGO_DCOR0_SF_INT ); xe_memwrite( devi, DINGO_DCOR1, DINGO_DCOR1_INT_LEVEL|DINGO_DCOR1_EEDIO ); xe_memwrite( devi, DINGO_DCOR2, 0x00 ); xe_memwrite( devi, DINGO_DCOR3, 0x00 ); xe_memwrite( devi, DINGO_DCOR4, 0x00 ); /* success! */ return 0; } /* * PCMCIA probe routine. * Probe and identify the device. Called by the slot manager when the card is * inserted or the machine wakes up from suspend mode. 
Assmes that the slot * structure has been initialised already. */ static int xe_card_init(struct pccard_devinfo *devi) { struct xe_softc *scp; struct isa_device *dev; u_char buf[CISTPL_BUFSIZE]; u_char ver_str[CISTPL_BUFSIZE>>1]; off_t offs; int unit, success, rc, i; unit = devi->isahd.id_unit; scp = sca[unit]; dev = &devi->isahd; success = 0; #ifdef XE_DEBUG printf("xe: Probing for unit %d\n", unit); #endif /* Check that unit number is OK */ if (unit > MAXSLOT) { printf("xe%d: bad unit\n", unit); return (ENODEV); } /* Don't attach an active device */ if (scp && !scp->gone) { printf("xe%d: already attached\n", unit); return (EBUSY); } /* Allocate per-instance storage */ if (!scp) { if ((scp = malloc(sizeof(*scp), M_DEVBUF, M_NOWAIT)) == NULL) { printf("xe%d: failed to allocage driver storage\n", unit); return (ENOMEM); } bzero(scp, sizeof(*scp)); } /* Re-attach an existing device */ if (scp->gone) { scp->gone = 0; return 0; } /* Grep through CIS looking for relevant tuples */ offs = 0; do { u_int16_t vendor; u_int8_t rev, media, prod; /* * Read tuples one at a time into buf. Sucks, but it only happens once. * XXX - This assumes that attribute has been mapped by pccardd, which * XXX - seems to be the default situation. If not, we're well and truly * XXX - FUBAR. This is a general PCCARD problem, not our fault :) */ if ((rc = xe_memread( devi, offs, buf, CISTPL_BUFSIZE )) == 0) { switch (CISTPL_TYPE(buf)) { case 0x15: /* Grab version string (needed to ID some weird CE2's) */ #if XE_DEBUG > 1 printf("xe%d: Got version string (0x15)\n", unit); #endif for (i = 0; i < CISTPL_LEN(buf); ver_str[i] = CISTPL_DATA(buf, i++)); ver_str[i] = '\0'; ver_str[(CISTPL_BUFSIZE>>1) - 1] = CISTPL_LEN(buf); success++; break; case 0x20: /* Figure out what type of card we have */ #if XE_DEBUG > 1 printf("xe%d: Got card ID (0x20)\n", unit); #endif vendor = CISTPL_DATA(buf, 0) + (CISTPL_DATA(buf, 1) << 8); rev = CISTPL_DATA(buf, 2); media = CISTPL_DATA(buf, 3); prod = CISTPL_DATA(buf, 4); switch (vendor) { /* Get vendor ID */ case 0x0105: scp->vendor = "Xircom"; break; case 0x0138: case 0x0183: scp->vendor = "Compaq"; break; case 0x0089: scp->vendor = "Intel"; break; default: scp->vendor = "Unknown"; } if (!((prod & 0x40) && (media & 0x01))) { #if XE_DEBUG > 1 printf("xe%d: Not a PCMCIA Ethernet card!\n", unit); #endif rc = ENODEV; /* Not a PCMCIA Ethernet device */ } else { if (media & 0x10) { /* Ethernet/modem cards */ #if XE_DEBUG > 1 printf("xe%d: Card is Ethernet/modem combo\n", unit); #endif scp->modem = 1; switch (prod & 0x0f) { case 1: scp->card_type = "CEM"; break; case 2: scp->ce2 = 1; scp->card_type = "CEM2"; break; case 3: scp->ce2 = 1; scp->card_type = "CEM3"; break; case 4: scp->ce2 = 1; scp->card_type = "CEM33"; break; case 5: scp->mohawk = 1; scp->card_type = "CEM56M"; break; case 6: case 7: /* Some kind of RealPort card */ scp->mohawk = 1; scp->dingo = 1; scp->card_type = "CEM56"; break; default: rc = ENODEV; } } else { /* Ethernet-only cards */ #if XE_DEBUG > 1 printf("xe%d: Card is Ethernet only\n", unit); #endif switch (prod & 0x0f) { case 1: scp->card_type = "CE"; break; case 2: scp->ce2 = 1; scp->card_type = "CE2"; break; case 3: scp->mohawk = 1; scp->card_type = "CE3"; break; default: rc = ENODEV; } } } success++; break; case 0x22: /* Get MAC address */ if ((CISTPL_LEN(buf) == 8) && (CISTPL_DATA(buf, 0) == 0x04) && (CISTPL_DATA(buf, 1) == ETHER_ADDR_LEN)) { #if XE_DEBUG > 1 printf("xe%d: Got MAC address (0x22)\n", unit); #endif for (i = 0; i < ETHER_ADDR_LEN; scp->arpcom.ac_enaddr[i] = 
CISTPL_DATA(buf, i+2), i++); } success++; break; default: } } /* Skip to next tuple */ offs += ((CISTPL_LEN(buf) + 2) << 1); } while ((CISTPL_TYPE(buf) != 0xff) && (CISTPL_LEN(buf) != 0xff) && (rc == 0)); /* Die now if something went wrong above */ if ((rc != 0) || (success < 3)) { free(scp, M_DEVBUF); return rc; } /* Check for certain strange CE2's that look like CE's */ if (strcmp(scp->card_type, "CE") == 0) { u_char *str = ver_str; #if XE_DEBUG > 1 printf("xe%d: Checking for weird CE2 string\n", unit); #endif str += strlen(str) + 1; /* Skip forward to 3rd version string */ str += strlen(str) + 1; str += strlen(str) + 1; for (i = 0; i < strlen(str) - 2; i++) { if (bcmp(&str[i], "CE2", 3) ==0) { /* Look for "CE2" string */ scp->card_type = "CE2"; } } } /* Reject unsupported cards */ if (strcmp(scp->card_type, "CE") == 0 || strcmp(scp->card_type, "CEM") == 0) { printf("xe%d: Sorry, your %s card is not supported :(\n", unit, scp->card_type); free(scp, M_DEVBUF); return ENODEV; } /* Fill in some private data */ sca[unit] = scp; scp->dev = &devi->isahd; scp->crd = devi; scp->ifp = &scp->arpcom.ac_if; scp->ifm = &scp->ifmedia; scp->unit = unit; scp->autoneg_status = 0; /* Hack RealPorts into submission */ if (scp->dingo && xe_cem56fix(scp) < 0) { printf( "xe%d: Unable to fix your RealPort\n", unit ); sca[unit] = 0; free(scp, M_DEVBUF); return ENODEV; } /* Hopefully safe to read this here */ XE_SELECT_PAGE(4); scp->version = XE_INB(XE_BOV); /* Attempt to attach the device */ if (!xe_attach(scp->dev)) { sca[unit] = 0; free(scp, M_DEVBUF); return ENXIO; } #if NAPM > 0 /* Establish APM hooks once device attached */ scp->suspend_hook.ah_name = "xe_suspend"; scp->suspend_hook.ah_fun = xe_suspend; scp->suspend_hook.ah_arg = (void *)unit; scp->suspend_hook.ah_order = APM_MIN_ORDER; apm_hook_establish(APM_HOOK_SUSPEND, &scp->suspend_hook); scp->resume_hook.ah_name = "xe_resume"; scp->resume_hook.ah_fun = xe_resume; scp->resume_hook.ah_arg = (void *)unit; scp->resume_hook.ah_order = APM_MIN_ORDER; apm_hook_establish(APM_HOOK_RESUME, &scp->resume_hook); #endif /* NAPM > 0 */ /* Success */ return 0; } /* * Attach a device (called when xe_card_init succeeds). Assume that the probe * routine has set up the softc structure correctly and that we can trust the * unit number. */ static int xe_attach (struct isa_device *dev) { struct xe_softc *scp = sca[dev->id_unit]; int i; #ifdef XE_DEBUG printf("xe%d: attach\n", scp->unit); #endif /* Initialise the ifnet structure */ if (!scp->ifp->if_name) { scp->ifp->if_softc = scp; scp->ifp->if_name = "xe"; scp->ifp->if_unit = scp->unit; scp->ifp->if_timer = 0; scp->ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); scp->ifp->if_linkmib = &scp->mibdata; scp->ifp->if_linkmiblen = sizeof scp->mibdata; scp->ifp->if_output = ether_output; scp->ifp->if_start = xe_start; scp->ifp->if_ioctl = xe_ioctl; scp->ifp->if_watchdog = xe_watchdog; scp->ifp->if_init = xe_init; scp->ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; } /* Initialise the ifmedia structure */ ifmedia_init(scp->ifm, 0, xe_media_change, xe_media_status); callout_handle_init(&scp->chand); /* * Fill in supported media types. Some cards _do_ support full duplex * operation, but this driver doesn't, yet. Therefore we leave those modes * out of the list. We support some form of autoselection in all cases. 
*/ if (scp->mohawk) { ifmedia_add(scp->ifm, IFM_ETHER|IFM_100_TX, 0, NULL); ifmedia_add(scp->ifm, IFM_ETHER|IFM_10_T, 0, NULL); } else { ifmedia_add(scp->ifm, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(scp->ifm, IFM_ETHER|IFM_10_2, 0, NULL); } ifmedia_add(scp->ifm, IFM_ETHER|IFM_AUTO, 0, NULL); /* Default is to autoselect best supported media type */ ifmedia_set(scp->ifm, IFM_ETHER|IFM_AUTO); /* Print some useful information */ printf("\n"); printf("xe%d: %s %s, bonding version %#x%s%s\n", scp->unit, scp->vendor, scp->card_type, scp->version, scp->mohawk ? ", 100Mbps capable" : "", scp->modem ? ", with modem" : ""); if (scp->mohawk) { XE_SELECT_PAGE(0x10); printf("xe%d: DingoID = %#x, RevisionID = %#x, VendorID = %#x\n", scp->unit, XE_INW(XE_DINGOID), XE_INW(XE_RevID), XE_INW(XE_VendorID)); } if (scp->ce2) { XE_SELECT_PAGE(0x45); printf("xe%d: CE2 version = %#x\n", scp->unit, XE_INB(XE_REV)); } /* Print MAC address */ printf("xe%d: Ethernet address %02x", scp->unit, scp->arpcom.ac_enaddr[0]); for (i = 1; i < ETHER_ADDR_LEN; i++) { printf(":%02x", scp->arpcom.ac_enaddr[i]); } printf("\n"); /* Attach the interface */ if_attach(scp->ifp); ether_ifattach(scp->ifp); #if NBPF > 0 /* If BPF is in the kernel, call the attach for it */ #if XE_DEBUG > 1 printf("xe%d: BPF listener attached\n", scp->unit); #endif bpfattach(scp->ifp, DLT_EN10MB, sizeof(struct ether_header)); #endif /* Done */ return 1; } /* * Initialize device. Completes the reset procedure on the card and starts * output. If there's an autonegotiation in progress we DON'T do anything; * the media selection code will call us again when it's done. */ static void xe_init(void *xscp) { struct xe_softc *scp = xscp; int s; #ifdef XE_DEBUG printf("xe%d: init\n", scp->unit); #endif if (scp->gone) return; if (TAILQ_EMPTY(&scp->ifp->if_addrhead)) return; /* Reset transmitter flags */ scp->tx_queued = 0; scp->tx_tpr = 0; scp->tx_collisions = 0; scp->ifp->if_timer = 0; s = splimp(); XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC0, 0x20); /* Disable source insertion (WTF is that?) */ /* * Set the 'local memory dividing line' -- splits the 32K card memory into * 8K for transmit buffers and 24K for receive. This is done automatically * on newer revision cards. */ if (scp->srev != 1) { XE_SELECT_PAGE(2); XE_OUTW(XE_RBS, 0x2000); } /* Set up multicast addresses */ xe_setmulti(scp); /* Fix the data offset register -- reset leaves it off-by-one */ XE_SELECT_PAGE(0); XE_OUTW(XE_DO, 0x2000); /* * Set MAC interrupt masks and clear status regs. The bit names are direct * from the Linux code; I have no idea what most of them do. */ XE_SELECT_PAGE(0x40); /* Bit 7..0 */ XE_OUTB(XE_RX0Msk, 0xff); /* ROK, RAB, rsv, RO, CRC, AE, PTL, MP */ XE_OUTB(XE_TX0Msk, 0xff); /* TOK, TAB, SQE, LL, TU, JAB, EXC, CRS */ XE_OUTB(XE_TX0Msk+1, 0xb0); /* rsv, rsv, PTD, EXT, rsv, rsv, rsv, rsv */ XE_OUTB(XE_RST0, 0x00); /* ROK, RAB, REN, RO, CRC, AE, PTL, MP */ XE_OUTB(XE_TXST0, 0x00); /* TOK, TAB, SQE, LL, TU, JAB, EXC, CRS */ XE_OUTB(XE_TXST1, 0x00); /* TEN, rsv, PTD, EXT, retry_counter:4 */ /* * Check for an in-progress autonegotiation. If one is active, just set * IFF_RUNNING and return. The media selection code will call us again when * it's done. 
*/ if (scp->autoneg_status) { scp->ifp->if_flags |= IFF_RUNNING; } else { /* Enable receiver, put MAC online */ XE_SELECT_PAGE(0x40); XE_OUTB(XE_CMD0, XE_CMD0_RX_ENABLE|XE_CMD0_ONLINE); /* Set up IMR, enable interrupts */ xe_enable_intr(scp); /* Attempt to start output */ scp->ifp->if_flags |= IFF_RUNNING; scp->ifp->if_flags &= ~IFF_OACTIVE; xe_start(scp->ifp); } (void)splx(s); } /* * Start output on interface. We make two assumptions here: * 1) that the current priority is set to splimp _before_ this code * is called *and* is returned to the appropriate priority after * return * 2) that the IFF_OACTIVE flag is checked before this code is called * (i.e. that the output part of the interface is idle) */ static void xe_start(struct ifnet *ifp) { struct xe_softc *scp = ifp->if_softc; struct mbuf *mbp; if (scp->gone) return; /* * Loop while there are packets to be sent, and space to send them. */ while (1) { IF_DEQUEUE(&ifp->if_snd, mbp); /* Suck a packet off the send queue */ if (mbp == NULL) { /* * We are using the !OACTIVE flag to indicate to the outside world that * we can accept an additional packet rather than that the transmitter * is _actually_ active. Indeed, the transmitter may be active, but if * we haven't filled all the buffers with data then we still want to * accept more. */ ifp->if_flags &= ~IFF_OACTIVE; return; } if (xe_pio_write_packet(scp, mbp) != 0) { IF_PREPEND(&ifp->if_snd, mbp); /* Push the packet back onto the queue */ ifp->if_flags |= IFF_OACTIVE; return; } #if NBPF > 0 /* Tap off here if there is a bpf listener */ if (ifp->if_bpf) { #if XE_DEBUG > 1 printf("xe%d: sending output packet to BPF\n", scp->unit); #endif bpf_mtap(ifp, mbp); } #endif /* NBPF > 0 */ ifp->if_timer = 5; /* In case we don't hear from the card again */ scp->tx_queued++; m_freem(mbp); } } /* * Process an ioctl request. Adapted from the ed driver. */ static int xe_ioctl (register struct ifnet *ifp, u_long command, caddr_t data) { struct xe_softc *scp; int s, error; scp = ifp->if_softc; error = 0; if (scp->gone) { return ENXIO; } s = splimp(); switch (command) { case SIOCSIFADDR: case SIOCGIFADDR: case SIOCSIFMTU: error = ether_ioctl(ifp, command, data); break; case SIOCSIFFLAGS: /* * If the interface is marked up and stopped, then start it. If it is * marked down and running, then stop it. */ if (ifp->if_flags & IFF_UP) { if (!(ifp->if_flags & IFF_RUNNING)) { xe_hard_reset(scp); xe_setmedia(scp); xe_init(scp); } } else { if (ifp->if_flags & IFF_RUNNING) xe_stop(scp); } case SIOCADDMULTI: case SIOCDELMULTI: /* * Multicast list has (maybe) changed; set the hardware filter * accordingly. This also serves to deal with promiscuous mode if we have * a BPF listener active. */ xe_setmulti(scp); error = 0; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: /* * Someone wants to get/set media options. */ error = ifmedia_ioctl(ifp, (struct ifreq *)data, &scp->ifmedia, command); break; default: error = EINVAL; } (void)splx(s); return error; } /* * Card interrupt handler: should return true if the interrupt was for us, in * case we are sharing our IRQ line with other devices (this will probably be * the case for multifunction cards). * * This function is probably more complicated than it needs to be, as it * attempts to deal with the case where multiple packets get sent between * interrupts. This is especially annoying when working out the collision * stats. Not sure whether this case ever really happens or not (maybe on a * slow/heavily loaded machine?) so it's probably best to leave this like it * is. 
* * Note that the crappy PIO used to get packets on and off the card means that * you will spend a lot of time in this routine -- I can get my P150 to spend * 90% of its time servicing interrupts if I really hammer the network. Could * fix this, but then you'd start dropping/losing packets. The moral of this * story? If you want good network performance _and_ some cycles left over to * get your work done, don't buy a Xircom card. Or convince them to tell me * how to do memory-mapped I/O :) */ static int xe_card_intr(struct pccard_devinfo *devi) { struct xe_softc *scp; struct ifnet *ifp; int unit, result; u_int16_t rx_bytes, rxs, txs; u_int8_t psr, isr, esr, rsr; unit = devi->isahd.id_unit; scp = sca[unit]; ifp = &scp->arpcom.ac_if; rx_bytes = 0; /* Bytes received on this interrupt */ result = 0; /* Set true if the interrupt is for us */ if (scp->gone) return 0; if (scp->mohawk) { XE_OUTB(XE_CR, 0); /* Disable interrupts */ } psr = XE_INB(XE_PR); /* Stash the current register page */ /* * Read ISR to see what caused this interrupt. Note that this clears the * ISR on CE2 type cards. */ if ((isr = XE_INB(XE_ISR)) && isr != 0xff) { result = 1; /* This device did generate an int */ esr = XE_INB(XE_ESR); /* Read the other status registers */ XE_SELECT_PAGE(0x40); rxs = XE_INB(XE_RST0); XE_OUTB(XE_RST0, ~rxs & 0xff); txs = XE_INB(XE_TXST0); txs |= XE_INB(XE_TXST1) << 8; XE_OUTB(XE_TXST0, 0); XE_OUTB(XE_TXST1, 0); XE_SELECT_PAGE(0); #if XE_DEBUG > 2 printf("xe%d: ISR=%#2.2x ESR=%#2.2x RST=%#2.2x TXST=%#4.4x\n", unit, isr, esr, rxs, txs); #endif /* * Handle transmit interrupts */ if (isr & XE_ISR_TX_PACKET) { u_int8_t new_tpr, sent; if ((new_tpr = XE_INB(XE_TPR)) < scp->tx_tpr) /* Update packet count */ sent = (0xff - scp->tx_tpr) + new_tpr; /* TPR rolled over */ else sent = new_tpr - scp->tx_tpr; if (sent > 0) { /* Packets sent since last interrupt */ scp->tx_tpr = new_tpr; scp->tx_queued -= sent; ifp->if_opackets += sent; ifp->if_collisions += scp->tx_collisions; /* * Collision stats are a PITA. If multiples frames have been sent, we * distribute any outstanding collision count equally amongst them. * However, if we're missing interrupts we're quite likely to also * miss some collisions; thus the total count will be off anyway. * Likewise, if we miss a frame dropped due to excessive collisions * any outstanding collisions count will be held against the next * frame to be successfully sent. Hopefully it averages out in the * end! * XXX - This will screw up if tx_collisions/sent > 14. FIX IT! */ switch (scp->tx_collisions) { case 0: break; case 1: scp->mibdata.dot3StatsSingleCollisionFrames++; scp->mibdata.dot3StatsCollFrequencies[0]++; break; default: if (sent == 1) { scp->mibdata.dot3StatsMultipleCollisionFrames++; scp->mibdata.dot3StatsCollFrequencies[scp->tx_collisions-1]++; } else { /* Distribute across multiple frames */ scp->mibdata.dot3StatsMultipleCollisionFrames += sent; scp->mibdata. dot3StatsCollFrequencies[scp->tx_collisions/sent] += sent - scp->tx_collisions%sent; scp->mibdata. 
dot3StatsCollFrequencies[scp->tx_collisions/sent + 1] += scp->tx_collisions%sent; } } scp->tx_collisions = 0; } ifp->if_timer = 0; ifp->if_flags &= ~IFF_OACTIVE; } if (txs & 0x0002) { /* Excessive collisions (packet dropped) */ ifp->if_collisions += 16; ifp->if_oerrors++; scp->tx_collisions = 0; scp->mibdata.dot3StatsExcessiveCollisions++; scp->mibdata.dot3StatsMultipleCollisionFrames++; scp->mibdata.dot3StatsCollFrequencies[15]++; XE_OUTB(XE_CR, XE_CR_RESTART_TX); } if (txs & 0x0040) /* Transmit aborted -- probably collisions */ scp->tx_collisions++; /* * Handle receive interrupts */ while ((esr = XE_INB(XE_ESR)) & XE_ESR_FULL_PACKET_RX) { if ((rsr = XE_INB(XE_RSR)) & XE_RSR_RX_OK) { struct ether_header *ehp; struct mbuf *mbp; u_int16_t len; len = XE_INW(XE_RBC); if (len == 0) continue; #if 0 /* * Limit the amount of time we spend in this loop, dropping packets if * necessary. The Linux code does this with considerably more * finesse, adjusting the threshold dynamically. */ if ((rx_bytes += len) > 22000) { ifp->if_iqdrops++; scp->mibData.dot3StatsMissedFrames++; XE_OUTW(XE_DO, 0x8000); continue; } #endif if (len & 0x01) len++; MGETHDR(mbp, M_DONTWAIT, MT_DATA); /* Allocate a header mbuf */ if (mbp != NULL) { mbp->m_pkthdr.rcvif = ifp; mbp->m_pkthdr.len = mbp->m_len = len; /* * If the mbuf header isn't big enough for the packet, attach an * mbuf cluster to hold it. The +2 is to allow for the nasty little * alignment hack below. */ if (len + 2 > MHLEN) { MCLGET(mbp, M_DONTWAIT); if ((mbp->m_flags & M_EXT) == 0) { m_freem(mbp); mbp = NULL; } } } if (mbp != NULL) { /* * The Ethernet header is 14 bytes long; thus the actual packet data * won't be 32-bit aligned when it's dumped into the mbuf. We * offset everything by 2 bytes to fix this. Apparently the * alignment is important for NFS, damn its eyes. */ mbp->m_data += 2; ehp = mtod(mbp, struct ether_header *); /* * Now get the packet, including the Ethernet header and trailer (?) * We use programmed I/O, because we don't know how to do shared * memory with these cards. So yes, it's real slow, and heavy on * the interrupts (CPU on my P150 maxed out at ~950KBps incoming). */ if (scp->srev == 0) { /* Workaround a bug in old cards */ u_short rhs; XE_SELECT_PAGE(5); rhs = XE_INW(XE_RHSA); XE_SELECT_PAGE(0); rhs += 3; /* Skip control info */ if (rhs >= 0x8000) rhs = 0; if (rhs + len > 0x8000) { int i; /* * XXX - This i-- seems very wrong, but it's what the Linux guys * XXX - do. Need someone with an old CE2 to test this for me. * XXX - 99/3/28: Changed the first i-- to an i++, maybe that'll * XXX - fix it? It seems as though the previous version would * XXX - have caused an infinite loop (what, another one?). */ for (i = 0; i < len; i++, rhs++) { ((char *)ehp)[i] = XE_INB(XE_EDP); if (rhs == 0x8000) { rhs = 0; i--; } } } else insw(scp->dev->id_iobase+XE_EDP, ehp, len >> 1); } else insw(scp->dev->id_iobase+XE_EDP, ehp, len >> 1); #if NBPF > 0 /* * Check if there's a BPF listener on this interface. If so, hand * off the raw packet to bpf. */ if (ifp->if_bpf) { #if XE_DEBUG > 1 printf("xe%d: passing input packet to BPF\n", scp->unit); #endif bpf_mtap(ifp, mbp); /* * Note that the interface cannot be in promiscuous mode if there * are no BPF listeners. And if we are in promiscuous mode, we * have to check if this packet is really ours. 
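 * (The test below is a best guess at the intent: in promiscuous mode we drop
 * the frame here if its destination address doesn't match our own station
 * address and the receive status flags it as matched on the physical address
 * -- presumably XE_RSR_PHYS_PACKET is what distinguishes unicast frames from
 * multicast/broadcast, which we always want to keep.)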
*/ if ((ifp->if_flags & IFF_PROMISC) && bcmp(ehp->ether_dhost, scp->arpcom.ac_enaddr, sizeof(ehp->ether_dhost)) != 0 && (rsr & XE_RSR_PHYS_PACKET)) { m_freem(mbp); mbp = NULL; } } #endif /* NBPF > 0 */ if (mbp != NULL) { mbp->m_pkthdr.len = mbp->m_len = len - ETHER_HDR_LEN; mbp->m_data += ETHER_HDR_LEN; /* Strip off Ethernet header */ ether_input(ifp, ehp, mbp); /* Send the packet on its way */ ifp->if_ipackets++; /* Success! */ } XE_OUTW(XE_DO, 0x8000); /* skip_rx_packet command */ } } else if (rsr & XE_RSR_LONG_PACKET) { /* Packet length >1518 bytes */ scp->mibdata.dot3StatsFrameTooLongs++; ifp->if_ierrors++; } else if (rsr & XE_RSR_CRC_ERROR) { /* Bad checksum on packet */ scp->mibdata.dot3StatsFCSErrors++; ifp->if_ierrors++; } else if (rsr & XE_RSR_ALIGN_ERROR) { /* Packet alignment error */ scp->mibdata.dot3StatsAlignmentErrors++; ifp->if_ierrors++; } } if (rxs & 0x10) { /* Receiver overrun */ scp->mibdata.dot3StatsInternalMacReceiveErrors++; ifp->if_ierrors++; XE_OUTB(XE_CR, XE_CR_CLEAR_OVERRUN); } } XE_SELECT_PAGE(psr); /* Restore saved page */ XE_OUTB(XE_CR, XE_CR_ENABLE_INTR); /* Re-enable interrupts */ /* Could force an int here, instead of dropping packets? */ /* XE_OUTB(XE_CR, XE_CR_ENABLE_INTR|XE_CE_FORCE_INTR); */ return result; } /* * Device timeout/watchdog routine. Called automatically if we queue a packet * for transmission but don't get an interrupt within a specified timeout * (usually 5 seconds). When this happens we assume the worst and reset the * card. */ static void xe_watchdog(struct ifnet *ifp) { struct xe_softc *scp = ifp->if_softc; if (scp->gone) return; printf("xe%d: watchdog timeout; resetting card\n", scp->unit); scp->tx_timeouts++; ifp->if_oerrors += scp->tx_queued; xe_stop(scp); xe_hard_reset(scp); xe_setmedia(scp); xe_init(scp); } /* * Change media selection. */ static int xe_media_change(struct ifnet *ifp) { struct xe_softc *scp = ifp->if_softc; #ifdef XE_DEBUG printf("xe%d: media_change\n", ifp->if_unit); #endif if (IFM_TYPE(scp->ifm->ifm_media) != IFM_ETHER) return(EINVAL); /* * Some card/media combos aren't always possible -- filter those out here. */ if ((IFM_SUBTYPE(scp->ifm->ifm_media) == IFM_AUTO || IFM_SUBTYPE(scp->ifm->ifm_media) == IFM_100_TX) && !scp->phy_ok) return (EINVAL); xe_setmedia(scp); return 0; } /* * Return current media selection. */ static void xe_media_status(struct ifnet *ifp, struct ifmediareq *mrp) { #ifdef XE_DEBUG printf("xe%d: media_status\n", ifp->if_unit); #endif mrp->ifm_active = ((struct xe_softc *)ifp->if_softc)->media; return; } /* * Select active media. */ static void xe_setmedia(void *xscp) { struct xe_softc *scp = xscp; u_int16_t bmcr, bmsr, anar, lpar; #ifdef XE_DEBUG printf("xe%d: setmedia\n", scp->unit); #endif /* Cancel any pending timeout */ untimeout(xe_setmedia, scp, scp->chand); xe_disable_intr(scp); /* Select media */ scp->media = IFM_ETHER; switch (IFM_SUBTYPE(scp->ifm->ifm_media)) { case IFM_AUTO: /* Autoselect media */ scp->media = IFM_ETHER|IFM_AUTO; /* * Autoselection is really awful. It goes something like this: * * Wait until the transmitter goes idle (2sec timeout). 
* Reset card * IF a 100Mbit PHY exists * Start NWAY autonegotiation (3.5sec timeout) * IF that succeeds * Select 100baseTX or 10baseT, whichever was detected * ELSE * Reset card * IF a 100Mbit PHY exists * Try to force a 100baseTX link (3sec timeout) * IF that succeeds * Select 100baseTX * ELSE * Disable the PHY * ENDIF * ENDIF * ENDIF * ENDIF * IF nothing selected so far * IF a 100Mbit PHY exists * Select 10baseT * ELSE * Select 10baseT or 10base2, whichever is connected * ENDIF * ENDIF */ switch (scp->autoneg_status) { case XE_AUTONEG_NONE: #if XE_DEBUG > 1 printf("xe%d: Waiting for idle transmitter\n", scp->unit); #endif scp->arpcom.ac_if.if_flags |= IFF_OACTIVE; scp->autoneg_status = XE_AUTONEG_WAITING; scp->chand = timeout(xe_setmedia, scp, hz * 2); return; case XE_AUTONEG_WAITING: xe_soft_reset(scp); if (scp->phy_ok) { #if XE_DEBUG > 1 printf("xe%d: Starting autonegotiation\n", scp->unit); #endif bmcr = xe_phy_readreg(scp, PHY_BMCR); bmcr &= ~(PHY_BMCR_AUTONEGENBL); xe_phy_writereg(scp, PHY_BMCR, bmcr); anar = xe_phy_readreg(scp, PHY_ANAR); anar &= ~(PHY_ANAR_100BT4|PHY_ANAR_100BTXFULL|PHY_ANAR_10BTFULL); anar |= PHY_ANAR_100BTXHALF|PHY_ANAR_10BTHALF; xe_phy_writereg(scp, PHY_ANAR, anar); bmcr |= PHY_BMCR_AUTONEGENBL|PHY_BMCR_AUTONEGRSTR; xe_phy_writereg(scp, PHY_BMCR, bmcr); scp->autoneg_status = XE_AUTONEG_STARTED; scp->chand = timeout(xe_setmedia, scp, hz * 7/2); return; } else { scp->autoneg_status = XE_AUTONEG_FAIL; } break; case XE_AUTONEG_STARTED: bmsr = xe_phy_readreg(scp, PHY_BMSR); lpar = xe_phy_readreg(scp, PHY_LPAR); if (bmsr & (PHY_BMSR_AUTONEGCOMP|PHY_BMSR_LINKSTAT)) { #if XE_DEBUG > 1 printf("xe%d: Autonegotiation complete!\n", scp->unit); #endif /* * XXX - Shouldn't have to do this, but (on my hub at least) the * XXX - transmitter won't work after a successful autoneg. So we see * XXX - what the negotiation result was and force that mode. I'm * XXX - sure there is an easy fix for this. */ if (lpar & PHY_LPAR_100BTXHALF) { xe_phy_writereg(scp, PHY_BMCR, PHY_BMCR_SPEEDSEL); XE_MII_DUMP(scp); XE_SELECT_PAGE(2); XE_OUTB(XE_MSR, XE_INB(XE_MSR) | 0x08); scp->media = IFM_ETHER|IFM_100_TX; scp->autoneg_status = XE_AUTONEG_NONE; } else { /* * XXX - Bit of a hack going on in here. * XXX - This is derived from Ken Hughes patch to the Linux driver * XXX - to make it work with 10Mbit _autonegotiated_ links on CE3B * XXX - cards. What's a CE3B and how's it differ from a plain CE3? * XXX - these are the things we need to find out. */ xe_phy_writereg(scp, PHY_BMCR, 0x0000); XE_SELECT_PAGE(2); /* BEGIN HACK */ XE_OUTB(XE_MSR, XE_INB(XE_MSR) | 0x08); XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0x80); scp->media = IFM_ETHER|IFM_10_T; scp->autoneg_status = XE_AUTONEG_NONE; /* END HACK */ /*XE_OUTB(XE_MSR, XE_INB(XE_MSR) & ~0x08);*/ /* Disable PHY? 
*/ /*scp->autoneg_status = XE_AUTONEG_FAIL;*/ } } else { #if XE_DEBUG > 1 printf("xe%d: Autonegotiation failed; trying 100baseTX\n", scp->unit); #endif XE_MII_DUMP(scp); xe_soft_reset(scp); if (scp->phy_ok) { xe_phy_writereg(scp, PHY_BMCR, PHY_BMCR_SPEEDSEL); scp->autoneg_status = XE_AUTONEG_100TX; scp->chand = timeout(xe_setmedia, scp, hz * 3); return; } else { scp->autoneg_status = XE_AUTONEG_FAIL; } } break; case XE_AUTONEG_100TX: (void)xe_phy_readreg(scp, PHY_BMSR); bmsr = xe_phy_readreg(scp, PHY_BMSR); if (bmsr & PHY_BMSR_LINKSTAT) { #if XE_DEBUG > 1 printf("xe%d: Got 100baseTX link!\n", scp->unit); #endif XE_MII_DUMP(scp); XE_SELECT_PAGE(2); XE_OUTB(XE_MSR, XE_INB(XE_MSR) | 0x08); scp->media = IFM_ETHER|IFM_100_TX; scp->autoneg_status = XE_AUTONEG_NONE; } else { #if XE_DEBUG > 1 printf("xe%d: Autonegotiation failed; disabling PHY\n", scp->unit); #endif XE_MII_DUMP(scp); xe_phy_writereg(scp, PHY_BMCR, 0x0000); XE_SELECT_PAGE(2); XE_OUTB(XE_MSR, XE_INB(XE_MSR) & ~0x08); /* Disable PHY? */ scp->autoneg_status = XE_AUTONEG_FAIL; } break; } /* * If we got down here _and_ autoneg_status is XE_AUTONEG_FAIL, then * either autonegotiation failed, or never got started to begin with. In * either case, select a suitable 10Mbit media and hope it works. We * don't need to reset the card again, since it will have been done * already by the big switch above. */ if (scp->autoneg_status == XE_AUTONEG_FAIL) { #if XE_DEBUG > 1 printf("xe%d: Selecting 10baseX\n", scp->unit); #endif if (scp->mohawk) { XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0x80); scp->media = IFM_ETHER|IFM_10_T; scp->autoneg_status = XE_AUTONEG_NONE; } else { XE_SELECT_PAGE(4); XE_OUTB(XE_GPR0, 4); DELAY(50000); XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, (XE_INB(XE_ESR) & XE_ESR_MEDIA_SELECT) ? 0x80 : 0xc0); scp->media = IFM_ETHER|((XE_INB(XE_ESR) & XE_ESR_MEDIA_SELECT) ? IFM_10_T : IFM_10_2); scp->autoneg_status = XE_AUTONEG_NONE; } } break; /* * If a specific media has been requested, we just reset the card and * select it (one small exception -- if 100baseTX is requested by there is * no PHY, we fall back to 10baseT operation). */ case IFM_100_TX: /* Force 100baseTX */ xe_soft_reset(scp); if (scp->phy_ok) { #if XE_DEBUG > 1 printf("xe%d: Selecting 100baseTX\n", scp->unit); #endif XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0); xe_phy_writereg(scp, PHY_BMCR, PHY_BMCR_SPEEDSEL); XE_SELECT_PAGE(2); XE_OUTB(XE_MSR, XE_INB(XE_MSR) | 0x08); scp->media |= IFM_100_TX; break; } /* FALLTHROUGH */ case IFM_10_T: /* Force 10baseT */ xe_soft_reset(scp); #if XE_DEBUG > 1 printf("xe%d: Selecting 10baseT\n", scp->unit); #endif if (scp->phy_ok) { xe_phy_writereg(scp, PHY_BMCR, 0x0000); XE_SELECT_PAGE(2); XE_OUTB(XE_MSR, XE_INB(XE_MSR) & ~0x08); /* Disable PHY */ } XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0x80); scp->media |= IFM_10_T; break; case IFM_10_2: xe_soft_reset(scp); #if XE_DEBUG > 1 printf("xe%d: Selecting 10base2\n", scp->unit); #endif XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0xc0); scp->media |= IFM_10_2; break; } /* * Finally, the LEDs are set to match whatever media was chosen and the * transmitter is unblocked. */ #if XE_DEBUG > 1 printf("xe%d: Setting LEDs\n", scp->unit); #endif XE_SELECT_PAGE(2); switch (IFM_SUBTYPE(scp->media)) { case IFM_100_TX: case IFM_10_T: XE_OUTB(XE_LED, 0x3b); if (scp->dingo) XE_OUTB(0x0b, 0x04); /* 100Mbit LED */ break; case IFM_10_2: XE_OUTB(XE_LED, 0x3a); break; } /* Restart output? */ scp->ifp->if_flags &= ~IFF_OACTIVE; xe_init(scp); } /* * Hard reset (power cycle) the card. 
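 * Roughly: the card is powered off through GPR1, left off for ~40ms, then
 * powered back on (Mohawk cards only need the power bit; older cards also get
 * the AIC bit set, whatever that turns out to be) and given another ~40ms to
 * settle before we touch it again.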
*/ static void xe_hard_reset(struct xe_softc *scp) { int s; #ifdef XE_DEBUG printf("xe%d: hard_reset\n", scp->unit); #endif if (scp->gone) return; s = splimp(); /* * Power cycle the card. */ XE_SELECT_PAGE(4); XE_OUTB(XE_GPR1, 0); /* Power off */ DELAY(40000); if (scp->mohawk) XE_OUTB(XE_GPR1, 1); /* And back on again */ else XE_OUTB(XE_GPR1, 5); /* Also set AIC bit, whatever that is */ DELAY(40000); XE_SELECT_PAGE(0); (void)splx(s); } /* * Soft reset the card. Also makes sure that the ML6692 and 10Mbit controller * are powered up, sets the silicon revision number in softc, disables * interrupts and checks for the prescence of a 100Mbit PHY. This should * leave us in a position where we can access the PHY and do media * selection. The function imposes a 0.5s delay while the hardware powers up. */ static void xe_soft_reset(struct xe_softc *scp) { int s; #ifdef XE_DEBUG printf("xe%d: soft_reset\n", scp->unit); #endif if (scp->gone) return; s = splimp(); /* * Reset the card, (again). */ XE_SELECT_PAGE(0); XE_OUTB(XE_CR, XE_CR_SOFT_RESET); DELAY(40000); XE_OUTB(XE_CR, 0); DELAY(40000); if (scp->mohawk) { /* * set GP1 and GP2 as outputs (bits 2 & 3) * set GP1 low to power on the ML6692 (bit 0) * set GP2 high to power on the 10Mhz chip (bit 1) */ XE_SELECT_PAGE(4); XE_OUTB(XE_GPR0, 0x0e); } /* * Wait for everything to wake up. */ DELAY(500000); /* * Get silicon revision number. */ XE_SELECT_PAGE(4); if (scp->mohawk) scp->srev = (XE_INB(XE_BOV) & 0x70) >> 4; else scp->srev = (XE_INB(XE_BOV) & 0x30) >> 4; #ifdef XE_DEBUG printf("xe%d: silicon revision = %d\n", scp->unit, scp->srev); #endif /* * Shut off interrupts. */ xe_disable_intr(scp); /* * Check for PHY. */ if (scp->mohawk) { scp->phy_ok = xe_mii_init(scp); } XE_SELECT_PAGE(0); (void)splx(s); } /* * Take interface offline. This is done by powering down the device, which I * assume means just shutting down the transceiver and Ethernet logic. This * requires a _hard_ reset to recover from, as we need to power up again. */ static void xe_stop(struct xe_softc *scp) { int s; #ifdef XE_DEBUG printf("xe%d: stop\n", scp->unit); #endif if (scp->gone) return; s = splimp(); /* * Shut off interrupts. */ xe_disable_intr(scp); /* * Power down. */ XE_SELECT_PAGE(4); XE_OUTB(XE_GPR1, 0); XE_SELECT_PAGE(0); /* * ~IFF_RUNNING == interface down. */ scp->ifp->if_flags &= ~IFF_RUNNING; scp->ifp->if_flags &= ~IFF_OACTIVE; scp->ifp->if_timer = 0; (void)splx(s); } /* * Enable Ethernet interrupts from the card. */ static void xe_enable_intr(struct xe_softc *scp) { #ifdef XE_DEBUG printf("xe%d: enable_intr\n", scp->unit); #endif XE_SELECT_PAGE(1); XE_OUTB(XE_IMR0, 0xff); /* Unmask everything */ XE_OUTB(XE_IMR1, 0x01); /* Unmask TX underrun detection */ DELAY(1); XE_SELECT_PAGE(0); XE_OUTB(XE_CR, XE_CR_ENABLE_INTR); /* Enable interrupts */ if (scp->modem && !scp->dingo) { /* This bit is just magic */ if (!(XE_INB(0x10) & 0x01)) { XE_OUTB(0x10, 0x11); /* Unmask master int enable bit */ } } } /* * Disable all Ethernet interrupts from the card. */ static void xe_disable_intr(struct xe_softc *scp) { #ifdef XE_DEBUG printf("xe%d: disable_intr\n", scp->unit); #endif XE_SELECT_PAGE(0); XE_OUTB(XE_CR, 0); /* Disable interrupts */ if (scp->modem && !scp->dingo) { /* More magic (does this work?) 
*/ XE_OUTB(0x10, 0x10); /* Mask the master int enable bit */ } XE_SELECT_PAGE(1); XE_OUTB(XE_IMR0, 0); /* Forbid all interrupts */ XE_OUTB(XE_IMR1, 0); XE_SELECT_PAGE(0); } /* * Set up multicast filter and promiscuous mode */ static void xe_setmulti(struct xe_softc *scp) { struct ifnet *ifp; struct ifmultiaddr *maddr; int count; ifp = &scp->arpcom.ac_if; maddr = ifp->if_multiaddrs.lh_first; /* Get length of multicast list */ for (count = 0; maddr != NULL; maddr = maddr->ifma_link.le_next, count++); if ((ifp->if_flags & IFF_PROMISC) || (ifp->if_flags & IFF_ALLMULTI) || (count > 9)) { /* * Go into promiscuous mode if either of the PROMISC or ALLMULTI flags are * set, or if we have been asked to deal with more than 9 multicast * addresses. To do this: set MPE and PME in SWC1 */ XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0x06); } else if ((ifp->if_flags & IFF_MULTICAST) && (count > 0)) { /* * Program the filters for up to 9 addresses */ XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0x01); XE_SELECT_PAGE(0x40); XE_OUTB(XE_CMD0, XE_CMD0_OFFLINE); /*xe_reg_dump(scp);*/ xe_setaddrs(scp); /*xe_reg_dump(scp);*/ XE_SELECT_PAGE(0x40); XE_OUTB(XE_CMD0, XE_CMD0_RX_ENABLE|XE_CMD0_ONLINE); } else { /* * No multicast operation (default) */ XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0); } XE_SELECT_PAGE(0); } /* * Set up all on-chip addresses (for multicast). AFAICS, there are 10 * of these things; the first is our MAC address, the other 9 are mcast * addresses, padded with the MAC address if there aren't enough. * XXX - This doesn't work right, but I'm not sure why yet. We seem to be * XXX - doing much the same as the Linux code, which is weird enough that * XXX - it's probably right (despite my earlier comments to the contrary). */ static void xe_setaddrs(struct xe_softc *scp) { struct ifmultiaddr *maddr; u_int8_t *addr; u_int8_t page, slot, byte, i; maddr = scp->arpcom.ac_if.if_multiaddrs.lh_first; XE_SELECT_PAGE(page = 0x50); for (slot = 0, byte = 8; slot < 10; slot++) { if (slot == 0) addr = (u_int8_t *)(&scp->arpcom.ac_enaddr); else { while (maddr != NULL && maddr->ifma_addr->sa_family != AF_LINK) maddr = maddr->ifma_link.le_next; if (maddr != NULL) addr = LLADDR((struct sockaddr_dl *)maddr->ifma_addr); else addr = (u_int8_t *)(&scp->arpcom.ac_enaddr); } for (i = 0; i < 6; i++, byte++) { #if XE_DEBUG > 2 if (i) printf(":%x", addr[i]); else printf("xe%d: individual addresses %d: %x", scp->unit, slot, addr[0]); #endif if (byte > 15) { page++; byte = 8; XE_SELECT_PAGE(page); } if (scp->mohawk) XE_OUTB(byte, addr[5 - i]); else XE_OUTB(byte, addr[i]); } #if XE_DEBUG > 2 printf("\n"); #endif } XE_SELECT_PAGE(0); } /* * Write an outgoing packet to the card using programmed I/O. 
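 * In outline: total up the length of the mbuf chain, noting any padding
 * needed to reach the Ethernet minimum frame size, check that the card's
 * transmit buffer has room (TRS/TSO), then write the length followed by the
 * data to the data port one word at a time, carrying odd bytes across mbuf
 * boundaries. Returns non-zero if the packet wouldn't fit, so the caller can
 * requeue it.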
*/ static int xe_pio_write_packet(struct xe_softc *scp, struct mbuf *mbp) { struct mbuf *mbp2; u_int16_t len, pad, free, ok; u_int8_t *data; u_int8_t savebyte[2], wantbyte; /* Get total packet length */ for (len = 0, mbp2 = mbp; mbp2 != NULL; len += mbp2->m_len, mbp2 = mbp2->m_next); /* Packets < minimum length may need to be padded out */ pad = 0; if (len < ETHER_MIN_LEN - ETHER_CRC_LEN) { pad = (ETHER_MIN_LEN - ETHER_CRC_LEN - len + 1) >> 1; len = ETHER_MIN_LEN - ETHER_CRC_LEN; } /* Check transmit buffer space */ XE_SELECT_PAGE(0); XE_OUTW(XE_TRS, len+2); free = XE_INW(XE_TSO); ok = free & 0x8000; free &= 0x7fff; if (free <= len + 2) return 1; /* Send packet length to card */ XE_OUTW(XE_EDP, len); /* * Write packet to card using PIO (code stolen from the ed driver) */ wantbyte = 0; while (mbp != NULL) { len = mbp->m_len; if (len > 0) { data = mtod(mbp, caddr_t); if (wantbyte) { /* Finish the last word */ savebyte[1] = *data; XE_OUTW(XE_EDP, *(u_short *)savebyte); data++; len--; wantbyte = 0; } if (len > 1) { /* Output contiguous words */ outsw(scp->dev->id_iobase+XE_EDP, data, len >> 1); data += len & ~1; len &= 1; } if (len == 1) { /* Save last byte, if necessary */ savebyte[0] = *data; wantbyte = 1; } } mbp = mbp->m_next; } if (wantbyte) /* Last byte for odd-length packets */ XE_OUTW(XE_EDP, *(u_short *)savebyte); /* * For CE3 cards, just tell 'em to send -- apparently the card will pad out * short packets with random cruft. Otherwise, write nonsense words to fill * out the packet. I guess it is then sent automatically (?) */ if (scp->mohawk) XE_OUTB(XE_CR, XE_CR_TX_PACKET|XE_CR_ENABLE_INTR); else while (pad > 0) { XE_OUTW(XE_EDP, 0xdead); pad--; } return 0; } /* * The device entry is being removed, probably because someone ejected the * card. The interface should have been brought down manually before calling * this function; if not you may well lose packets. In any case, I shut down * the card and the interface, and hope for the best. The 'gone' flag is set, * so hopefully no-one else will try to access the missing card. */ static void xe_card_unload(struct pccard_devinfo *devi) { struct xe_softc *scp; struct ifnet *ifp; int unit; unit = devi->isahd.id_unit; scp = sca[unit]; ifp = &scp->arpcom.ac_if; if (scp->gone) { printf("xe%d: already unloaded\n", unit); return; } if_down(ifp); ifp->if_flags &= ~(IFF_RUNNING|IFF_OACTIVE); xe_stop(scp); scp->gone = 1; } /* * Compute the 32-bit Ethernet CRC for the given buffer. */ static u_int32_t xe_compute_crc(u_int8_t *data, int len) { u_int32_t crc = 0xffffffff; u_int32_t poly = 0x04c11db6; u_int8_t current, crc31, bit; int i, k; for (i = 0; i < len; i++) { current = data[i]; for (k = 1; k <= 8; k++) { if (crc & 0x80000000) { crc31 = 0x01; } else { crc31 = 0; } bit = crc31 ^ (current & 0x01); crc <<= 1; current >>= 1; if (bit) { crc = (crc ^ poly)|1; } } } return crc; } /* * Convert a CRC into an index into the multicast hash table. What we do is * take the most-significant 6 bits of the CRC, reverse them, and use that as * the bit number in the hash table. Bits 5:3 of the result give the byte * within the table (0-7); bits 2:0 give the bit number within that byte (also * 0-7), ie. the number of shifts needed to get it into the lsb position. 
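 * Worked example (from the description above): if the top six bits of the
 * CRC are 110100, reversing them gives 001011 = 11, so the address hashes to
 * byte 1 (11 >> 3) of the table, bit 3 (11 & 7).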
*/ static int xe_compute_hashbit(u_int32_t crc) { u_int8_t hashbit = 0; int i; for (i = 0; i < 6; i++) { hashbit >>= 1; if (crc & 0x80000000) { hashbit &= 0x80; } crc <<= 1; } return (hashbit >> 2); } /************************************************************** * * * M I I F U N C T I O N S * * * **************************************************************/ /* * Alternative MII/PHY handling code adapted from the xl driver. It doesn't * seem to work any better than the xirc2_ps stuff, but it's cleaner code. * XXX - this stuff shouldn't be here. It should all be abstracted off to * XXX - some kind of common MII-handling code, shared by all drivers. But * XXX - that's a whole other mission. */ #define XE_MII_SET(x) XE_OUTB(XE_GPR2, (XE_INB(XE_GPR2) | 0x04) | (x)) #define XE_MII_CLR(x) XE_OUTB(XE_GPR2, (XE_INB(XE_GPR2) | 0x04) & ~(x)) /* * Sync the PHYs by setting data bit and strobing the clock 32 times. */ static void xe_mii_sync(struct xe_softc *scp) { register int i; XE_SELECT_PAGE(2); XE_MII_SET(XE_MII_DIR|XE_MII_WRD); for (i = 0; i < 32; i++) { XE_MII_SET(XE_MII_CLK); DELAY(1); XE_MII_CLR(XE_MII_CLK); DELAY(1); } } /* * Look for a MII-compliant PHY. If we find one, reset it. */ static int xe_mii_init(struct xe_softc *scp) { u_int16_t status; status = xe_phy_readreg(scp, PHY_BMSR); if ((status & 0xff00) != 0x7800) { #if XE_DEBUG > 1 printf("xe%d: no PHY found, %0x\n", scp->unit, status); #endif return 0; } else { #if XE_DEBUG > 1 printf("xe%d: PHY OK!\n", scp->unit); #endif /* Reset the PHY */ xe_phy_writereg(scp, PHY_BMCR, PHY_BMCR_RESET); DELAY(500); while(xe_phy_readreg(scp, PHY_BMCR) & PHY_BMCR_RESET); XE_MII_DUMP(scp); return 1; } } /* * Clock a series of bits through the MII. */ static void xe_mii_send(struct xe_softc *scp, u_int32_t bits, int cnt) { int i; XE_SELECT_PAGE(2); XE_MII_CLR(XE_MII_CLK); for (i = (0x1 << (cnt - 1)); i; i >>= 1) { if (bits & i) { XE_MII_SET(XE_MII_WRD); } else { XE_MII_CLR(XE_MII_WRD); } DELAY(1); XE_MII_CLR(XE_MII_CLK); DELAY(1); XE_MII_SET(XE_MII_CLK); } } /* * Read an PHY register through the MII. */ static int xe_mii_readreg(struct xe_softc *scp, struct xe_mii_frame *frame) { int i, ack, s; s = splimp(); /* * Set up frame for RX. */ frame->mii_stdelim = XE_MII_STARTDELIM; frame->mii_opcode = XE_MII_READOP; frame->mii_turnaround = 0; frame->mii_data = 0; XE_SELECT_PAGE(2); XE_OUTB(XE_GPR2, 0); /* * Turn on data xmit. */ XE_MII_SET(XE_MII_DIR); xe_mii_sync(scp); /* * Send command/address info. */ xe_mii_send(scp, frame->mii_stdelim, 2); xe_mii_send(scp, frame->mii_opcode, 2); xe_mii_send(scp, frame->mii_phyaddr, 5); xe_mii_send(scp, frame->mii_regaddr, 5); /* Idle bit */ XE_MII_CLR((XE_MII_CLK|XE_MII_WRD)); DELAY(1); XE_MII_SET(XE_MII_CLK); DELAY(1); /* Turn off xmit. */ XE_MII_CLR(XE_MII_DIR); /* Check for ack */ XE_MII_CLR(XE_MII_CLK); DELAY(1); XE_MII_SET(XE_MII_CLK); DELAY(1); ack = XE_INB(XE_GPR2) & XE_MII_RDD; /* * Now try reading data bits. If the ack failed, we still * need to clock through 16 cycles to keep the PHY(s) in sync. */ if (ack) { for(i = 0; i < 16; i++) { XE_MII_CLR(XE_MII_CLK); DELAY(1); XE_MII_SET(XE_MII_CLK); DELAY(1); } goto fail; } for (i = 0x8000; i; i >>= 1) { XE_MII_CLR(XE_MII_CLK); DELAY(1); if (!ack) { if (XE_INB(XE_GPR2) & XE_MII_RDD) frame->mii_data |= i; DELAY(1); } XE_MII_SET(XE_MII_CLK); DELAY(1); } fail: XE_MII_CLR(XE_MII_CLK); DELAY(1); XE_MII_SET(XE_MII_CLK); DELAY(1); splx(s); if (ack) return(1); return(0); } /* * Write to a PHY register through the MII. 
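 * The frame clocked out below follows the usual MII management format: a
 * 2-bit start delimiter, 2-bit write opcode, 5-bit PHY address, 5-bit
 * register address and 2-bit turnaround, then 16 bits of data, all shifted
 * out MSB-first by xe_mii_send().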
*/ static int xe_mii_writereg(struct xe_softc *scp, struct xe_mii_frame *frame) { int s; s = splimp(); /* * Set up frame for TX. */ frame->mii_stdelim = XE_MII_STARTDELIM; frame->mii_opcode = XE_MII_WRITEOP; frame->mii_turnaround = XE_MII_TURNAROUND; XE_SELECT_PAGE(2); /* * Turn on data output. */ XE_MII_SET(XE_MII_DIR); xe_mii_sync(scp); xe_mii_send(scp, frame->mii_stdelim, 2); xe_mii_send(scp, frame->mii_opcode, 2); xe_mii_send(scp, frame->mii_phyaddr, 5); xe_mii_send(scp, frame->mii_regaddr, 5); xe_mii_send(scp, frame->mii_turnaround, 2); xe_mii_send(scp, frame->mii_data, 16); /* Idle bit. */ XE_MII_SET(XE_MII_CLK); DELAY(1); XE_MII_CLR(XE_MII_CLK); DELAY(1); /* * Turn off xmit. */ XE_MII_CLR(XE_MII_DIR); splx(s); return(0); } /* * Read a register from the PHY. */ static u_int16_t xe_phy_readreg(struct xe_softc *scp, u_int16_t reg) { struct xe_mii_frame frame; bzero((char *)&frame, sizeof(frame)); frame.mii_phyaddr = 0; frame.mii_regaddr = reg; xe_mii_readreg(scp, &frame); return(frame.mii_data); } /* * Write to a PHY register. */ static void xe_phy_writereg(struct xe_softc *scp, u_int16_t reg, u_int16_t data) { struct xe_mii_frame frame; bzero((char *)&frame, sizeof(frame)); frame.mii_phyaddr = 0; frame.mii_regaddr = reg; frame.mii_data = data; xe_mii_writereg(scp, &frame); return; } #ifdef XE_DEBUG /* * A bit of debugging code. */ static void xe_mii_dump(struct xe_softc *scp) { int i, s; s = splimp(); printf("xe%d: MII registers: ", scp->unit); for (i = 0; i < 2; i++) { printf(" %d:%04x", i, xe_phy_readreg(scp, i)); } for (i = 4; i < 7; i++) { printf(" %d:%04x", i, xe_phy_readreg(scp, i)); } printf("\n"); (void)splx(s); } static void xe_reg_dump(struct xe_softc *scp) { int page, i, s; s = splimp(); printf("xe%d: Common registers: ", scp->unit); for (i = 0; i < 8; i++) { printf(" %2.2x", XE_INB(i)); } printf("\n"); for (page = 0; page <= 8; page++) { printf("xe%d: Register page %2.2x: ", scp->unit, page); XE_SELECT_PAGE(page); for (i = 8; i < 16; i++) { printf(" %2.2x", XE_INB(i)); } printf("\n"); } for (page = 0x10; page < 0x5f; page++) { if ((page >= 0x11 && page <= 0x3f) || (page == 0x41) || (page >= 0x43 && page <= 0x4f) || (page >= 0x59)) continue; printf("xe%d: Register page %2.2x: ", scp->unit, page); XE_SELECT_PAGE(page); for (i = 8; i < 16; i++) { printf(" %2.2x", XE_INB(i)); } printf("\n"); } (void)splx(s); } #endif #if NAPM > 0 /************************************************************** * * * A P M F U N C T I O N S * * * **************************************************************/ /* * This is called when we go into suspend/standby mode */ static int xe_suspend(void *xunit) { #ifdef XE_DEBUG struct xe_softc *scp = sca[(int)xunit]; printf("xe%d: APM suspend\n", scp->unit); #endif return 0; } /* * This is called when we wake up again */ static int xe_resume(void *xunit) { #ifdef XE_DEBUG struct xe_softc *scp = sca[(int)xunit]; printf("xe%d: APM resume\n", scp->unit); #endif return 0; } #endif /* NAPM > 0 */ #endif /* NCARD > 0 */ #endif /* NXE > 0 */ Index: head/sys/dev/vinum/vinuminterrupt.c =================================================================== --- head/sys/dev/vinum/vinuminterrupt.c (revision 49534) +++ head/sys/dev/vinum/vinuminterrupt.c (revision 49535) @@ -1,431 +1,430 @@ /* vinuminterrupt.c: bottom half of the driver */ /*- * Copyright (c) 1997, 1998, 1999 * Nan Yang Computer Services Limited. All rights reserved. * * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. 
* * Written by Greg Lehey * * This software is distributed under the so-called ``Berkeley * License'': * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Nan Yang Computer * Services Limited. * 4. Neither the name of the Company nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * This software is provided ``as is'', and any express or implied * warranties, including, but not limited to, the implied warranties of * merchantability and fitness for a particular purpose are disclaimed. * In no event shall the company or contributors be liable for any * direct, indirect, incidental, special, exemplary, or consequential * damages (including, but not limited to, procurement of substitute * goods or services; loss of use, data, or profits; or business * interruption) however caused and on any theory of liability, whether * in contract, strict liability, or tort (including negligence or * otherwise) arising in any way out of the use of this software, even if * advised of the possibility of such damage. * - * $Id: vinuminterrupt.c,v 1.6 1999/06/18 00:50:53 grog Exp grog $ + * $Id: vinuminterrupt.c,v 1.12 1999/08/07 08:06:30 grog Exp $ */ #include #include -#include #include void complete_raid5_write(struct rqelement *); void freerq(struct request *rq); void free_rqg(struct rqgroup *rqg); void complete_rqe(struct buf *bp); void sdio_done(struct buf *bp); /* * Take a completed buffer, transfer the data back if * it's a read, and complete the high-level request * if this is the last subrequest. * * The bp parameter is in fact a struct rqelement, which * includes a couple of extras at the end. */ void complete_rqe(struct buf *bp) { struct rqelement *rqe; struct request *rq; struct rqgroup *rqg; struct buf *ubp; /* user buffer */ rqe = (struct rqelement *) bp; /* point to the element element that completed */ rqg = rqe->rqg; /* and the request group */ rq = rqg->rq; /* and the complete request */ ubp = rq->bp; /* user buffer */ #ifdef VINUMDEBUG if (debug & DEBUG_LASTREQS) logrq(loginfo_iodone, (union rqinfou) rqe, ubp); #endif if ((bp->b_flags & B_ERROR) != 0) { /* transfer in error */ if (bp->b_error != 0) /* did it return a number? */ rq->error = bp->b_error; /* yes, put it in. */ else if (rq->error == 0) /* no: do we have one already? 
*/ rq->error = EIO; /* no: catchall "I/O error" */ SD[rqe->sdno].lasterror = rq->error; if (bp->b_flags & B_READ) { log(LOG_ERR, "%s: fatal read I/O error\n", SD[rqe->sdno].name); set_sd_state(rqe->sdno, sd_crashed, setstate_force); /* subdisk is crashed */ } else { /* write operation */ log(LOG_ERR, "%s: fatal write I/O error\n", SD[rqe->sdno].name); set_sd_state(rqe->sdno, sd_stale, setstate_force); /* subdisk is stale */ } if (rq->error == ENXIO) { /* the drive's down too */ log(LOG_ERR, "%s: fatal drive I/O error\n", DRIVE[rqe->driveno].label.name); DRIVE[rqe->driveno].lasterror = rq->error; set_drive_state(rqe->driveno, /* take the drive down */ drive_down, setstate_force); } } /* Now update the statistics */ if (bp->b_flags & B_READ) { /* read operation */ DRIVE[rqe->driveno].reads++; DRIVE[rqe->driveno].bytes_read += bp->b_bcount; SD[rqe->sdno].reads++; SD[rqe->sdno].bytes_read += bp->b_bcount; PLEX[rqe->rqg->plexno].reads++; PLEX[rqe->rqg->plexno].bytes_read += bp->b_bcount; } else { /* write operation */ DRIVE[rqe->driveno].writes++; DRIVE[rqe->driveno].bytes_written += bp->b_bcount; SD[rqe->sdno].writes++; SD[rqe->sdno].bytes_written += bp->b_bcount; PLEX[rqe->rqg->plexno].writes++; PLEX[rqe->rqg->plexno].bytes_written += bp->b_bcount; } rqg->active--; /* one less request active */ if (rqg->flags & XFR_RECOVERY_READ) { /* recovery read, */ int *sdata; /* source */ int *data; /* and group data */ int length; /* and count involved */ int count; /* loop counter */ struct rqelement *urqe = &rqg->rqe[rqg->badsdno]; /* rqe of the bad subdisk */ /* XOR destination is the user data */ sdata = (int *) &rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]; /* old data contents */ data = (int *) &urqe->b.b_data[urqe->groupoffset << DEV_BSHIFT]; /* destination */ length = urqe->grouplen << (DEV_BSHIFT - 2); /* and count involved */ for (count = 0; count < length; count++) data[count] ^= sdata[count]; #ifdef VINUMDEBUG if (debug & DEBUG_RESID) { if ((rqg->active == 0) /* XXXX finished this group */ &&(*(char *) data != '<')) /* and not what we expected */ Debugger("complete_request checksum"); } #endif /* * In a normal read, we will normally read directly * into the user buffer. This doesn't work if * we're also doing a recovery, so we have to * copy it */ if (rqe->flags & XFR_NORMAL_READ) { /* normal read as well, */ char *src = &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* read data is here */ char *dst; dst = (char *) ubp->b_data + (rqe->useroffset << DEV_BSHIFT); /* where to put it in user buffer */ length = rqe->datalen << DEV_BSHIFT; /* and count involved */ bcopy(src, dst, length); /* move it */ } } else if ((rqg->flags & (XFR_NORMAL_WRITE | XFR_DEGRADED_WRITE)) /* RAID 5 group write operation */ &&(rqg->active == 0)) /* and we've finished phase 1 */ complete_raid5_write(rqe); if (rqg->active == 0) /* request group finished, */ rq->active--; /* one less */ if (rq->active == 0) { /* request finished, */ #if VINUMDEBUG if (debug & DEBUG_RESID) { if (ubp->b_resid != 0) /* still something to transfer? */ Debugger("resid"); { int i; for (i = 0; i < ubp->b_bcount; i += 512) /* XXX debug */ if (((char *) ubp->b_data)[i] != '<') { /* and not what we expected */ log(LOG_DEBUG, "At 0x%x (offset 0x%x): '%c' (0x%x)\n", (int) (&((char *) ubp->b_data)[i]), i, ((char *) ubp->b_data)[i], ((char *) ubp->b_data)[i]); Debugger("complete_request checksum"); } } } #endif if (rq->error) { /* did we have an error? 
*/ if (rq->isplex) { /* plex operation, */ ubp->b_flags |= B_ERROR; /* yes, propagate to user */ ubp->b_error = rq->error; } else /* try to recover */ queue_daemon_request(daemonrq_ioerror, (union daemoninfo) rq); /* let the daemon complete */ } else { ubp->b_resid = 0; /* completed our transfer */ if (rq->isplex == 0) /* volume request, */ VOL[rq->volplex.volno].active--; /* another request finished */ biodone(ubp); /* top level buffer completed */ freerq(rq); /* return the request storage */ } } } /* Free a request block and anything hanging off it */ void freerq(struct request *rq) { struct rqgroup *rqg; struct rqgroup *nrqg; /* next in chain */ int rqno; for (rqg = rq->rqg; rqg != NULL; rqg = nrqg) { /* through the whole request chain */ for (rqno = 0; rqno < rqg->count; rqno++) if ((rqg->rqe[rqno].flags & XFR_MALLOCED) /* data buffer was malloced, */ &&rqg->rqe[rqno].b.b_data) /* and the allocation succeeded */ Free(rqg->rqe[rqno].b.b_data); /* free it */ nrqg = rqg->next; /* note the next one */ Free(rqg); /* and free this one */ } Free(rq); /* free the request itself */ } void free_rqg(struct rqgroup *rqg) { if ((rqg->flags & XFR_GROUPOP) /* RAID 5 request */ &&(rqg->rqe) /* got a buffer structure */ &&(rqg->rqe->b.b_data)) /* and it has a buffer allocated */ Free(rqg->rqe->b.b_data); /* free it */ } /* I/O on subdisk completed */ void sdio_done(struct buf *bp) { struct sdbuf *sbp; sbp = (struct sdbuf *) bp; if (sbp->b.b_flags & B_ERROR) { /* had an error */ bp->b_flags |= B_ERROR; bp->b_error = sbp->b.b_error; } bp->b_resid = sbp->b.b_resid; biodone(sbp->bp); /* complete the caller's I/O */ /* Now update the statistics */ if (bp->b_flags & B_READ) { /* read operation */ DRIVE[sbp->driveno].reads++; DRIVE[sbp->driveno].bytes_read += bp->b_bcount; SD[sbp->sdno].reads++; SD[sbp->sdno].bytes_read += bp->b_bcount; } else { /* write operation */ DRIVE[sbp->driveno].writes++; DRIVE[sbp->driveno].bytes_written += bp->b_bcount; SD[sbp->sdno].writes++; SD[sbp->sdno].bytes_written += bp->b_bcount; } Free(sbp); } /* Start the second phase of a RAID5 group write operation. */ /* * XXX This could be improved on. It's quite CPU intensive, * and doing it at the end tends to lump it all together. * We should do this a transfer at a time */ void complete_raid5_write(struct rqelement *rqe) { int *sdata; /* source */ int *pdata; /* and parity block data */ int length; /* and count involved */ int count; /* loop counter */ int rqno; /* request index */ int rqoffset; /* offset of request data from parity data */ struct buf *bp; /* user buffer header */ struct request *rq; /* pointer to our request */ struct rqgroup *rqg; /* and to the request group */ struct rqelement *prqe; /* point to the parity block */ struct drive *drive; /* drive to access */ rqg = rqe->rqg; /* and to our request group */ rq = rqg->rq; /* point to our request */ bp = rq->bp; /* user's buffer header */ prqe = &rqg->rqe[0]; /* point to the parity block */ /* * If we get to this function, we have normal or * degraded writes, or a combination of both. We do * the same thing in each case: we perform an * exclusive or to the parity block. The only * difference is the origin of the data and the * address range. 
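 * Put another way: the parity block is the XOR of all the data blocks in the
 * stripe, so a normal write can derive the new parity as
 * P' = P ^ D(old) ^ D(new), while a degraded write starts from a zeroed
 * parity buffer and XORs in all of the stripe's data blocks, including the
 * data being written. The loops below do this an int at a time.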
*/ if (rqe->flags & XFR_DEGRADED_WRITE) { /* do the degraded write stuff */ pdata = (int *) (&prqe->b.b_data[(prqe->groupoffset) << DEV_BSHIFT]); /* parity data pointer */ bzero(pdata, prqe->grouplen << DEV_BSHIFT); /* start with nothing in the parity block */ /* Now get what data we need from each block */ for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */ /* * This can do with improvement. If we're doing * both a degraded and a normal write, we don't * need to xor (nor to read) the part of the block * that we're going to overwrite. FIXME XXX */ rqe = &rqg->rqe[rqno]; /* this request */ sdata = (int *) (&rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]); /* old data */ length = rqe->grouplen << (DEV_BSHIFT - 2); /* and count involved */ /* * add the data block to the parity block. Before * we started the request, we zeroed the parity * block, so the result of adding all the other * blocks and the block we want to write will be * the correct parity block. */ /* XXX do this in assembler */ for (count = 0; count < length; count++) pdata[count] ^= sdata[count]; if ((rqe->flags & XFR_MALLOCED) /* the buffer was malloced, */ &&((rqg->flags & XFR_NORMAL_WRITE) == 0)) { /* and we have no normal write, */ Free(rqe->b.b_data); /* free it now */ rqe->flags &= ~XFR_MALLOCED; } } } if (rqg->flags & XFR_NORMAL_WRITE) { /* do normal write stuff */ /* Get what data we need from each block */ for (rqno = 1; rqno < rqg->count; rqno++) { /* for all the data blocks */ rqe = &rqg->rqe[rqno]; /* this request */ if ((rqe->flags & (XFR_DATA_BLOCK | XFR_BAD_SUBDISK | XFR_NORMAL_WRITE)) == (XFR_DATA_BLOCK | XFR_NORMAL_WRITE)) { /* good data block to write */ sdata = (int *) &rqe->b.b_data[rqe->dataoffset << DEV_BSHIFT]; /* old data contents */ rqoffset = rqe->dataoffset + rqe->sdoffset - prqe->sdoffset; /* corresponding parity block offset */ pdata = (int *) (&prqe->b.b_data[rqoffset << DEV_BSHIFT]); /* parity data pointer */ length = rqe->datalen << (DEV_BSHIFT - 2); /* and count involved */ /* * "remove" the old data block * from the parity block */ /* XXX do this in assembler */ if ((pdata < ((int *) prqe->b.b_data)) || (&pdata[length] > ((int *) (prqe->b.b_data + prqe->b.b_bcount))) || (sdata < ((int *) rqe->b.b_data)) || (&sdata[length] > ((int *) (rqe->b.b_data + rqe->b.b_bcount)))) Debugger("Bounds overflow"); /* XXX */ for (count = 0; count < length; count++) pdata[count] ^= sdata[count]; /* "add" the new data block */ sdata = (int *) (&bp->b_data[rqe->useroffset << DEV_BSHIFT]); /* new data */ if ((sdata < ((int *) bp->b_data)) || (&sdata[length] > ((int *) (bp->b_data + bp->b_bcount)))) Debugger("Bounds overflow"); /* XXX */ for (count = 0; count < length; count++) pdata[count] ^= sdata[count]; /* Free the malloced buffer */ if (rqe->flags & XFR_MALLOCED) { /* the buffer was malloced, */ Free(rqe->b.b_data); /* free it */ rqe->flags &= ~XFR_MALLOCED; } else Debugger("not malloced"); /* XXX */ if ((rqe->b.b_flags & B_READ) /* this was a read */ &&((rqe->flags & XFR_BAD_SUBDISK) == 0)) { /* and we can write this block */ rqe->b.b_flags &= ~(B_READ | B_DONE); /* we're writing now */ rqe->b.b_flags |= B_CALL; /* call us when you're done */ rqe->flags &= ~XFR_PARITYOP; /* reset flags that brought use here */ rqe->b.b_data = &bp->b_data[rqe->useroffset << DEV_BSHIFT]; /* point to the user data */ rqe->b.b_bcount = rqe->datalen << DEV_BSHIFT; /* length to write */ rqe->b.b_bufsize = rqe->b.b_bcount; /* don't claim more */ rqe->b.b_resid = rqe->b.b_bcount; /* nothing transferred */ 
rqe->b.b_blkno += rqe->dataoffset; /* point to the correct block */ rqg->active++; /* another active request */ rqe->b.b_vp->v_numoutput++; /* one more output going */ drive = &DRIVE[rqe->driveno]; /* drive to access */ #if VINUMDEBUG if (debug & DEBUG_ADDRESSES) log(LOG_DEBUG, " %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n", rqe->b.b_flags & B_READ ? "Read" : "Write", major(rqe->b.b_dev), minor(rqe->b.b_dev), rqe->sdno, (u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset), rqe->b.b_blkno, rqe->b.b_bcount); /* XXX */ if (debug & DEBUG_NUMOUTPUT) log(LOG_DEBUG, " raid5.2 sd %d numoutput %ld\n", rqe->sdno, rqe->b.b_vp->v_numoutput); if (debug & DEBUG_LASTREQS) logrq(loginfo_raid5_data, (union rqinfou) rqe, bp); #endif (*bdevsw(rqe->b.b_dev)->d_strategy) (&rqe->b); } } } } /* Finally, write the parity block */ rqe = &rqg->rqe[0]; rqe->b.b_flags &= ~(B_READ | B_DONE); /* we're writing now */ rqe->b.b_flags |= B_CALL; /* call us when you're done */ rqe->flags &= ~XFR_PARITYOP; /* reset flags that brought use here */ rqg->flags &= ~XFR_PARITYOP; /* reset flags that brought use here */ rqe->b.b_bcount = rqe->buflen << DEV_BSHIFT; /* length to write */ rqe->b.b_bufsize = rqe->b.b_bcount; /* don't claim we have more */ rqe->b.b_resid = rqe->b.b_bcount; /* nothing transferred */ rqg->active++; /* another active request */ rqe->b.b_vp->v_numoutput++; /* one more output going */ drive = &DRIVE[rqe->driveno]; /* drive to access */ #if VINUMDEBUG if (debug & DEBUG_ADDRESSES) log(LOG_DEBUG, " %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n", rqe->b.b_flags & B_READ ? "Read" : "Write", major(rqe->b.b_dev), minor(rqe->b.b_dev), rqe->sdno, (u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset), rqe->b.b_blkno, rqe->b.b_bcount); /* XXX */ if (debug & DEBUG_NUMOUTPUT) log(LOG_DEBUG, " raid5.3 sd %d numoutput %ld\n", rqe->sdno, rqe->b.b_vp->v_numoutput); if (debug & DEBUG_LASTREQS) logrq(loginfo_raid5_parity, (union rqinfou) rqe, bp); #endif (*bdevsw(rqe->b.b_dev)->d_strategy) (&rqe->b); } Index: head/sys/dev/vinum/vinumio.c =================================================================== --- head/sys/dev/vinum/vinumio.c (revision 49534) +++ head/sys/dev/vinum/vinumio.c (revision 49535) @@ -1,1103 +1,1102 @@ /*- * Copyright (c) 1997, 1998 * Nan Yang Computer Services Limited. All rights reserved. * * This software is distributed under the so-called ``Berkeley * License'': * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Nan Yang Computer * Services Limited. * 4. Neither the name of the Company nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * This software is provided ``as is'', and any express or implied * warranties, including, but not limited to, the implied warranties of * merchantability and fitness for a particular purpose are disclaimed. 
* In no event shall the company or contributors be liable for any * direct, indirect, incidental, special, exemplary, or consequential * damages (including, but not limited to, procurement of substitute * goods or services; loss of use, data, or profits; or business * interruption) however caused and on any theory of liability, whether * in contract, strict liability, or tort (including negligence or * otherwise) arising in any way out of the use of this software, even if * advised of the possibility of such damage. * - * $Id: vinumio.c,v 1.33 1999/08/07 08:07:05 grog Exp $ + * $Id: vinumio.c,v 1.34 1999/08/08 14:11:03 bde Exp $ */ #include #include -#include static char *sappend(char *txt, char *s); static int drivecmp(const void *va, const void *vb); /* * Open the device associated with the drive, and set drive's vp. * Return an error number */ int open_drive(struct drive *drive, struct proc *p, int verbose) { struct nameidata nd; int error; if (drive->devicename[0] != '/') /* no device name */ sprintf(drive->devicename, "/dev/%s", drive->label.name); /* get it from the drive name */ NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, drive->devicename, p); error = vn_open(&nd, FREAD | FWRITE, 0); /* open the device */ if (error != 0) { /* can't open? */ set_drive_state(drive->driveno, drive_down, setstate_force); drive->lasterror = error; if (verbose) log(LOG_WARNING, "vinum open_drive %s: failed with error %d\n", drive->devicename, error); /* XXX */ return error; } drive->vp = nd.ni_vp; drive->p = p; if (drive->vp->v_usecount > 1) { /* already in use? */ if (verbose) log(LOG_WARNING, "open_drive %s: use count %d, ignoring\n", /* XXX where does this come from? */ drive->devicename, drive->vp->v_usecount); } if (drive->vp->v_type != VBLK) { /* only consider block devices */ VOP_UNLOCK(drive->vp, 0, drive->p); close_drive(drive); set_drive_state(drive->driveno, drive_down, setstate_force); /* this also closes the drive */ drive->lasterror = ENOTBLK; if (verbose) log(LOG_WARNING, "vinum open_drive %s: Not a block device\n", drive->devicename); /* XXX */ return ENOTBLK; } drive->vp->v_numoutput = 0; VOP_UNLOCK(drive->vp, 0, drive->p); return 0; } /* * Set some variables in the drive struct * in more convenient form. Return error indication */ int set_drive_parms(struct drive *drive) { drive->blocksize = BLKDEV_IOSIZE; /* XXX do we need this? */ drive->secsperblock = drive->blocksize /* number of sectors per block */ / drive->partinfo.disklab->d_secsize; /* Now update the label part */ bcopy(hostname, drive->label.sysname, VINUMHOSTNAMELEN); /* put in host name */ getmicrotime(&drive->label.date_of_birth); /* and current time */ drive->label.drive_size = ((u_int64_t) drive->partinfo.part->p_size) /* size of the drive in bytes */ *((u_int64_t) drive->partinfo.disklab->d_secsize); #if VINUMDEBUG if (debug & DEBUG_BIGDRIVE) /* pretend we're 100 times as big */ drive->label.drive_size *= 100; #endif /* number of sectors available for subdisks */ drive->sectors_available = drive->label.drive_size / DEV_BSIZE - DATASTART; /* * XXX Bug in 3.0 as of January 1998: you can open * non-existent slices. 
They have a length of 0 */ if (drive->label.drive_size < MINVINUMSLICE) { /* too small to worry about */ set_drive_state(drive->driveno, drive_down, setstate_force); drive->lasterror = ENOSPC; return ENOSPC; } drive->freelist_size = INITIAL_DRIVE_FREELIST; /* initial number of entries */ drive->freelist = (struct drive_freelist *) Malloc(INITIAL_DRIVE_FREELIST * sizeof(struct drive_freelist)); if (drive->freelist == NULL) /* can't malloc, dammit */ return ENOSPC; drive->freelist_entries = 1; /* just (almost) the complete drive */ drive->freelist[0].offset = DATASTART; /* starts here */ drive->freelist[0].sectors = (drive->label.drive_size >> DEV_BSHIFT) - DATASTART; /* and it's this long */ if (drive->label.name[0] != '\0') /* got a name */ set_drive_state(drive->driveno, drive_up, setstate_force); /* our drive is accessible */ else /* we know about it, but that's all */ drive->state = drive_referenced; return 0; } /* * Initialize a drive: open the device and add device * information */ int init_drive(struct drive *drive, int verbose) { int error; if (drive->devicename[0] != '/') { drive->lasterror = EINVAL; log(LOG_ERR, "vinum: Can't open drive without drive name\n"); return EINVAL; } error = open_drive(drive, curproc, verbose); /* open the drive */ if (error) return error; error = VOP_IOCTL(drive->vp, /* get the partition information */ DIOCGPART, (caddr_t) & drive->partinfo, FREAD, NOCRED, curproc); if (error) { if (verbose) log(LOG_WARNING, "vinum open_drive %s: Can't get partition information, error %d\n", drive->devicename, error); /* XXX */ close_drive(drive); drive->lasterror = error; drive->state = drive_down; /* don't tell the system about this one at all */ return error; } if (drive->partinfo.part->p_fstype != FS_VINUM) { /* not Vinum */ drive->lasterror = EFTYPE; if (verbose) log(LOG_WARNING, "vinum open_drive %s: Wrong partition type for vinum\n", drive->devicename); /* XXX */ close_drive(drive); drive->state = drive_down; /* don't tell the system about this one at all */ return EFTYPE; } return set_drive_parms(drive); /* set various odds and ends */ } /* Close a drive if it's open. */ void close_drive(struct drive *drive) { LOCKDRIVE(drive); /* keep the daemon out */ if (drive->vp) close_locked_drive(drive); /* and close it */ unlockdrive(drive); } /* * Real drive close code, called with drive already locked. * We have also checked that the drive is open. No errors. */ void close_locked_drive(struct drive *drive) { /* * If we can't access the drive, we can't flush * the queues, which spec_close() will try to * do. Get rid of them here first. */ if (drive->state < drive_up) { /* we can't access the drive, */ vn_lock(drive->vp, LK_EXCLUSIVE | LK_RETRY, drive->p); vinvalbuf(drive->vp, 0, NOCRED, drive->p, 0, 0); VOP_UNLOCK(drive->vp, 0, drive->p); } vn_close(drive->vp, FREAD | FWRITE, NOCRED, drive->p); #ifdef VINUMDEBUG if ((debug & DEBUG_WARNINGS) /* want to hear about them */ &&(drive->vp->v_usecount)) /* XXX shouldn't happen */ log(LOG_WARNING, "close_drive %s: use count still %d\n", drive->devicename, drive->vp->v_usecount); #endif drive->vp = NULL; } /* * Remove drive from the configuration. 
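 * If the drive is up, the on-disk magic number is first overwritten with
 * VINUM_NOMAGIC, so that a later scan of the device sees a deliberately
 * deleted label (DL_DELETED_LABEL) rather than a live vinum drive; the
 * in-core structures are then freed and the updated configuration saved.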
* Caller must ensure that it isn't active */ void remove_drive(int driveno) { struct drive *drive = &vinum_conf.drive[driveno]; long long int nomagic = VINUM_NOMAGIC; /* no magic number */ if (drive->state > drive_referenced) { /* real drive */ if (drive->state == drive_up) write_drive(drive, /* obliterate the magic, but leave a hint */ (char *) &nomagic, 8, VINUM_LABEL_OFFSET); free_drive(drive); /* close it and free resources */ save_config(); /* and save the updated configuration */ } } /* * Transfer drive data. Usually called from one of these defines; * #define read_drive(a, b, c, d) driveio (a, b, c, d, B_READ) * #define write_drive(a, b, c, d) driveio (a, b, c, d, B_WRITE) * * length and offset are in bytes, but must be multiples of sector * size. The function *does not check* for this condition, and * truncates ruthlessly. * Return error number */ int driveio(struct drive *drive, char *buf, size_t length, off_t offset, int flag) { int error; struct buf *bp; char foo[40]; error = 0; /* to keep the compiler happy */ while (length) { /* divide into small enough blocks */ int len = min(length, MAXBSIZE); /* maximum block device transfer is MAXBSIZE */ bp = geteblk(len); /* get a buffer header */ bp->b_flags = flag; bp->b_dev = drive->vp->v_rdev; /* device */ bp->b_blkno = offset / drive->partinfo.disklab->d_secsize; /* block number */ bp->b_data = buf; bp->b_bcount = len; bp->b_bufsize = len; (*bdevsw(bp->b_dev)->d_strategy) (bp); /* initiate the transfer */ error = biowait(bp); printf("driveio: %s dev %d.%d, block 0x%x, len 0x%lx, error %d\n", /* XXX */ flag ? "read" : "write", major(bp->b_dev), minor(bp->b_dev), bp->b_blkno, bp->b_bcount, error); bcopy(buf, foo, 40); foo[39] = '\0'; printf("---> %s\n", foo); /* XXXXXX */ bp->b_flags |= B_INVAL | B_AGE; brelse(bp); if (error) break; length -= len; /* update pointers */ buf += len; offset += len; } return error; } /* * Read data from a drive * * Return error number */ int read_drive(struct drive *drive, void *buf, size_t length, off_t offset) { int error; struct buf *bp; daddr_t nextbn; long bscale; struct uio uio; struct iovec iov; daddr_t blocknum; /* block number */ int blockoff; /* offset in block */ int count; /* amount to transfer */ iov.iov_base = buf; iov.iov_len = length; uio.uio_iov = &iov; uio.uio_iovcnt = length; uio.uio_offset = offset; uio.uio_resid = length; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_procp = curproc; bscale = btodb(drive->blocksize); /* mask off offset from block number */ do { blocknum = btodb(uio.uio_offset) & ~(bscale - 1); /* get the block number */ blockoff = uio.uio_offset % drive->blocksize; /* offset in block */ count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */ uio.uio_resid); /* XXX Check this. I think the test is wrong */ if (drive->vp->v_lastr + bscale == blocknum) { /* did our last read finish in this block? 
*/ nextbn = blocknum + bscale; /* note the end of the transfer */ error = breadn(drive->vp, /* and read with read-ahead */ blocknum, (int) drive->blocksize, &nextbn, (int *) &drive->blocksize, 1, NOCRED, &bp); } else /* random read: just read this block */ error = bread(drive->vp, blocknum, (int) drive->blocksize, NOCRED, &bp); drive->vp->v_lastr = blocknum; /* note the last block we read */ count = min(count, drive->blocksize - bp->b_resid); if (error) { brelse(bp); return error; } error = uiomove((char *) bp->b_data + blockoff, count, &uio); /* move the data */ brelse(bp); } while (error == 0 && uio.uio_resid > 0 && count != 0); return error; } /* * Write data to a drive * * Return error number */ int write_drive(struct drive *drive, void *buf, size_t length, off_t offset) { int error; struct buf *bp; struct uio uio; struct iovec iov; daddr_t blocknum; /* block number */ int blockoff; /* offset in block */ int count; /* amount to transfer */ int blockshift; if (drive->state == drive_down) /* currently down */ return 0; /* ignore */ if (drive->vp == NULL) { drive->lasterror = ENODEV; return ENODEV; /* not configured yet */ } iov.iov_base = buf; iov.iov_len = length; uio.uio_iov = &iov; uio.uio_iovcnt = length; uio.uio_offset = offset; uio.uio_resid = length; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_procp = curproc; error = 0; blockshift = btodb(drive->blocksize) - 1; /* amount to shift block number * to get sector number */ do { blocknum = btodb(uio.uio_offset) & ~blockshift; /* get the block number */ blockoff = uio.uio_offset % drive->blocksize; /* offset in block */ count = min((unsigned) (drive->blocksize - blockoff), /* amount to transfer in this block */ uio.uio_resid); if (count == drive->blocksize) /* the whole block */ bp = getblk(drive->vp, blocknum, drive->blocksize, 0, 0); /* just transfer it */ else /* partial block: */ error = bread(drive->vp, /* read it first */ blocknum, drive->blocksize, NOCRED, &bp); count = min(count, drive->blocksize - bp->b_resid); /* how much will we transfer now? */ if (error == 0) error = uiomove((char *) bp->b_data + blockoff, /* move the data to the block */ count, &uio); if (error) { brelse(bp); drive->lasterror = error; switch (error) { case EIO: set_drive_state(drive->driveno, drive_down, setstate_force); break; /* XXX Add other possibilities here */ default: } return error; } if (count + blockoff == drive->blocksize) /* * The transfer goes to the end of the block. There's * no need to wait for any more data to arrive. */ bawrite(bp); /* start the write now */ else bdwrite(bp); /* do a delayed write */ } while (error == 0 && uio.uio_resid > 0 && count != 0); if (error) drive->lasterror = error; return error; /* OK */ } /* Wake up on completion */ void drive_io_done(struct buf *bp) { wakeup((caddr_t) bp); /* Wachet auf! */ bp->b_flags &= ~B_CALL; /* don't do this again */ } /* * Check a drive for a vinum header. If found, * update the drive information. We come here * with a partially populated drive structure * which includes the device name. * * Return information on what we found. * * This function is called from two places: check_drive, * which wants to find out whether the drive is a * Vinum drive, and config_drive, which asserts that * it is a vinum drive. In the first case, we don't * print error messages (verbose==0), in the second * we do (verbose==1). 
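 * Possible results: DL_CANT_OPEN (the device couldn't be opened or
 * initialized), DL_OURS (a valid vinum label was found), DL_WRONG_DRIVE (a
 * vinum label was found but its name doesn't match the one we were told to
 * expect), DL_DELETED_LABEL (the magic was deliberately overwritten by an
 * earlier remove) and DL_NOT_OURS (no vinum magic at all).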
*/ enum drive_label_info read_drive_label(struct drive *drive, int verbose) { int error; int result; /* result of our search */ struct vinum_hdr *vhdr; /* and as header */ error = init_drive(drive, 0); /* find the drive */ if (error) /* find the drive */ return DL_CANT_OPEN; /* not ours */ vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* allocate buffers */ CHECKALLOC(vhdr, "Can't allocate memory"); error = read_drive(drive, (void *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); if (vhdr->magic == VINUM_MAGIC) { /* ours! */ if (drive->label.name[0] /* we have a name for this drive */ &&(strcmp(drive->label.name, vhdr->label.name))) { /* but it doesn't match the real name */ drive->lasterror = EINVAL; result = DL_WRONG_DRIVE; /* it's the wrong drive */ } else { drive->state = drive_up; /* it's OK by us */ result = DL_OURS; } /* * We copy the drive anyway so that we have * the correct name in the drive info. This * may not be the name specified */ drive->label = vhdr->label; /* put in the label information */ } else if (vhdr->magic == VINUM_NOMAGIC) /* was ours, but we gave it away */ result = DL_DELETED_LABEL; /* and return the info */ else result = DL_NOT_OURS; /* we could have it, but we don't yet */ Free(vhdr); /* that's all. */ return result; } /* * Check a drive for a vinum header. If found, * read configuration information from the drive and * incorporate the data into the configuration. * * Return drive number. */ struct drive * check_drive(char *devicename) { int driveno; int i; struct drive *drive; driveno = find_drive_by_dev(devicename, 1); /* if entry doesn't exist, create it */ drive = &vinum_conf.drive[driveno]; /* and get a pointer */ if (read_drive_label(drive, 0) == DL_OURS) { /* not ours */ for (i = 0; i < vinum_conf.drives_allocated; i++) { /* see if the name already exists */ if ((i != driveno) /* not this drive */ &&(DRIVE[i].state != drive_unallocated) /* and it's allocated */ &&(strcmp(DRIVE[i].label.name, DRIVE[driveno].label.name) == 0)) { /* and it has the same name */ struct drive *mydrive = &DRIVE[i]; if (mydrive->devicename[0] == '/') { /* we know a device name for it */ /* * set an error, but don't take the drive down: * that would cause unneeded error messages. 
*/ drive->lasterror = EEXIST; break; } else { /* it's just a place holder, */ int sdno; for (sdno = 0; sdno < vinum_conf.subdisks_allocated; sdno++) { /* look at each subdisk */ if ((SD[sdno].driveno == i) /* it's pointing to this one, */ &&(SD[sdno].state != sd_unallocated)) { /* and it's a real subdisk */ SD[sdno].driveno = drive->driveno; /* point to the one we found */ update_sd_state(sdno); /* and update its state */ } } bzero(mydrive, sizeof(struct drive)); /* don't deallocate it, just remove it */ } } } } else { if (drive->lasterror == 0) drive->lasterror = ENODEV; set_drive_state(drive->driveno, drive_down, setstate_force); } return drive; } static char * sappend(char *txt, char *s) { while ((*s++ = *txt++) != 0); return s - 1; } void format_config(char *config, int len) { int i; int j; char *s = config; char *configend = &config[len]; bzero(config, len); /* First write the volume configuration */ for (i = 0; i < vinum_conf.volumes_allocated; i++) { struct volume *vol; vol = &vinum_conf.volume[i]; if ((vol->state > volume_uninit) && (vol->name[0] != '\0')) { /* paranoia */ if (vol->preferred_plex >= 0) /* preferences, */ snprintf(s, configend - s, "volume %s state %s readpol prefer %s", vol->name, volume_state(vol->state), vinum_conf.plex[vol->preferred_plex].name); else /* default round-robin */ snprintf(s, configend - s, "volume %s state %s", vol->name, volume_state(vol->state)); while (*s) s++; /* find the end */ s = sappend("\n", s); } } /* Then the plex configuration */ for (i = 0; i < vinum_conf.plexes_allocated; i++) { struct plex *plex; plex = &vinum_conf.plex[i]; if ((plex->state != plex_referenced) && (plex->name[0] != '\0')) { /* paranoia */ snprintf(s, configend - s, "plex name %s state %s org %s ", plex->name, plex_state(plex->state), plex_org(plex->organization)); while (*s) s++; /* find the end */ if ((plex->organization == plex_striped) || (plex->organization == plex_raid5)) { snprintf(s, configend - s, "%ds ", (int) plex->stripesize); while (*s) s++; /* find the end */ } if (plex->volno >= 0) /* we have a volume */ snprintf(s, configend - s, "vol %s ", vinum_conf.volume[plex->volno].name); while (*s) s++; /* find the end */ for (j = 0; j < plex->subdisks; j++) { snprintf(s, configend - s, " sd %s", vinum_conf.sd[plex->sdnos[j]].name); } s = sappend("\n", s); } } /* And finally the subdisk configuration */ for (i = 0; i < vinum_conf.subdisks_allocated; i++) { struct sd *sd; sd = &SD[i]; if ((sd->state != sd_referenced) && (sd->name[0] != '\0')) { /* paranoia */ if (sd->plexno >= 0) snprintf(s, configend - s, "sd name %s drive %s plex %s state %s len %llus driveoffset %llus plexoffset %llds\n", sd->name, vinum_conf.drive[sd->driveno].label.name, vinum_conf.plex[sd->plexno].name, sd_state(sd->state), (unsigned long long) sd->sectors, (unsigned long long) sd->driveoffset, (long long) sd->plexoffset); else snprintf(s, configend - s, "sd name %s drive %s state %s len %llus driveoffset %llus detached\n", sd->name, vinum_conf.drive[sd->driveno].label.name, sd_state(sd->state), (unsigned long long) sd->sectors, (unsigned long long) sd->driveoffset); while (*s) s++; /* find the end */ } } if (s > &config[len - 2]) panic("vinum: configuration data overflow"); } /* * issue a save config request to the dæmon. The actual work * is done in process context by daemon_save_config */ void save_config(void) { queue_daemon_request(daemonrq_saveconfig, (union daemoninfo) NULL); } /* * Write the configuration to all vinum slices. 
This * is performed by the dæmon only */ void daemon_save_config(void) { int error; int written_config; /* set when we first write the config to disk */ int driveno; struct drive *drive; /* point to current drive info */ struct vinum_hdr *vhdr; /* and as header */ char *config; /* point to config data */ int wlabel_on; /* to set writing label on/off */ /* don't save the configuration while we're still working on it */ if (vinum_conf.flags & VF_CONFIGURING) return; written_config = 0; /* no config written yet */ /* Build a volume header */ vhdr = (struct vinum_hdr *) Malloc(VINUMHEADERLEN); /* get space for the config data */ CHECKALLOC(vhdr, "Can't allocate config data"); vhdr->magic = VINUM_MAGIC; /* magic number */ vhdr->config_length = MAXCONFIG; /* length of following config info */ config = Malloc(MAXCONFIG); /* get space for the config data */ CHECKALLOC(config, "Can't allocate config data"); format_config(config, MAXCONFIG); error = 0; /* no errors yet */ for (driveno = 0; driveno < vinum_conf.drives_allocated; driveno++) { drive = &vinum_conf.drive[driveno]; /* point to drive */ if (drive->state > drive_referenced) { LOCKDRIVE(drive); /* don't let it change */ /* * First, do some drive consistency checks. Some * of these are kludges, others require a process * context and couldn't be done before */ if ((drive->devicename[0] == '\0') /* XXX we keep getting these nameless drives */ ||(drive->label.name[0] == '\0')) { /* XXX we keep getting these nameless drives */ unlockdrive(drive); log(LOG_WARNING, "Removing incomplete drive, index %d\n", driveno); if (drive->vp) /* how can it be open without a name? */ close_drive(drive); free_drive(drive); /* get rid of it */ break; } if ((drive->vp == NULL) /* drive not open */ &&(drive->state > drive_down)) { /* and it thinks it's not down */ unlockdrive(drive); set_drive_state(driveno, drive_down, setstate_force); /* tell it what's what */ continue; } if ((drive->state == drive_down) /* it's down */ &&(drive->vp != NULL)) { /* but open, */ unlockdrive(drive); close_drive(drive); /* close it */ } else if (drive->state > drive_down) { getmicrotime(&drive->label.last_update); /* time of last update is now */ bcopy((char *) &drive->label, /* and the label info from the drive structure */ (char *) &vhdr->label, sizeof(vhdr->label)); if ((drive->state != drive_unallocated) && (drive->state != drive_referenced)) { /* and it's a real drive */ wlabel_on = 1; /* enable writing the label */ error = VOP_IOCTL(drive->vp, /* make the label writeable */ DIOCWLABEL, (caddr_t) & wlabel_on, FWRITE, NOCRED, curproc); if (error == 0) error = write_drive(drive, (char *) vhdr, VINUMHEADERLEN, VINUM_LABEL_OFFSET); if (error == 0) error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET); /* first config copy */ if (error == 0) error = write_drive(drive, config, MAXCONFIG, VINUM_CONFIG_OFFSET + MAXCONFIG); /* second copy */ wlabel_on = 0; /* enable writing the label */ if (error == 0) VOP_IOCTL(drive->vp, /* make the label non-writeable again */ DIOCWLABEL, (caddr_t) & wlabel_on, FWRITE, NOCRED, curproc); unlockdrive(drive); if (error) { log(LOG_ERR, "vinum: Can't write config to %s, error %d\n", drive->devicename, error); set_drive_state(drive->driveno, drive_down, setstate_force); } else written_config = 1; /* we've written it on at least one drive */ } } else /* not worth looking at, */ unlockdrive(drive); /* just unlock it again */ } } Free(vhdr); Free(config); } /* * Disk labels are a mess. 
The correct way to access them * is with the DIOC[GSW]DINFO ioctls, but some programs, such * as newfs, access the disk directly, so we have to write * things there. We do this only on request. If a user * request tries to read it directly, we fake up one on the fly. */ /* * get_volume_label returns a label structure to lp, which * is allocated by the caller */ void get_volume_label(struct volume *vol, struct disklabel *lp) { bzero(lp, sizeof(struct disklabel)); strncpy(lp->d_typename, "vinum", sizeof(lp->d_typename)); lp->d_type = DTYPE_VINUM; strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name))); lp->d_rpm = 14400 * vol->plexes; /* to keep them guessing */ lp->d_interleave = 1; lp->d_flags = 0; /* * Fitting unto the vine, a vinum has a single * track with all its sectors */ lp->d_secsize = DEV_BSIZE; /* bytes per sector */ lp->d_nsectors = vol->size; /* data sectors per track */ lp->d_ntracks = 1; /* tracks per cylinder */ lp->d_ncylinders = 1; /* data cylinders per unit */ lp->d_secpercyl = vol->size; /* data sectors per cylinder */ lp->d_secperunit = vol->size; /* data sectors per unit */ lp->d_bbsize = BBSIZE; lp->d_sbsize = SBSIZE; lp->d_magic = DISKMAGIC; lp->d_magic2 = DISKMAGIC; /* * Set up partitions a, b and c to be identical * and the size of the volume. a is UFS, b is * swap, c is nothing */ lp->d_partitions[0].p_size = vol->size; lp->d_partitions[0].p_fsize = 1024; lp->d_partitions[0].p_fstype = FS_BSDFFS; /* FreeBSD File System :-) */ lp->d_partitions[0].p_fsize = 1024; /* FS fragment size */ lp->d_partitions[0].p_frag = 8; /* and fragments per block */ lp->d_partitions[SWAP_PART].p_size = vol->size; lp->d_partitions[SWAP_PART].p_fstype = FS_SWAP; /* swap partition */ lp->d_partitions[LABEL_PART].p_size = vol->size; lp->d_npartitions = LABEL_PART + 1; strncpy(lp->d_packname, vol->name, min(sizeof(lp->d_packname), sizeof(vol->name))); lp->d_checksum = dkcksum(lp); } /* Write a volume label. This implements the VINUM_LABEL ioctl. */ int write_volume_label(int volno) { struct disklabel *lp; struct buf *bp; struct disklabel *dlp; struct volume *vol; int error; lp = (struct disklabel *) Malloc((sizeof(struct disklabel) + (DEV_BSIZE - 1)) & (DEV_BSIZE - 1)); if (lp == 0) return ENOMEM; if ((unsigned) (volno) >= (unsigned) vinum_conf.volumes_allocated) /* invalid volume */ return ENOENT; vol = &VOL[volno]; /* volume in question */ if (vol->state <= volume_uninit) /* nothing there */ return ENXIO; else if (vol->state < volume_up) /* not accessible */ return EIO; /* I/O error */ get_volume_label(vol, lp); /* get the label */ /* * Now write to disk. This code is derived from the * system writedisklabel (), which does silly things * like reading the label and refusing to write * unless it's already there. 
*/ bp = geteblk((int) lp->d_secsize); /* get a buffer */ bp->b_dev = makedev(CDEV_MAJOR, vol->volno); /* our own raw volume */ bp->b_blkno = LABELSECTOR * ((int) lp->d_secsize / DEV_BSIZE); bp->b_bcount = lp->d_secsize; bzero(bp->b_data, lp->d_secsize); dlp = (struct disklabel *) bp->b_data; *dlp = *lp; bp->b_flags &= ~B_INVAL; bp->b_flags |= B_WRITE; vinumstrategy(bp); /* write it out */ error = biowait(bp); bp->b_flags |= B_INVAL | B_AGE; brelse(bp); return error; } /* Initialize a subdisk */ int initsd(int sdno) { return 0; } /* Look at all disks on the system for vinum slices */ int vinum_scandisk(char *devicename[], int drives) { struct drive *volatile drive; volatile int driveno; int firstdrive; /* first drive in this list */ volatile int gooddrives; /* number of usable drives found */ int firsttime; /* set if we have never configured before */ int error; struct nameidata nd; /* mount point credentials */ char *config_text; /* read the config info from disk into here */ char *volatile cptr; /* pointer into config information */ char *eptr; /* end pointer into config information */ char *config_line; /* copy the config line to */ volatile int status; int *volatile drivelist; /* list of drive indices */ #define DRIVENAMELEN 64 #define DRIVEPARTS 35 /* max partitions per drive, excluding c */ char partname[DRIVENAMELEN]; /* for creating partition names */ status = 0; /* success indication */ vinum_conf.flags |= VF_READING_CONFIG; /* reading config from disk */ gooddrives = 0; /* number of usable drives found */ firstdrive = vinum_conf.drives_used; /* the first drive */ firsttime = vinum_conf.drives_used == 0; /* are we a virgin? */ /* allocate a drive pointer list */ drivelist = (int *) Malloc(drives * DRIVEPARTS * sizeof(int)); CHECKALLOC(drivelist, "Can't allocate memory"); /* Open all drives and find which was modified most recently */ for (driveno = 0; driveno < drives; driveno++) { char part; /* UNIX partition */ int slice; int founddrive; /* flag when we find a vinum drive */ founddrive = 0; /* no vinum drive found yet on this spindle */ /* first try the partition table */ for (slice = 1; slice < 5; slice++) for (part = 'a'; part < 'i'; part++) { if (part != 'c') { /* don't do the c partition */ snprintf(partname, DRIVENAMELEN, "%ss%d%c", devicename[driveno], slice, part); drive = check_drive(partname); /* try to open it */ if (drive->lasterror != 0) /* didn't work, */ free_drive(drive); /* get rid of it */ else if (drive->flags & VF_CONFIGURED) /* already read this config, */ log(LOG_WARNING, "vinum: already read config from %s\n", /* say so */ drive->label.name); else { drivelist[gooddrives] = drive->driveno; /* keep the drive index */ drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */ gooddrives++; } } } if (founddrive == 0) { /* didn't find anything, */ for (part = 'a'; part < 'i'; part++) /* try the compatibility partition */ if (part != 'c') { /* don't do the c partition */ snprintf(partname, /* /dev/sd0a */ DRIVENAMELEN, "%s%c", devicename[driveno], part); drive = check_drive(partname); /* try to open it */ if ((drive->lasterror != 0) /* didn't work, */ ||(drive->state != drive_up)) free_drive(drive); /* get rid of it */ else if (drive->flags & VF_CONFIGURED) /* already read this config, */ log(LOG_WARNING, "vinum: already read config from %s\n", /* say so */ drive->label.name); else { drivelist[gooddrives] = drive->driveno; /* keep the drive index */ drive->flags &= ~VF_NEWBORN; /* which is no longer newly born */ gooddrives++; } } } } if (gooddrives == 0) { 
log(LOG_WARNING, "vinum: no drives found\n"); return ENOENT; } /* * We now have at least one drive * open. Sort them in order of config time * and merge the config info with what we * have already */ qsort(drivelist, gooddrives, sizeof(int), drivecmp); config_text = (char *) Malloc(MAXCONFIG * 2); /* allocate buffers */ CHECKALLOC(config_text, "Can't allocate memory"); config_line = (char *) Malloc(MAXCONFIGLINE * 2); /* allocate buffers */ CHECKALLOC(config_line, "Can't allocate memory"); for (driveno = 0; driveno < gooddrives; driveno++) { /* now include the config */ drive = &DRIVE[drivelist[driveno]]; /* point to the drive */ if (firsttime && (driveno == 0)) /* we've never configured before, */ log(LOG_INFO, "vinum: reading configuration from %s\n", drive->devicename); else log(LOG_INFO, "vinum: updating configuration from %s\n", drive->devicename); /* Read in both copies of the configuration information */ error = read_drive(drive, config_text, MAXCONFIG * 2, VINUM_CONFIG_OFFSET); if (error != 0) { log(LOG_ERR, "vinum: Can't read device %s, error %d\n", drive->devicename, error); Free(config_text); Free(config_line); free_drive(drive); /* give it back */ status = error; } /* * XXX At this point, check that the two copies are the same, and do something useful if not. * In particular, consider which is newer, and what this means for the integrity of the * data on the drive */ else { vinum_conf.drives_used++; /* another drive in use */ /* Parse the configuration, and add it to the global configuration */ for (cptr = config_text; *cptr != '\0';) { /* love this style(9) */ volatile int parse_status; /* return value from parse_config */ for (eptr = config_line; (*cptr != '\n') && (*cptr != '\0');) /* until the end of the line */ *eptr++ = *cptr++; *eptr = '\0'; /* and delimit */ if (setjmp(command_fail) == 0) { /* come back here on error and continue */ parse_status = parse_config(config_line, &keyword_set, 1); /* parse the config line */ if (parse_status < 0) { /* error in config */ /* * This config should have been parsed in user * space. If we run into problems here, something * serious is afoot. Complain and let the user * snarf the config to see what's wrong */ log(LOG_ERR, "vinum: Config error on drive %s, aborting integration\n", nd.ni_dirp); Free(config_text); Free(config_line); free_drive(drive); /* give it back */ status = EINVAL; } } while (*cptr == '\n') cptr++; /* skip to next line */ } } drive->flags |= VF_CONFIGURED; /* read this drive's configuration */ } Free(config_text); Free(drivelist); vinum_conf.flags &= ~VF_READING_CONFIG; /* no longer reading from disk */ if (status != 0) throw_rude_remark(status, "Couldn't read configuration"); updateconfig(VF_READING_CONFIG); /* update from disk config */ return 0; } /* * Compare the modification dates of the drives, for qsort. 
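 * The drive with the most recent configuration update must sort to the front
 * of the list: vinum_scandisk() reads its configuration first and then only
 * updates it from the older drives.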
* Return 1 if a < b, 0 if a == b, 01 if a > b: in other * words, sort backwards */ int drivecmp(const void *va, const void *vb) { const struct drive *a = &DRIVE[*(const int *) va]; const struct drive *b = &DRIVE[*(const int *) vb]; if ((a->label.last_update.tv_sec == b->label.last_update.tv_sec) && (a->label.last_update.tv_usec == b->label.last_update.tv_usec)) return 0; else if ((a->label.last_update.tv_sec > b->label.last_update.tv_sec) || ((a->label.last_update.tv_sec == b->label.last_update.tv_sec) && (a->label.last_update.tv_usec > b->label.last_update.tv_usec))) return -1; else return 1; } Index: head/sys/dev/vinum/vinumraid5.c =================================================================== --- head/sys/dev/vinum/vinumraid5.c (revision 49534) +++ head/sys/dev/vinum/vinumraid5.c (revision 49535) @@ -1,638 +1,637 @@ /*- * Copyright (c) 1997, 1998 * Cybernet Corporation and Nan Yang Computer Services Limited. * All rights reserved. * * This software was developed as part of the NetMAX project. * * Written by Greg Lehey * * This software is distributed under the so-called ``Berkeley * License'': * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Cybernet Corporation * and Nan Yang Computer Services Limited * 4. Neither the name of the Companies nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * This software is provided ``as is'', and any express or implied * warranties, including, but not limited to, the implied warranties of * merchantability and fitness for a particular purpose are disclaimed. * In no event shall the company or contributors be liable for any * direct, indirect, incidental, special, exemplary, or consequential * damages (including, but not limited to, procurement of substitute * goods or services; loss of use, data, or profits; or business * interruption) however caused and on any theory of liability, whether * in contract, strict liability, or tort (including negligence or * otherwise) arising in any way out of the use of this software, even if * advised of the possibility of such damage. * - * $Id: raid5.c,v 1.15 1999/07/07 03:46:01 grog Exp grog $ + * $Id: vinumraid5.c,v 1.1 1999/08/07 08:22:49 grog Exp $ */ /* * XXX To do: * * lock ranges while calculating parity */ #include #include -#include #include /* * Parameters which describe the current transfer. 
* These are only used for calculation, but they * need to be passed to other functions, so it's * tidier to put them in a struct */ struct metrics { daddr_t stripebase; /* base address of stripe (1st subdisk) */ int stripeoffset; /* offset in stripe */ int stripesectors; /* total sectors to transfer in this stripe */ daddr_t sdbase; /* offset in subdisk of stripe base */ int sdcount; /* number of disks involved in this transfer */ daddr_t diskstart; /* remember where this transfer starts */ int psdno; /* number of parity subdisk */ int badsdno; /* number of down subdisk, if there is one */ int firstsdno; /* first data subdisk number */ /* These correspond to the fields in rqelement, sort of */ int useroffset; /* * Initial offset and length values for the first * data block */ int initoffset; /* start address of block to transfer */ short initlen; /* length in sectors of data transfer */ /* Define a normal operation */ int dataoffset; /* start address of block to transfer */ int datalen; /* length in sectors of data transfer */ /* Define a group operation */ int groupoffset; /* subdisk offset of group operation */ int grouplen; /* length in sectors of group operation */ /* Define a normal write operation */ int writeoffset; /* subdisk offset of normal write */ int writelen; /* length in sectors of write operation */ enum xferinfo flags; /* to check what we're doing */ int rqcount; /* number of elements in request */ }; enum requeststatus bre5(struct request *rq, int plexno, daddr_t * diskstart, daddr_t diskend); void complete_raid5_write(struct rqelement *); enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex); void setrqebounds(struct rqelement *rqe, struct metrics *mp); /* * define the low-level requests needed to perform a * high-level I/O operation for a specific plex 'plexno'. * * Return 0 if all subdisks involved in the request are up, 1 if some * subdisks are not up, and -1 if the request is at least partially * outside the bounds of the subdisks. * * Modify the pointer *diskstart to point to the end address. On * read, return on the first bad subdisk, so that the caller * (build_read_request) can try alternatives. * * On entry to this routine, the prq structures are not assigned. The * assignment is performed by expandrq(). Strictly speaking, the * elements rqe->sdno of all entries should be set to -1, since 0 * (from bzero) is a valid subdisk number. We avoid this problem by * initializing the ones we use, and not looking at the others (index * >= prq->requests). 
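 *
 * To make the address arithmetic in Part A below easier to follow, here is a
 * small standalone sketch (not part of the original source; the function name
 * and the example values are invented for illustration). It applies the same
 * formulas that bre5 uses with plex->stripesize and plex->subdisks to locate
 * the stripe, the parity subdisk and the first data subdisk for a given
 * plex-relative address.
 */
static void
raid5_geometry_example(long diskaddr, long stripesize, int subdisks)
{
    long datastripe = stripesize * (subdisks - 1);  /* data sectors per stripe */
    long stripeoffset = diskaddr % datastripe;      /* offset within the stripe */
    long stripebase = diskaddr - stripeoffset;      /* plex address of stripe start */
    long sdbase = stripebase / (subdisks - 1);      /* subdisk offset of the stripe base */
    int psdno = subdisks - 1 - (diskaddr / datastripe) % subdisks; /* parity subdisk */
    int firstsdno = stripeoffset / stripesize;      /* data subdisk holding the start */

    if (firstsdno >= psdno)                         /* at or past the parity subdisk, */
        firstsdno++;                                /* step over it */
    /*
     * Example: stripesize = 256, subdisks = 4, diskaddr = 1000 gives
     * datastripe = 768, stripeoffset = 232, stripebase = 768, sdbase = 256,
     * psdno = 3 - (1000 / 768) % 4 = 2 and firstsdno = 0.
     */
    (void) sdbase;
    (void) firstsdno;
}

/*
 * bre5 itself follows.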
*/ enum requeststatus bre5(struct request *rq, int plexno, daddr_t * diskaddr, daddr_t diskend) { struct metrics m; /* most of the information */ struct sd *sd; struct plex *plex; struct buf *bp; /* user's bp */ struct rqgroup *rqg; /* the request group that we will create */ struct rqelement *rqe; /* point to this request information */ int rsectors; /* sectors remaining in this stripe */ int mysdno; /* another sd index in loops */ int rqno; /* request number */ m.diskstart = *diskaddr; /* start of transfer */ bp = rq->bp; /* buffer pointer */ plex = &PLEX[plexno]; /* point to the plex */ while (*diskaddr < diskend) { /* until we get it all sorted out */ struct rqelement *prqe = NULL; /* XXX */ m.badsdno = -1; /* no bad subdisk yet */ /* Part A: Define the request */ /* * First, calculate some sizes: * The offset of the start address from * the start of the stripe */ m.stripeoffset = *diskaddr % (plex->stripesize * (plex->subdisks - 1)); /* * The plex-relative address of the * start of the stripe */ m.stripebase = *diskaddr - m.stripeoffset; /* subdisk containing the parity stripe */ m.psdno = plex->subdisks - 1 - (*diskaddr / (plex->stripesize * (plex->subdisks - 1))) % plex->subdisks; /* * The number of the subdisk in which * the start is located */ m.firstsdno = m.stripeoffset / plex->stripesize; if (m.firstsdno >= m.psdno) /* at or past parity sd */ m.firstsdno++; /* increment it */ /* * The offset from the beginning of * the stripe on this subdisk */ m.initoffset = m.stripeoffset % plex->stripesize; /* The offset of the stripe start relative to this subdisk */ m.sdbase = m.stripebase / (plex->subdisks - 1); m.useroffset = *diskaddr - m.diskstart; /* The offset of the start in the user buffer */ /* * The number of sectors to transfer in the * current (first) subdisk */ m.initlen = min(diskend - *diskaddr, /* the amount remaining to transfer */ plex->stripesize - m.initoffset); /* and the amount left in this block */ /* * The number of sectors to transfer in this stripe * is the minumum of the amount remaining to transfer * and the amount left in this stripe */ m.stripesectors = min(diskend - *diskaddr, plex->stripesize * (plex->subdisks - 1) - m.stripeoffset); /* The number of data subdisks involved in this request */ m.sdcount = (m.stripesectors + m.initoffset + plex->stripesize - 1) / plex->stripesize; /* Part B: decide what kind of transfer this will be */ /* * start and end addresses of the transfer in * the current block. * * There are a number of different kinds of transfer, each of which relates to a * specific subdisk: * * 1. Normal read. All participating subdisks are up, and the transfer can be * made directly to the user buffer. The bounds of the transfer are described * by m.dataoffset and m.datalen. We have already calculated m.initoffset and * m.initlen, which define the parameters for the first data block. * * 2. Recovery read. One participating subdisk is down. To recover data, all * the other subdisks, including the parity subdisk, must be read. The data is * recovered by exclusive-oring all the other blocks. The bounds of the transfer * are described by m.groupoffset and m.grouplen. * * 3. A read request may request reading both available data (normal read) and * non-available data (recovery read). This can be a problem if the address ranges * of the two reads do not coincide: in this case, the normal read needs to be * extended to cover the address range of the recovery read, and must thus be * performed out of malloced memory. * * 4. Normal write. 
All the participating subdisks are up. The bounds of the transfer * are described by m.dataoffset and m.datalen. Since these values differ for each * block, we calculate the bounds for the parity block independently as the maximum * of the individual blocks and store these values in m.writeoffset and m.writelen. * This write proceeds in four phases: * * i. Read the old contents of each block and the parity block. * * ii. ``Remove'' the old contents from the parity block with exclusive or. * * iii. ``Insert'' the new contents of the block in the parity block, again with * exclusive or. * * iv. Write the new contents of the data blocks and the parity block. The data block * transfers can be made directly from the user buffer. * * 5. Degraded write where the data block is not available. The bounds of the * transfer are described by m.groupoffset and m.grouplen. This requires the * following steps: * * i. Read in all the other data blocks, excluding the parity block. * * ii. Recreate the parity block from the other data blocks and the data to be written. * * iii. Write the parity block. * * 6. Parityless write, a write where the parity block is not available. This * is in fact the simplest: just write the data blocks. This can proceed directly * from the user buffer. The bounds of the transfer are described * by m.dataoffset and m.datalen. * * 7. Combination of degraded data block write and normal write. In this case the * address ranges of the reads may also need to be extended to cover all * participating blocks. * * All requests in a group transfer transfer the same address range relative * to their subdisk. The individual transfers may vary, but since our group of * requests is all in a single slice, we can define a range in which they all * fall. * * In the following code section, we determine which kind of transfer we will perform. * If there is a group transfer, we also decide its bounds relative to the subdisks. * At the end, we have the following values: * * m.flags indicates the kinds of transfers we will perform * m.initoffset indicates the offset of the beginning of any data * operation relative to the beginning of the stripe base. * m.initlen specifies the length of any data operation. * m.dataoffset contains the same value as m.initoffset. * m.datalen contains the same value as m.initlen. Initially * dataoffset and datalen describe the parameters for the first * data block; while building the data block requests, they are * updated for each block. * m.groupoffset indicates the offset of any group operation relative * to the beginning of the stripe base * m.grouplen specifies the length of any group operation * m.writeoffset indicates the offset of a normal write relative * to the beginning of the stripe base. This value differs from * m.dataoffset in that it applies to the entire operation, and * not just the first block. * m.writelen specifies the total span of a normal write operation. * writeoffset and writelen are used to define the parity block. */ m.groupoffset = 0; /* assume no group... 
*/ m.grouplen = 0; /* until we know we have one */ m.writeoffset = m.initoffset; /* start offset of transfer */ m.writelen = 0; /* nothing to write yet */ m.flags = 0; /* no flags yet */ rsectors = m.stripesectors; /* remaining sectors to examine */ m.dataoffset = m.initoffset; /* start at the beginning of the transfer */ m.datalen = m.initlen; if (m.sdcount > 1) { plex->multiblock++; /* more than one block for the request */ /* * If we have two transfers that don't overlap, * (one at the end of the first block, the other * at the beginning of the second block), * it's cheaper to split them */ if (rsectors < plex->stripesize) { m.sdcount = 1; /* just one subdisk */ m.stripesectors = m.initlen; /* and just this many sectors */ rsectors = m.initlen; /* and in the loop counter */ } } if (SD[plex->sdnos[m.psdno]].state < sd_reborn) /* is our parity subdisk down? */ m.badsdno = m.psdno; /* note that it's down */ if (bp->b_flags & B_READ) { /* read operation */ for (mysdno = m.firstsdno; rsectors > 0; mysdno++) { if (mysdno == m.psdno) /* ignore parity on read */ mysdno++; if (mysdno == plex->subdisks) /* wraparound */ mysdno = 0; if (mysdno == m.psdno) /* parity, */ mysdno++; /* we've given already */ if (SD[plex->sdnos[mysdno]].state < sd_reborn) { /* got a bad subdisk, */ if (m.badsdno >= 0) /* we had one already, */ /* * XXX be cleverer here. We can still * read what we can read. */ return REQUEST_DOWN; /* we can't take a second */ m.badsdno = mysdno; /* got the first */ m.groupoffset = m.dataoffset; /* define the bounds */ m.grouplen = m.datalen; m.flags |= XFR_RECOVERY_READ; /* we need recovery */ plex->recovered_reads++; /* count another one */ } else m.flags |= XFR_NORMAL_READ; /* normal read */ /* Update the pointers for the next block */ m.dataoffset = 0; /* back to the start of the stripe */ rsectors -= m.datalen; /* remaining sectors to examine */ m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */ } } else { /* write operation */ for (mysdno = m.firstsdno; rsectors > 0; mysdno++) { if (mysdno == m.psdno) /* parity stripe, we've dealt with that */ mysdno++; if (mysdno == plex->subdisks) /* wraparound */ mysdno = 0; if (mysdno == m.psdno) /* parity, */ mysdno++; /* we've given already */ sd = &SD[plex->sdnos[mysdno]]; if (sd->state != sd_up) { enum requeststatus s; s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */ if (s && (m.badsdno >= 0)) { /* second bad disk, */ int sdno; /* * If the parity disk is down, there's * no recovery. We make all involved * subdisks stale. Otherwise, we * should be able to recover, but it's * like pulling teeth. Fix it later. * * XXX be cleverer here. We should * still write what we can write. 
*/ for (sdno = 0; sdno < m.sdcount; sdno++) { struct sd *sd = &SD[plex->sdnos[sdno]]; if (sd->state >= sd_reborn) /* sort of up, */ set_sd_state(sd->sdno, sd_stale, setstate_force); /* make it stale */ } return s; /* and crap out */ } m.badsdno = mysdno; /* note which one is bad */ m.flags |= XFR_DEGRADED_WRITE; /* we need recovery */ plex->degraded_writes++; /* count another one */ m.groupoffset = m.dataoffset; /* define the bounds */ m.grouplen = m.datalen; } else { m.flags |= XFR_NORMAL_WRITE; /* normal write operation */ if (m.writeoffset > m.dataoffset) { /* move write operation lower */ m.writelen = max(m.writeoffset + m.writelen, m.dataoffset + m.datalen) - m.dataoffset; m.writeoffset = m.dataoffset; } else m.writelen = max(m.writeoffset + m.writelen, m.dataoffset + m.datalen) - m.writeoffset; } /* Update the pointers for the next block */ m.dataoffset = 0; /* back to the start of the stripe */ rsectors -= m.datalen; /* remaining sectors to examine */ m.datalen = min(rsectors, plex->stripesize); /* amount that will fit in this block */ } if (m.badsdno == m.psdno) { /* got a bad parity block, */ struct sd *psd = &SD[plex->sdnos[m.psdno]]; if (psd->state == sd_down) set_sd_state(psd->sdno, sd_obsolete, setstate_force); /* it's obsolete now */ else if (psd->state == sd_crashed) set_sd_state(psd->sdno, sd_stale, setstate_force); /* it's stale now */ m.flags &= ~XFR_NORMAL_WRITE; /* this write isn't normal, */ m.flags |= XFR_PARITYLESS_WRITE; /* it's parityless */ plex->parityless_writes++; /* count another one */ } } /* reset the initial transfer values */ m.dataoffset = m.initoffset; /* start at the beginning of the transfer */ m.datalen = m.initlen; /* * XXX see if we can satisfy a recovery_read from a * different plex. If so, return from here with no requests WRITEME */ /* decide how many requests we need */ if (m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE)) /* doing a recovery read or degraded write, */ m.rqcount = plex->subdisks; /* all subdisks */ else if (m.flags & XFR_NORMAL_WRITE) /* normal write, */ m.rqcount = m.sdcount + 1; /* all data blocks and the parity block */ else /* parityless write or normal read */ m.rqcount = m.sdcount; /* just the data blocks */ /* Part C: build the requests */ rqg = allocrqg(rq, m.rqcount); /* get a request group */ if (rqg == NULL) { /* malloc failed */ bp->b_flags |= B_ERROR; bp->b_error = ENOMEM; biodone(bp); return REQUEST_ENOMEM; } rqg->plexno = plexno; rqg->flags = m.flags; rqno = 0; /* index in the request group */ /* 1: PARITY BLOCK */ /* * Are we performing an operation which requires parity? In that case, * work out the parameters and define the parity block. 
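 *
 * For a normal write, the parity block read here is later updated with the
 * classic RAID-5 read-modify-write rule (phases ii and iii of the description
 * above): each old data byte is XORed out of the parity and each new data
 * byte is XORed in. A minimal standalone sketch of that rule follows; the
 * function name is invented for illustration, and the real work in this
 * driver is done elsewhere, presumably in complete_raid5_write(), which is
 * declared above but not part of this excerpt.
 */
static void
raid5_parity_update_example(unsigned char *parity, const unsigned char *olddata,
    const unsigned char *newdata, int len)
{
    int i;

    for (i = 0; i < len; i++)                       /* for every byte in the block: */
        parity[i] ^= olddata[i] ^ newdata[i];       /* remove old data, insert new data */
}

/*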
* XFR_PARITYOP is XFR_NORMAL_WRITE | XFR_RECOVERY_READ | XFR_DEGRADED_WRITE */ if (m.flags & XFR_PARITYOP) { /* need parity */ rqe = &rqg->rqe[rqno]; /* point to element */ sd = &SD[plex->sdnos[m.psdno]]; /* the subdisk in question */ rqe->rqg = rqg; /* point back to group */ rqe->flags = (m.flags | XFR_PARITY_BLOCK | XFR_MALLOCED) /* always malloc parity block */ &~(XFR_NORMAL_READ | XFR_PARITYLESS_WRITE); /* transfer flags without data op stuf */ setrqebounds(rqe, &m); /* set up the bounds of the transfer */ rqe->sdno = sd->sdno; /* subdisk number */ rqe->driveno = sd->driveno; prqe = rqe; /* debug XXX */ if (build_rq_buffer(rqe, plex)) /* build the buffer */ return REQUEST_ENOMEM; /* can't do it */ rqe->b.b_flags |= B_READ; /* we must read first */ m.sdcount++; /* adjust the subdisk count */ rqno++; /* and point to the next request */ } /* * 2: DATA BLOCKS * Now build up requests for the blocks required * for individual transfers */ for (mysdno = m.firstsdno; rqno < m.sdcount; mysdno++, rqno++) { if (mysdno == m.psdno) /* parity, */ mysdno++; /* we've given already */ if (mysdno == plex->subdisks) /* got to the end, */ mysdno = 0; /* wrap around */ if (mysdno == m.psdno) /* parity, */ mysdno++; /* we've given already */ rqe = &rqg->rqe[rqno]; /* point to element */ sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */ rqe->rqg = rqg; /* point to group */ if (m.flags & XFR_NEEDS_MALLOC) /* we need a malloced buffer first */ rqe->flags = m.flags | XFR_DATA_BLOCK | XFR_MALLOCED; /* transfer flags */ else rqe->flags = m.flags | XFR_DATA_BLOCK; /* transfer flags */ if (mysdno == m.badsdno) { /* this is the bad subdisk */ rqg->badsdno = rqno; /* note which one */ rqe->flags |= XFR_BAD_SUBDISK; /* note that it's dead */ /* * we can't read or write from/to it, * but we don't need to malloc */ rqe->flags &= ~(XFR_MALLOCED | XFR_NORMAL_READ | XFR_NORMAL_WRITE); } setrqebounds(rqe, &m); /* set up the bounds of the transfer */ #if VINUMDEBUG if (prqe && (rqe->groupoffset + rqe->sdoffset) < prqe->sdoffset) /* XXX */ Debugger("Low data block"); /* XXX */ #endif rqe->useroffset = m.useroffset; /* offset in user buffer */ rqe->sdno = sd->sdno; /* subdisk number */ rqe->driveno = sd->driveno; if (build_rq_buffer(rqe, plex)) /* build the buffer */ return REQUEST_ENOMEM; /* can't do it */ if ((m.flags & XFR_PARITYOP) /* parity operation, */ &&((m.flags & XFR_BAD_SUBDISK) == 0)) /* and not the bad subdisk, */ rqe->b.b_flags |= B_READ; /* we must read first */ /* Now update pointers for the next block */ *diskaddr += m.datalen; /* skip past what we've done */ m.stripesectors -= m.datalen; /* deduct from what's left */ m.useroffset += m.datalen; /* and move on in the user buffer */ m.datalen = min(m.stripesectors, plex->stripesize); /* and recalculate */ m.dataoffset = 0; /* start at the beginning of next block */ } /* * 3: REMAINING BLOCKS FOR RECOVERY * Finally, if we have a recovery operation, build * up transfers for the other subdisks. Follow the * subdisks around until we get to where we started. * These requests use only the group parameters. 
*/ if ((rqno < m.rqcount) /* haven't done them all already */ &&(m.flags & (XFR_RECOVERY_READ | XFR_DEGRADED_WRITE))) { for (; rqno < m.rqcount; rqno++, mysdno++) { if (mysdno == m.psdno) /* parity, */ mysdno++; /* we've given already */ if (mysdno == plex->subdisks) /* got to the end, */ mysdno = 0; /* wrap around */ if (mysdno == m.psdno) /* parity, */ mysdno++; /* we've given already */ rqe = &rqg->rqe[rqno]; /* point to element */ sd = &SD[plex->sdnos[mysdno]]; /* the subdisk in question */ rqe->rqg = rqg; /* point to group */ rqe->sdoffset = m.sdbase + m.groupoffset; /* start of transfer */ rqe->dataoffset = 0; /* for tidiness' sake */ rqe->groupoffset = 0; /* group starts at the beginining */ rqe->datalen = 0; rqe->grouplen = m.grouplen; rqe->buflen = m.grouplen; rqe->flags = (m.flags | XFR_MALLOCED) & ~XFR_DATAOP; /* transfer flags without data op stuf */ rqe->sdno = sd->sdno; /* subdisk number */ rqe->driveno = sd->driveno; if (build_rq_buffer(rqe, plex)) /* build the buffer */ return REQUEST_ENOMEM; /* can't do it */ rqe->b.b_flags |= B_READ; /* we must read first */ } } if (*diskaddr < diskend) /* didn't finish the request on this stripe */ plex->multistripe++; /* count another one */ } return REQUEST_OK; } /* * Helper function for rqe5: adjust the bounds of the transfers to minimize * the buffer allocation. * * Each request can handle two of three different data ranges: * * 1. The range described by the parameters dataoffset and datalen, * for normal read or parityless write. * 2. The range described by the parameters groupoffset and grouplen, * for recovery read and degraded write. * 3. For normal write, the range depends on the kind of block. For * data blocks, the range is defined by dataoffset and datalen. For * parity blocks, it is defined by writeoffset and writelen. * * In order not to allocate more memory than necessary, this function * adjusts the bounds parameter for each request to cover just the minimum * necessary for the function it performs. This will normally vary from one * request to the next. * * Things are slightly different for the parity block. In this case, the bounds * defined by mp->writeoffset and mp->writelen also play a rôle. Select this * case by setting the parameter forparity != 0 */ void setrqebounds(struct rqelement *rqe, struct metrics *mp) { /* parity block of a normal write */ if ((rqe->flags & (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK)) == (XFR_NORMAL_WRITE | XFR_PARITY_BLOCK)) { /* case 3 */ if (rqe->flags & XFR_DEGRADED_WRITE) { /* also degraded write */ /* * With a combined normal and degraded write, we * will zero out the area of the degraded write * in the second phase, so we don't need to read * it in. Unfortunately, we need a way to tell * build_request_buffer the size of the buffer, * and currently that's the length of the read. * As a result, we read everything, even the stuff * that we're going to nuke. 
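 *
 * As a purely illustrative example (the numbers are invented): with
 * writeoffset = 32, writelen = 64, groupoffset = 0 and grouplen = 16, the
 * group operation starts lower, so the first branch below sets
 * sdoffset = sdbase, dataoffset = 32 and groupoffset = 0; datalen becomes
 * writelen (64) and grouplen stays 16, giving a buffer length of
 * max(32 + 64, 0 + 16) = 96 sectors at the end of the function.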
* FIXME XXX */ if (mp->groupoffset < mp->writeoffset) { /* group operation starts lower */ rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */ rqe->dataoffset = mp->writeoffset - mp->groupoffset; /* data starts here */ rqe->groupoffset = 0; /* and the group at the beginning */ } else { /* individual data starts first */ rqe->sdoffset = mp->sdbase + mp->writeoffset; /* start of transfer */ rqe->dataoffset = 0; /* individual data starts at the beginning */ rqe->groupoffset = mp->groupoffset - mp->writeoffset; /* group starts here */ } rqe->datalen = mp->writelen; rqe->grouplen = mp->grouplen; } else { /* just normal write (case 3) */ rqe->sdoffset = mp->sdbase + mp->writeoffset; /* start of transfer */ rqe->dataoffset = 0; /* degradation starts at the beginning */ rqe->groupoffset = 0; /* for tidiness' sake */ rqe->datalen = mp->writelen; rqe->grouplen = 0; } } else if (rqe->flags & XFR_DATAOP) { /* data operation (case 1 or 3) */ if (rqe->flags & XFR_GROUPOP) { /* also a group operation (case 2) */ if (mp->groupoffset < mp->dataoffset) { /* group operation starts lower */ rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */ rqe->dataoffset = mp->dataoffset - mp->groupoffset; /* data starts here */ rqe->groupoffset = 0; /* and the group at the beginning */ } else { /* individual data starts first */ rqe->sdoffset = mp->sdbase + mp->dataoffset; /* start of transfer */ rqe->dataoffset = 0; /* individual data starts at the beginning */ rqe->groupoffset = mp->groupoffset - mp->dataoffset; /* group starts here */ } rqe->datalen = mp->datalen; rqe->grouplen = mp->grouplen; } else { /* just data operation (case 1) */ rqe->sdoffset = mp->sdbase + mp->dataoffset; /* start of transfer */ rqe->dataoffset = 0; /* degradation starts at the beginning */ rqe->groupoffset = 0; /* for tidiness' sake */ rqe->datalen = mp->datalen; rqe->grouplen = 0; } } else { /* just group operations (case 2) */ rqe->sdoffset = mp->sdbase + mp->groupoffset; /* start of transfer */ rqe->dataoffset = 0; /* for tidiness' sake */ rqe->groupoffset = 0; /* group starts at the beginining */ rqe->datalen = 0; rqe->grouplen = mp->grouplen; } rqe->buflen = max(rqe->dataoffset + rqe->datalen, /* total buffer length */ rqe->groupoffset + rqe->grouplen); } Index: head/sys/dev/vinum/vinumrequest.c =================================================================== --- head/sys/dev/vinum/vinumrequest.c (revision 49534) +++ head/sys/dev/vinum/vinumrequest.c (revision 49535) @@ -1,1064 +1,1063 @@ /*- * Copyright (c) 1997, 1998, 1999 * Nan Yang Computer Services Limited. All rights reserved. * * Parts copyright (c) 1997, 1998 Cybernet Corporation, NetMAX project. * * Written by Greg Lehey * * This software is distributed under the so-called ``Berkeley * License'': * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Nan Yang Computer * Services Limited. * 4. 
Neither the name of the Company nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * This software is provided ``as is'', and any express or implied * warranties, including, but not limited to, the implied warranties of * merchantability and fitness for a particular purpose are disclaimed. * In no event shall the company or contributors be liable for any * direct, indirect, incidental, special, exemplary, or consequential * damages (including, but not limited to, procurement of substitute * goods or services; loss of use, data, or profits; or business * interruption) however caused and on any theory of liability, whether * in contract, strict liability, or tort (including negligence or * otherwise) arising in any way out of the use of this software, even if * advised of the possibility of such damage. * - * $Id: vinumrequest.c,v 1.24 1999/07/05 01:53:14 grog Exp grog $ + * $Id: vinumrequest.c,v 1.29 1999/08/07 08:13:23 grog Exp $ */ #include #include -#include #include enum requeststatus bre(struct request *rq, int plexno, daddr_t * diskstart, daddr_t diskend); enum requeststatus bre5(struct request *rq, int plexno, daddr_t * diskstart, daddr_t diskend); enum requeststatus build_read_request(struct request *rq, int volplexno); enum requeststatus build_write_request(struct request *rq); enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex); void freerq(struct request *rq); int find_alternate_sd(struct request *rq); int check_range_covered(struct request *); void complete_rqe(struct buf *bp); void complete_raid5_write(struct rqelement *); int abortrequest(struct request *rq, int error); void sdio_done(struct buf *bp); int vinum_bounds_check(struct buf *bp, struct volume *vol); caddr_t allocdatabuf(struct rqelement *rqe); void freedatabuf(struct rqelement *rqe); #ifdef VINUMDEBUG struct rqinfo rqinfo[RQINFO_SIZE]; struct rqinfo *rqip = rqinfo; void logrq(enum rqinfo_type type, union rqinfou info, struct buf *ubp) { int s = splhigh(); microtime(&rqip->timestamp); /* when did this happen? 
*/ rqip->type = type; rqip->bp = ubp; /* user buffer */ switch (type) { case loginfo_user_bp: case loginfo_user_bpl: bcopy(info.bp, &rqip->info.b, sizeof(struct buf)); rqip->devmajor = major(info.bp->b_dev); rqip->devminor = minor(info.bp->b_dev); break; case loginfo_iodone: case loginfo_rqe: case loginfo_raid5_data: case loginfo_raid5_parity: bcopy(info.rqe, &rqip->info.rqe, sizeof(struct rqelement)); rqip->devmajor = major(info.rqe->b.b_dev); rqip->devminor = minor(info.rqe->b.b_dev); break; case loginfo_unused: break; } rqip++; if (rqip >= &rqinfo[RQINFO_SIZE]) /* wrap around */ rqip = rqinfo; splx(s); } #endif void vinumstrategy(struct buf *bp) { int volno; struct volume *vol = NULL; switch (DEVTYPE(bp->b_dev)) { case VINUM_SD_TYPE: case VINUM_RAWSD_TYPE: sdio(bp); return; /* * In fact, vinum doesn't handle drives: they're * handled directly by the disk drivers */ case VINUM_DRIVE_TYPE: default: bp->b_error = EIO; /* I/O error */ bp->b_flags |= B_ERROR; biodone(bp); return; case VINUM_VOLUME_TYPE: /* volume I/O */ volno = Volno(bp->b_dev); vol = &VOL[volno]; if (vol->state != volume_up) { /* can't access this volume */ bp->b_error = EIO; /* I/O error */ bp->b_flags |= B_ERROR; biodone(bp); return; } if (vinum_bounds_check(bp, vol) <= 0) { /* don't like them bounds */ biodone(bp); /* have nothing to do with this */ return; } /* FALLTHROUGH */ /* * Plex I/O is pretty much the same as volume I/O * for a single plex. Indicate this by passing a NULL * pointer (set above) for the volume */ case VINUM_PLEX_TYPE: case VINUM_RAWPLEX_TYPE: bp->b_resid = bp->b_bcount; /* transfer everything */ vinumstart(bp, 0); return; } } /* * Start a transfer. Return -1 on error, * 0 if OK, 1 if we need to retry. * Parameter reviveok is set when doing * transfers for revives: it allows transfers to * be started immediately when a revive is in * progress. During revive, normal transfers * are queued if they share address space with * a currently active revive operation. */ int vinumstart(struct buf *bp, int reviveok) { int plexno; int maxplex; /* maximum number of plexes to handle */ struct volume *vol; struct request *rq; /* build up our request here */ enum requeststatus status; #if VINUMDEBUG if (debug & DEBUG_LASTREQS) logrq(loginfo_user_bp, (union rqinfou) bp, bp); #endif /* * XXX In these routines, we're assuming that * we will always be called with bp->b_bcount * which is a multiple of the sector size. This * is a reasonable assumption, since we are only * called from system routines. Should we check * anyway? */ if ((bp->b_bcount % DEV_BSIZE) != 0) { /* bad length */ bp->b_error = EINVAL; /* invalid size */ bp->b_flags |= B_ERROR; biodone(bp); return -1; } rq = (struct request *) Malloc(sizeof(struct request)); /* allocate a request struct */ if (rq == NULL) { /* can't do it */ bp->b_error = ENOMEM; /* can't get memory */ bp->b_flags |= B_ERROR; biodone(bp); return -1; } bzero(rq, sizeof(struct request)); /* * Note the volume ID. 
This can be NULL, which * the request building functions use as an * indication for single plex I/O */ rq->bp = bp; /* and the user buffer struct */ if (DEVTYPE(bp->b_dev) == VINUM_VOLUME_TYPE) { /* it's a volume, */ rq->volplex.volno = Volno(bp->b_dev); /* get the volume number */ vol = &VOL[rq->volplex.volno]; /* and point to it */ vol->active++; /* one more active request */ maxplex = vol->plexes; /* consider all its plexes */ } else { vol = NULL; /* no volume */ rq->volplex.plexno = Plexno(bp->b_dev); /* point to the plex */ rq->isplex = 1; /* note that it's a plex */ maxplex = 1; /* just the one plex */ } if (bp->b_flags & B_READ) { /* * This is a read request. Decide * which plex to read from. * * There's a potential race condition here, * since we're not locked, and we could end * up multiply incrementing the round-robin * counter. This doesn't have any serious * effects, however. */ if (vol != NULL) { vol->reads++; vol->bytes_read += bp->b_bcount; plexno = vol->preferred_plex; /* get the plex to use */ if (plexno < 0) { /* round robin */ plexno = vol->last_plex_read; vol->last_plex_read++; if (vol->last_plex_read == vol->plexes) /* got the the end? */ vol->last_plex_read = 0; /* wrap around */ } status = build_read_request(rq, plexno); /* build a request */ } else { daddr_t diskaddr = bp->b_blkno; /* start offset of transfer */ status = bre(rq, /* build a request list */ rq->volplex.plexno, &diskaddr, diskaddr + (bp->b_bcount / DEV_BSIZE)); } if ((status > REQUEST_RECOVERED) /* can't satisfy it */ ||(bp->b_flags & B_DONE)) { /* XXX shouldn't get this without bad status */ if (status == REQUEST_DOWN) { /* not enough subdisks */ bp->b_error = EIO; /* I/O error */ bp->b_flags |= B_ERROR; } biodone(bp); freerq(rq); return -1; } return launch_requests(rq, reviveok); /* now start the requests if we can */ } else /* * This is a write operation. We write to all * plexes. If this is a RAID 5 plex, we must also * update the parity stripe. */ { if (vol != NULL) { vol->writes++; vol->bytes_written += bp->b_bcount; status = build_write_request(rq); /* Not all the subdisks are up */ } else { /* plex I/O */ daddr_t diskstart; diskstart = bp->b_blkno; /* start offset of transfer */ status = bre(rq, Plexno(bp->b_dev), &diskstart, bp->b_blkno + (bp->b_bcount / DEV_BSIZE)); /* build requests for the plex */ } if ((status > REQUEST_RECOVERED) /* can't satisfy it */ ||(bp->b_flags & B_DONE)) { /* XXX shouldn't get this without bad status */ if (status == REQUEST_DOWN) { /* not enough subdisks */ bp->b_error = EIO; /* I/O error */ bp->b_flags |= B_ERROR; } if ((bp->b_flags & B_DONE) == 0) biodone(bp); freerq(rq); return -1; } return launch_requests(rq, reviveok); /* now start the requests if we can */ } } /* * Call the low-level strategy routines to * perform the requests in a struct request */ int launch_requests(struct request *rq, int reviveok) { struct rqgroup *rqg; int rqno; /* loop index */ struct rqelement *rqe; /* current element */ int s; /* * First find out whether we're reviving, and the * request contains a conflict. 
If so, we hang * the request off plex->waitlist of the first * plex we find which is reviving */ if ((rq->flags & XFR_REVIVECONFLICT) /* possible revive conflict */ &&(!reviveok)) { /* and we don't want to do it now, */ struct sd *sd; struct request *waitlist; /* point to the waitlist */ sd = &SD[rq->sdno]; if (sd->waitlist != NULL) { /* something there already, */ waitlist = sd->waitlist; while (waitlist->next != NULL) /* find the end */ waitlist = waitlist->next; waitlist->next = rq; /* hook our request there */ } else sd->waitlist = rq; /* hook our request at the front */ #if VINUMDEBUG if (debug & DEBUG_REVIVECONFLICT) log(LOG_DEBUG, "Revive conflict sd %d: %x\n%s dev %d.%d, offset 0x%x, length %ld\n", rq->sdno, (u_int) rq, rq->bp->b_flags & B_READ ? "Read" : "Write", major(rq->bp->b_dev), minor(rq->bp->b_dev), rq->bp->b_blkno, rq->bp->b_bcount); /* XXX */ #endif return 0; /* and get out of here */ } rq->active = 0; /* nothing yet */ /* XXX This is probably due to a bug */ if (rq->rqg == NULL) { /* no request */ log(LOG_ERR, "vinum: null rqg\n"); abortrequest(rq, EINVAL); return -1; } #if VINUMDEBUG if (debug & DEBUG_ADDRESSES) log(LOG_DEBUG, "Request: %x\n%s dev %d.%d, offset 0x%x, length %ld\n", (u_int) rq, rq->bp->b_flags & B_READ ? "Read" : "Write", major(rq->bp->b_dev), minor(rq->bp->b_dev), rq->bp->b_blkno, rq->bp->b_bcount); /* XXX */ vinum_conf.lastrq = (int) rq; vinum_conf.lastbuf = rq->bp; if (debug & DEBUG_LASTREQS) logrq(loginfo_user_bpl, (union rqinfou) rq->bp, rq->bp); #endif s = splbio(); for (rqg = rq->rqg; rqg != NULL; rqg = rqg->next) { /* through the whole request chain */ rqg->active = rqg->count; /* they're all active */ rq->active++; /* one more active request group */ for (rqno = 0; rqno < rqg->count; rqno++) { rqe = &rqg->rqe[rqno]; if (rqe->flags & XFR_BAD_SUBDISK) /* this subdisk is bad, */ rqg->active--; /* one less active request */ else if ((rqe->flags & XFR_BAD_SUBDISK) == 0) { /* subdisk isn't bad, we can do it */ if ((rqe->b.b_flags & B_READ) == 0) rqe->b.b_vp->v_numoutput++; /* one more output going */ rqe->b.b_flags |= B_ORDERED; /* XXX chase SCSI driver */ #if VINUMDEBUG if (debug & DEBUG_ADDRESSES) log(LOG_DEBUG, " %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n", rqe->b.b_flags & B_READ ? "Read" : "Write", major(rqe->b.b_dev), minor(rqe->b.b_dev), rqe->sdno, (u_int) (rqe->b.b_blkno - SD[rqe->sdno].driveoffset), rqe->b.b_blkno, rqe->b.b_bcount); /* XXX */ if (debug & DEBUG_NUMOUTPUT) log(LOG_DEBUG, " vinumstart sd %d numoutput %ld\n", rqe->sdno, rqe->b.b_vp->v_numoutput); if (debug & DEBUG_LASTREQS) logrq(loginfo_rqe, (union rqinfou) rqe, rq->bp); #endif /* fire off the request */ (*bdevsw(rqe->b.b_dev)->d_strategy) (&rqe->b); } } } splx(s); return 0; } /* * define the low-level requests needed to perform a * high-level I/O operation for a specific plex 'plexno'. * * Return REQUEST_OK if all subdisks involved in the request are up, * REQUEST_DOWN if some subdisks are not up, and REQUEST_EOF if the * request is at least partially outside the bounds of the subdisks. * * Modify the pointer *diskstart to point to the end address. On * read, return on the first bad subdisk, so that the caller * (build_read_request) can try alternatives. * * On entry to this routine, the rqg structures are not assigned. The * assignment is performed by expandrq(). Strictly speaking, the * elements rqe->sdno of all entries should be set to -1, since 0 * (from bzero) is a valid subdisk number. 
We avoid this problem by * initializing the ones we use, and not looking at the others (index * >= rqg->requests). */ enum requeststatus bre(struct request *rq, int plexno, daddr_t * diskaddr, daddr_t diskend) { int sdno; struct sd *sd; struct rqgroup *rqg; struct buf *bp; /* user's bp */ struct plex *plex; enum requeststatus status; /* return value */ daddr_t plexoffset; /* offset of transfer in plex */ daddr_t stripebase; /* base address of stripe (1st subdisk) */ daddr_t stripeoffset; /* offset in stripe */ daddr_t blockoffset; /* offset in stripe on subdisk */ struct rqelement *rqe; /* point to this request information */ daddr_t diskstart = *diskaddr; /* remember where this transfer starts */ enum requeststatus s; /* temp return value */ bp = rq->bp; /* buffer pointer */ status = REQUEST_OK; /* return value: OK until proven otherwise */ plex = &PLEX[plexno]; /* point to the plex */ switch (plex->organization) { case plex_concat: sd = NULL; /* (keep compiler quiet) */ for (sdno = 0; sdno < plex->subdisks; sdno++) { sd = &SD[plex->sdnos[sdno]]; if (*diskaddr < sd->plexoffset) /* we must have a hole, */ status = REQUEST_DEGRADED; /* note the fact */ if (*diskaddr < (sd->plexoffset + sd->sectors)) { /* the request starts in this subdisk */ rqg = allocrqg(rq, 1); /* space for the request */ if (rqg == NULL) { /* malloc failed */ bp->b_flags |= B_ERROR; bp->b_error = ENOMEM; biodone(bp); return REQUEST_ENOMEM; } rqg->plexno = plexno; rqe = &rqg->rqe[0]; /* point to the element */ rqe->rqg = rqg; /* group */ rqe->sdno = sd->sdno; /* put in the subdisk number */ plexoffset = *diskaddr; /* start offset in plex */ rqe->sdoffset = plexoffset - sd->plexoffset; /* start offset in subdisk */ rqe->useroffset = plexoffset - diskstart; /* start offset in user buffer */ rqe->dataoffset = 0; rqe->datalen = min(diskend - *diskaddr, /* number of sectors to transfer in this sd */ sd->sectors - rqe->sdoffset); rqe->groupoffset = 0; /* no groups for concatenated plexes */ rqe->grouplen = 0; rqe->buflen = rqe->datalen; /* buffer length is data buffer length */ rqe->flags = 0; rqe->driveno = sd->driveno; if (sd->state != sd_up) { /* *now* we find the sd is down */ s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */ if (s == REQUEST_DOWN) { /* down? */ if (rq->bp->b_flags & B_READ) /* read request, */ return REQUEST_DEGRADED; /* give up here */ /* * If we're writing, don't give up * because of a bad subdisk. Go * through to the bitter end, but note * which ones we can't access. */ rqe->flags = XFR_BAD_SUBDISK; status = REQUEST_DEGRADED; /* can't do it all */ } } *diskaddr += rqe->datalen; /* bump the address */ if ((rqe->flags & XFR_BAD_SUBDISK) == 0) { /* subdisk OK, */ /* * We could build the buffer anyway, even if the * subdisk is down, but it's a waste of time and * space. */ if (build_rq_buffer(rqe, plex)) { /* build the buffer */ deallocrqg(rqg); bp->b_flags |= B_ERROR; bp->b_error = ENOMEM; biodone(bp); return REQUEST_ENOMEM; /* can't do it */ } } } if (*diskaddr == diskend) /* we're finished, */ break; /* get out of here */ } /* * We've got to the end of the plex. Have we got to the end of * the transfer? It would seem that having an offset beyond the * end of the subdisk is an error, but in fact it can happen if * the volume has another plex of different size. There's a valid * question as to why you would want to do this, but currently * it's allowed. * * In a previous version, I returned REQUEST_DOWN here. I think * REQUEST_EOF is more appropriate now. 
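* The check below compares the end of the transfer with the end of the last subdisk (its plexoffset plus its length in sectors); anything beyond that point is reported as REQUEST_EOF rather than treated as an error.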
*/ if (diskend > sd->sectors + sd->plexoffset) /* pointing beyond EOF? */ status = REQUEST_EOF; break; case plex_striped: { while (*diskaddr < diskend) { /* until we get it all sorted out */ if (*diskaddr >= plex->length) /* beyond the end of the plex */ return REQUEST_EOF; /* can't continue */ /* The offset of the start address from the start of the stripe. */ stripeoffset = *diskaddr % (plex->stripesize * plex->subdisks); /* The plex-relative address of the start of the stripe. */ stripebase = *diskaddr - stripeoffset; /* The number of the subdisk in which the start is located. */ sdno = stripeoffset / plex->stripesize; /* The offset from the beginning of the stripe on this subdisk. */ blockoffset = stripeoffset % plex->stripesize; sd = &SD[plex->sdnos[sdno]]; /* the subdisk in question */ rqg = allocrqg(rq, 1); /* space for the request */ if (rqg == NULL) { /* malloc failed */ bp->b_flags |= B_ERROR; bp->b_error = ENOMEM; biodone(bp); return REQUEST_ENOMEM; } rqg->plexno = plexno; rqe = &rqg->rqe[0]; /* point to the element */ rqe->rqg = rqg; rqe->sdoffset = stripebase / plex->subdisks + blockoffset; /* start offset in this subdisk */ rqe->useroffset = *diskaddr - diskstart; /* The offset of the start in the user buffer */ rqe->dataoffset = 0; rqe->datalen = min(diskend - *diskaddr, /* the amount remaining to transfer */ plex->stripesize - blockoffset); /* and the amount left in this stripe */ rqe->groupoffset = 0; /* no groups for striped plexes */ rqe->grouplen = 0; rqe->buflen = rqe->datalen; /* buffer length is data buffer length */ rqe->flags = 0; rqe->sdno = sd->sdno; /* put in the subdisk number */ rqe->driveno = sd->driveno; if (sd->state != sd_up) { /* *now* we find the sd is down */ s = checksdstate(sd, rq, *diskaddr, diskend); /* do we need to change state? */ if (s == REQUEST_DOWN) { /* down? */ if (rq->bp->b_flags & B_READ) /* read request, */ return REQUEST_DEGRADED; /* give up here */ /* * If we're writing, don't give up * because of a bad subdisk. Go through * to the bitter end, but note which * ones we can't access. */ rqe->flags = XFR_BAD_SUBDISK; /* yup */ status = REQUEST_DEGRADED; /* can't do it all */ } } /* * It would seem that having an offset * beyond the end of the subdisk is an * error, but in fact it can happen if the * volume has another plex of different * size. There's a valid question as to why * you would want to do this, but currently * it's allowed. */ if (rqe->sdoffset + rqe->datalen > sd->sectors) { /* ends beyond the end of the subdisk? 
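* If so, the transfer is cut back below to what fits in the subdisk (purely illustrative figures, not from this source: a 16 sector request starting 8 sectors before the end of the subdisk is trimmed to 8 sectors), and the DEBUG_EOFINFO code logs the offsets involved.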
*/ rqe->datalen = sd->sectors - rqe->sdoffset; /* truncate */ #if VINUMDEBUG if (debug & DEBUG_EOFINFO) { /* tell on the request */ log(LOG_DEBUG, "vinum: EOF on plex %s, sd %s offset %x (user offset %x)\n", plex->name, sd->name, (u_int) sd->sectors, bp->b_blkno); log(LOG_DEBUG, "vinum: stripebase %x, stripeoffset %x, blockoffset %x\n", stripebase, stripeoffset, blockoffset); } #endif } if ((rqe->flags & XFR_BAD_SUBDISK) == 0) { /* subdisk OK, */ if (build_rq_buffer(rqe, plex)) { /* build the buffer */ deallocrqg(rqg); bp->b_flags |= B_ERROR; bp->b_error = ENOMEM; biodone(bp); return REQUEST_ENOMEM; /* can't do it */ } } *diskaddr += rqe->datalen; /* look at the remainder */ if ((*diskaddr < diskend) /* didn't finish the request on this stripe */ &&(*diskaddr < plex->length)) { /* and there's more to come */ plex->multiblock++; /* count another one */ if (sdno == plex->subdisks - 1) /* last subdisk, */ plex->multistripe++; /* another stripe as well */ } } } break; /* * RAID5 is complicated enough to have * its own function */ case plex_raid5: status = bre5(rq, plexno, diskaddr, diskend); break; default: log(LOG_ERR, "vinum: invalid plex type %d in bre\n", plex->organization); status = REQUEST_DOWN; /* can't access it */ } return status; } /* * Build up a request structure for reading volumes. * This function is not needed for plex reads, since there's * no recovery if a plex read can't be satisfied. */ enum requeststatus build_read_request(struct request *rq, /* request */ int plexindex) { /* index in the volume's plex table */ struct buf *bp; daddr_t startaddr; /* offset of previous part of transfer */ daddr_t diskaddr; /* offset of current part of transfer */ daddr_t diskend; /* and end offset of transfer */ int plexno; /* plex index in vinum_conf */ struct rqgroup *rqg; /* point to the request we're working on */ struct volume *vol; /* volume in question */ off_t oldstart; /* note where we started */ int recovered = 0; /* set if we recover a read */ enum requeststatus status = REQUEST_OK; int plexmask; /* bit mask of plexes, for recovery */ bp = rq->bp; /* buffer pointer */ diskaddr = bp->b_blkno; /* start offset of transfer */ diskend = diskaddr + (bp->b_bcount / DEV_BSIZE); /* and end offset of transfer */ rqg = &rq->rqg[plexindex]; /* plex request */ vol = &VOL[rq->volplex.volno]; /* point to volume */ while (diskaddr < diskend) { /* build up request components */ startaddr = diskaddr; status = bre(rq, vol->plex[plexindex], &diskaddr, diskend); /* build up a request */ switch (status) { case REQUEST_OK: continue; case REQUEST_RECOVERED: /* * XXX FIXME if we have more than one plex, and we can * satisfy the request from another, don't use the * recovered request, since it's more expensive. */ recovered = 1; break; case REQUEST_ENOMEM: return status; /* * If we get here, our request is not complete. Try * to fill in the missing parts from another plex. * This can happen multiple times in this function, * and we reinitialize the plex mask each time, since * we could have a hole in our plexes. 
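* Each retry below resets diskaddr to the start of the unsatisfied extent and tries the remaining plexes in plexmask; a plex that advances diskaddr counts as a recovered read and resets the status to REQUEST_OK.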
*/ case REQUEST_EOF: case REQUEST_DOWN: /* can't access the plex */ case REQUEST_DEGRADED: /* can't access the plex */ plexmask = ((1 << vol->plexes) - 1) /* all plexes in the volume */ &~(1 << plexindex); /* except for the one we were looking at */ for (plexno = 0; plexno < vol->plexes; plexno++) { if (plexmask == 0) /* no plexes left to try */ return REQUEST_DOWN; /* failed */ diskaddr = startaddr; /* start at the beginning again */ oldstart = startaddr; /* and note where that was */ if (plexmask & (1 << plexno)) { /* we haven't tried this plex yet */ bre(rq, vol->plex[plexno], &diskaddr, diskend); /* try a request */ if (diskaddr > oldstart) { /* we satisfied another part */ recovered = 1; /* we recovered from the problem */ status = REQUEST_OK; /* don't complain about it */ break; } } } } if (recovered) vol->recovered_reads += recovered; /* adjust our recovery count */ } return status; } /* * Build up a request structure for writes. * Return 0 if all subdisks involved in the request are up, 1 if some * subdisks are not up, and -1 if the request is at least partially * outside the bounds of the subdisks. */ enum requeststatus build_write_request(struct request *rq) { /* request */ struct buf *bp; daddr_t diskstart; /* offset of current part of transfer */ daddr_t diskend; /* and end offset of transfer */ int plexno; /* plex index in vinum_conf */ struct volume *vol; /* volume in question */ enum requeststatus status; bp = rq->bp; /* buffer pointer */ vol = &VOL[rq->volplex.volno]; /* point to volume */ diskend = bp->b_blkno + (bp->b_bcount / DEV_BSIZE); /* end offset of transfer */ status = REQUEST_DOWN; /* assume the worst */ for (plexno = 0; plexno < vol->plexes; plexno++) { diskstart = bp->b_blkno; /* start offset of transfer */ /* * Build requests for the plex. * We take the best possible result here (min, * not max): we're happy if we can write at all */ status = min(status, bre(rq, vol->plex[plexno], &diskstart, diskend)); } return status; } /* Fill in the struct buf part of a request element. */ enum requeststatus build_rq_buffer(struct rqelement *rqe, struct plex *plex) { struct sd *sd; /* point to subdisk */ struct volume *vol; struct buf *bp; struct buf *ubp; /* user (high level) buffer header */ vol = &VOL[rqe->rqg->rq->volplex.volno]; sd = &SD[rqe->sdno]; /* point to subdisk */ bp = &rqe->b; ubp = rqe->rqg->rq->bp; /* pointer to user buffer header */ /* Initialize the buf struct */ bp->b_flags = ubp->b_flags & (B_NOCACHE | B_READ | B_ASYNC); /* copy these flags from user bp */ bp->b_flags |= B_CALL; /* inform us when it's done */ BUF_LOCKINIT(bp); /* get a lock for the buffer */ BUF_LOCK(bp, LK_EXCLUSIVE); /* and lock it */ /* * XXX Should we check for reviving plexes here, and * set B_ORDERED if so? 
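* The fields filled in below describe the transfer to the underlying drive: the block number is the offset within the subdisk plus the subdisk's offset on the drive, and the byte count is the element's buffer length converted from sectors with DEV_BSHIFT.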
*/ bp->b_iodone = complete_rqe; /* by calling us here */ bp->b_dev = DRIVE[rqe->driveno].vp->v_rdev; /* drive device */ bp->b_blkno = rqe->sdoffset + sd->driveoffset; /* start address */ bp->b_bcount = rqe->buflen << DEV_BSHIFT; /* number of bytes to transfer */ bp->b_resid = bp->b_bcount; /* and it's still all waiting */ bp->b_bufsize = bp->b_bcount; /* and buffer size */ bp->b_vp = DRIVE[rqe->driveno].vp; /* drive vnode */ bp->b_rcred = FSCRED; /* we have the file system credentials */ bp->b_wcred = FSCRED; /* we have the file system credentials */ if (rqe->flags & XFR_MALLOCED) { /* this operation requires a malloced buffer */ bp->b_data = Malloc(bp->b_bcount); /* get a buffer to put it in */ if (bp->b_data == NULL) { /* failed */ Debugger("XXX"); abortrequest(rqe->rqg->rq, ENOMEM); return REQUEST_ENOMEM; /* no memory */ } } else /* * Point directly to user buffer data. This means * that we don't need to do anything when we have * finished the transfer */ bp->b_data = ubp->b_data + rqe->useroffset * DEV_BSIZE; /* * On a recovery read, we perform an XOR of * all blocks to the user buffer. To make * this work, we first clean out the buffer */ if ((rqe->flags & (XFR_RECOVERY_READ | XFR_BAD_SUBDISK)) == (XFR_RECOVERY_READ | XFR_BAD_SUBDISK)) { /* bad subdisk of a recovery read */ int length = rqe->grouplen << DEV_BSHIFT; /* and count involved */ char *data = (char *) &rqe->b.b_data[rqe->groupoffset << DEV_BSHIFT]; /* destination */ bzero(data, length); /* clean it out */ } return 0; } /* * Abort a request: free resources and complete the * user request with the specified error */ int abortrequest(struct request *rq, int error) { struct buf *bp = rq->bp; /* user buffer */ bp->b_flags |= B_ERROR; bp->b_error = error; freerq(rq); /* free everything we're doing */ biodone(bp); return error; /* and give up */ } /* * Check that our transfer will cover the * complete address space of the user request. * * Return 1 if it can, otherwise 0 */ int check_range_covered(struct request *rq) { /* XXX */ return 1; } /* Perform I/O on a subdisk */ void sdio(struct buf *bp) { int s; /* spl */ struct sd *sd; struct sdbuf *sbp; daddr_t endoffset; struct drive *drive; sd = &SD[Sdno(bp->b_dev)]; /* point to the subdisk */ drive = &DRIVE[sd->driveno]; if (drive->state != drive_up) { /* XXX until we get the states fixed */ if (bp->b_flags & B_WRITE) /* writing, */ set_sd_state(Sdno(bp->b_dev), sd_stale, setstate_force); else set_sd_state(Sdno(bp->b_dev), sd_crashed, setstate_force); bp->b_flags |= B_ERROR; bp->b_error = EIO; biodone(bp); return; } if (sd->state < sd_empty) { /* nothing to talk to, */ bp->b_flags |= B_ERROR; bp->b_error = EIO; biodone(bp); return; } /* Get a buffer */ sbp = (struct sdbuf *) Malloc(sizeof(struct sdbuf)); if (sbp == NULL) { bp->b_flags |= B_ERROR; bp->b_error = ENOMEM; biodone(bp); return; } bzero(sbp, sizeof(struct sdbuf)); /* start with nothing */ /* * XXX Should we check for reviving plexes here, and * set B_ORDERED if so? 
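* The sdbuf built below clones the caller's buffer header, maps the block number onto the drive by adding the subdisk's driveoffset, and records the original bp and subdisk so that sdio_done can complete and account for it.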
*/ sbp->b.b_flags = bp->b_flags | B_CALL; /* inform us when it's done */ sbp->b.b_bufsize = bp->b_bufsize; /* buffer size */ sbp->b.b_bcount = bp->b_bcount; /* number of bytes to transfer */ sbp->b.b_resid = bp->b_resid; /* and amount waiting */ sbp->b.b_dev = DRIVE[sd->driveno].vp->v_rdev; /* device */ sbp->b.b_data = bp->b_data; /* data buffer */ sbp->b.b_blkno = bp->b_blkno + sd->driveoffset; sbp->b.b_iodone = sdio_done; /* come here on completion */ BUF_LOCKINIT(&sbp->b); /* get a lock for the buffer */ BUF_LOCK(&sbp->b, LK_EXCLUSIVE); /* and lock it */ sbp->b.b_vp = DRIVE[sd->driveno].vp; /* vnode */ sbp->bp = bp; /* note the address of the original header */ sbp->sdno = sd->sdno; /* note for statistics */ sbp->driveno = sd->driveno; endoffset = bp->b_blkno + sbp->b.b_bcount / DEV_BSIZE; /* final sector offset */ if (endoffset > sd->sectors) { /* beyond the end */ sbp->b.b_bcount -= (endoffset - sd->sectors) * DEV_BSIZE; /* trim */ if (sbp->b.b_bcount <= 0) { /* nothing to transfer */ bp->b_resid = bp->b_bcount; /* nothing transferred */ /* * XXX Grrr. This doesn't seem to work. Return * an error after all */ bp->b_flags |= B_ERROR; bp->b_error = ENOSPC; biodone(bp); Free(sbp); return; } } if ((sbp->b.b_flags & B_READ) == 0) /* write */ sbp->b.b_vp->v_numoutput++; /* one more output going */ #if VINUMDEBUG if (debug & DEBUG_ADDRESSES) log(LOG_DEBUG, " %s dev %d.%d, sd %d, offset 0x%x, devoffset 0x%x, length %ld\n", sbp->b.b_flags & B_READ ? "Read" : "Write", major(sbp->b.b_dev), minor(sbp->b.b_dev), sbp->sdno, (u_int) (sbp->b.b_blkno - SD[sbp->sdno].driveoffset), (int) sbp->b.b_blkno, sbp->b.b_bcount); /* XXX */ if (debug & DEBUG_NUMOUTPUT) log(LOG_DEBUG, " vinumstart sd %d numoutput %ld\n", sbp->sdno, sbp->b.b_vp->v_numoutput); #endif s = splbio(); (*bdevsw(sbp->b.b_dev)->d_strategy) (&sbp->b); splx(s); } /* * Simplified version of bounds_check_with_label * Determine the size of the transfer, and make sure it is * within the boundaries of the partition. Adjust transfer * if needed, and signal errors or early completion. * * Volumes are simpler than disk slices: they only contain * one component (though we call them a, b and c to make * system utilities happy), and they always take up the * complete space of the "partition". * * I'm still not happy with this: why should the label be * protected? If it weren't so damned difficult to write * one in the first place (because it's protected), it wouldn't * be a problem. */ int vinum_bounds_check(struct buf *bp, struct volume *vol) { int maxsize = vol->size; /* size of the partition (sectors) */ int size = (bp->b_bcount + DEV_BSIZE - 1) >> DEV_BSHIFT; /* size of this request (sectors) */ /* Would this transfer overwrite the disk label? */ if (bp->b_blkno <= LABELSECTOR /* starts before or at the label */ #if LABELSECTOR != 0 && bp->b_blkno + size > LABELSECTOR /* and finishes after */ #endif && (!(vol->flags & VF_RAW)) /* and it's not raw */ &&major(bp->b_dev) == BDEV_MAJOR /* and it's the block device */ && (bp->b_flags & B_READ) == 0 /* and it's a write */ && !(vol->flags & (VF_WLABEL | VF_LABELLING))) { /* and we're not allowed to write the label */ bp->b_error = EROFS; /* read-only */ bp->b_flags |= B_ERROR; return -1; } if (size == 0) /* no transfer specified, */ return 0; /* treat as EOF */ /* beyond partition? 
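* A transfer that runs past the end of the volume is truncated to fit; one that starts exactly at the end completes immediately with the full count left in b_resid (EOF), and one that starts beyond the end fails with EINVAL.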
*/ if (bp->b_blkno < 0 /* negative start */ || bp->b_blkno + size > maxsize) { /* or goes beyond the end of the partition */ /* if exactly at end of disk, return an EOF */ if (bp->b_blkno == maxsize) { bp->b_resid = bp->b_bcount; return 0; } /* or truncate if part of it fits */ size = maxsize - bp->b_blkno; if (size <= 0) { /* nothing to transfer */ bp->b_error = EINVAL; bp->b_flags |= B_ERROR; return -1; } bp->b_bcount = size << DEV_BSHIFT; } bp->b_pblkno = bp->b_blkno; return 1; } /* * Allocate a request group and hook * it in in the list for rq */ struct rqgroup * allocrqg(struct request *rq, int elements) { struct rqgroup *rqg; /* the one we're going to allocate */ int size = sizeof(struct rqgroup) + elements * sizeof(struct rqelement); rqg = (struct rqgroup *) Malloc(size); if (rqg != NULL) { /* malloc OK, */ if (rq->rqg) /* we already have requests */ rq->lrqg->next = rqg; /* hang it off the end */ else /* first request */ rq->rqg = rqg; /* at the start */ rq->lrqg = rqg; /* this one is the last in the list */ bzero(rqg, size); /* no old junk */ rqg->rq = rq; /* point back to the parent request */ rqg->count = elements; /* number of requests in the group */ } return rqg; } /* * Deallocate a request group out of a chain. We do * this by linear search: the chain is short, this * almost never happens, and currently it can only * happen to the first member of the chain. */ void deallocrqg(struct rqgroup *rqg) { struct rqgroup *rqgc = rqg->rq->rqg; /* point to the request chain */ if (rqgc == rqg) /* we're first in line */ rqg->rq->rqg = rqg->next; /* unhook ourselves */ else { while ((rqgc->next != NULL) /* find the group */ &&(rqgc->next != rqg)) rqgc = rqgc->next; if (rqgc->next == NULL) log(LOG_ERR, "vinum deallocrqg: rqg %p not found in request %p\n", rqg->rq, rqg); else rqgc->next = rqg->next; /* make the chain jump over us */ } Free(rqg); } Index: head/sys/dev/vn/vn.c =================================================================== --- head/sys/dev/vn/vn.c (revision 49534) +++ head/sys/dev/vn/vn.c (revision 49535) @@ -1,758 +1,757 @@ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah Hdr: vn.c 1.13 94/04/02 * * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 - * $Id: vn.c,v 1.80 1999/05/30 16:51:55 phk Exp $ + * $Id: vn.c,v 1.81 1999/07/20 09:47:33 phk Exp $ */ /* * Vnode disk driver. * * Block/character interface to a vnode. Allows one to treat a file * as a disk (e.g. build a filesystem in it, mount it, etc.). * * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode * instead of a simple VOP_RDWR. We do this to avoid distorting the * local buffer cache. * * NOTE 2: There is a security issue involved with this driver. * Once mounted all access to the contents of the "mapped" file via * the special file is controlled by the permissions on the special * file, the protection of the mapped file is ignored (effectively, * by using root credentials in all transactions). * * NOTE 3: Doesn't interact with leases, should it? */ #include "vn.h" #if NVN > 0 /* default is to have 8 VN's */ #if NVN < 8 #undef NVN #define NVN 8 #endif #include "opt_devfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DEVFS #include #endif /*DEVFS*/ -#include #include #include #include #include #include #include #include #include #include #include static d_ioctl_t vnioctl; static d_open_t vnopen; static d_close_t vnclose; static d_dump_t vndump; static d_psize_t vnsize; static d_strategy_t vnstrategy; #define CDEV_MAJOR 43 #define BDEV_MAJOR 15 /* * cdevsw * D_DISK we want to look like a disk * ( D_NOCLUSTERRW removed - clustering should be ok ) * D_CANFREE We support B_FREEBUF */ static struct cdevsw vn_cdevsw = { /* open */ vnopen, /* close */ vnclose, /* read */ physread, /* write */ physwrite, /* ioctl */ vnioctl, /* stop */ nostop, /* reset */ noreset, /* devtotty */ nodevtotty, /* poll */ nopoll, /* mmap */ nommap, /* strategy */ vnstrategy, /* name */ "vn", /* parms */ noparms, /* maj */ CDEV_MAJOR, /* dump */ vndump, /* psize */ vnsize, /* flags */ D_DISK|D_CANFREE, /* maxio */ 0, /* bmaj */ BDEV_MAJOR }; #define vnunit(dev) dkunit(dev) #define getvnbuf() \ ((struct buf *)malloc(sizeof(struct buf), M_DEVBUF, M_WAITOK)) #define putvnbuf(bp) \ free((caddr_t)(bp), M_DEVBUF) struct vn_softc { int sc_flags; /* flags */ int sc_size; /* size of vn, sc_secsize scale */ int sc_secsize; /* sector size */ struct diskslices *sc_slices; struct vnode *sc_vp; /* vnode if not NULL */ vm_object_t sc_object; /* backing object if not NULL */ struct ucred *sc_cred; /* credentials */ int sc_maxactive; /* max # of active requests */ struct buf sc_tab; /* transfer queue */ u_long sc_options; /* options */ #ifdef DEVFS void *r_devfs_token; void *devfs_token; #endif }; /* sc_flags */ #define VNF_INITED 0x01 static struct vn_softc *vn_softc[NVN]; static u_long vn_options; #define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt)) #if 0 static void vniodone (struct buf *bp); #endif static int vnsetcred (struct vn_softc *vn, struct ucred *cred); static void vnclear (struct 
vn_softc *vn); static int vn_modevent (module_t, int, void *); static int vniocattach_file (struct vn_softc *, struct vn_ioctl *, dev_t dev, int flag, struct proc *p); static int vniocattach_swap (struct vn_softc *, struct vn_ioctl *, dev_t dev, int flag, struct proc *p); static int vnclose(dev_t dev, int flags, int mode, struct proc *p) { struct vn_softc *vn = vn_softc[vnunit(dev)]; IFOPT(vn, VN_LABELS) if (vn->sc_slices != NULL) dsclose(dev, mode, vn->sc_slices); return (0); } static int vnopen(dev_t dev, int flags, int mode, struct proc *p) { int unit = vnunit(dev); struct vn_softc *vn; if (unit >= NVN) { if (vn_options & VN_FOLLOW) printf("vnopen(0x%lx, 0x%x, 0x%x, %p)\n", (u_long)dev, flags, mode, (void *)p); return(ENOENT); } vn = vn_softc[unit]; if (!vn) { vn = malloc(sizeof *vn, M_DEVBUF, M_WAITOK); if (!vn) return (ENOMEM); bzero(vn, sizeof *vn); vn_softc[unit] = vn; } IFOPT(vn, VN_FOLLOW) printf("vnopen(0x%lx, 0x%x, 0x%x, %p)\n", (u_long)dev, flags, mode, (void *)p); IFOPT(vn, VN_LABELS) { if (vn->sc_flags & VNF_INITED) { struct disklabel label; /* Build label for whole disk. */ bzero(&label, sizeof label); label.d_secsize = vn->sc_secsize; label.d_nsectors = 32; label.d_ntracks = 64 / (vn->sc_secsize / DEV_BSIZE); label.d_secpercyl = label.d_nsectors * label.d_ntracks; label.d_ncylinders = vn->sc_size / label.d_secpercyl; label.d_secperunit = vn->sc_size; label.d_partitions[RAW_PART].p_size = vn->sc_size; return (dsopen("vn", dev, mode, 0, &vn->sc_slices, &label, vnstrategy, (ds_setgeom_t *)NULL, &vn_cdevsw)); } if (dkslice(dev) != WHOLE_DISK_SLICE || dkpart(dev) != RAW_PART || mode != S_IFCHR) return (ENXIO); } return(0); } /* * vnstrategy: * * Run strategy routine for VN device. We use VOP_READ/VOP_WRITE calls * for vnode-backed vn's, and the new vm_pager_strategy() call for * vm_object-backed vn's. * * Currently B_ASYNC is only partially handled - for OBJT_SWAP I/O only. * * NOTE: bp->b_blkno is DEV_BSIZE'd. We must generate bp->b_pblkno for * our uio or vn_pager_strategy() call that is vn->sc_secsize'd */ static void vnstrategy(struct buf *bp) { int unit = vnunit(bp->b_dev); struct vn_softc *vn = vn_softc[unit]; int error; int isvplocked = 0; long sz; struct uio auio; struct iovec aiov; IFOPT(vn, VN_DEBUG) printf("vnstrategy(%p): unit %d\n", bp, unit); if ((vn->sc_flags & VNF_INITED) == 0) { bp->b_error = ENXIO; bp->b_flags |= B_ERROR; biodone(bp); return; } bp->b_resid = bp->b_bcount; IFOPT(vn, VN_LABELS) { if (vn->sc_slices != NULL && dscheck(bp, vn->sc_slices) <= 0) { bp->b_flags |= B_INVAL; biodone(bp); return; } } else { int pbn; pbn = bp->b_blkno * (vn->sc_secsize / DEV_BSIZE); sz = howmany(bp->b_bcount, vn->sc_secsize); if (pbn < 0 || pbn + sz > vn->sc_size) { if (pbn != vn->sc_size) { bp->b_error = EINVAL; bp->b_flags |= B_ERROR | B_INVAL; } biodone(bp); return; } bp->b_pblkno = pbn; } if (vn->sc_vp && (bp->b_flags & B_FREEBUF)) { /* * Not handled for vnode-backed element yet. 
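* Such a B_FREEBUF request is simply completed here without touching the vnode; only the OBJT_SWAP path below, which goes through vm_pager_strategy(), acts on freebuf requests.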
*/ biodone(bp); } else if (vn->sc_vp) { /* * VNODE I/O */ aiov.iov_base = bp->b_data; aiov.iov_len = bp->b_bcount; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = (vm_ooffset_t)bp->b_pblkno * vn->sc_secsize; auio.uio_segflg = UIO_SYSSPACE; if( bp->b_flags & B_READ) auio.uio_rw = UIO_READ; else auio.uio_rw = UIO_WRITE; auio.uio_resid = bp->b_bcount; auio.uio_procp = curproc; if (!VOP_ISLOCKED(vn->sc_vp)) { isvplocked = 1; vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc); } if( bp->b_flags & B_READ) error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); else error = VOP_WRITE(vn->sc_vp, &auio, 0, vn->sc_cred); if (isvplocked) { VOP_UNLOCK(vn->sc_vp, 0, curproc); isvplocked = 0; } bp->b_resid = auio.uio_resid; if( error ) { bp->b_error = error; bp->b_flags |= B_ERROR; } biodone(bp); } else if (vn->sc_object) { /* * OBJT_SWAP I/O * * ( handles read, write, freebuf ) */ vm_pager_strategy(vn->sc_object, bp); } else { bp->b_flags |= B_ERROR; bp->b_error = EINVAL; biodone(bp); } } #if 0 void vniodone( struct buf *bp) { bp->b_flags |= B_DONE; wakeup((caddr_t) bp); } #endif /* ARGSUSED */ static int vnioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p) { struct vn_softc *vn = vn_softc[vnunit(dev)]; struct vn_ioctl *vio; int error; u_long *f; IFOPT(vn,VN_FOLLOW) printf("vnioctl(0x%lx, 0x%lx, %p, 0x%x, %p): unit %d\n", (u_long)dev, cmd, (void *)data, flag, (void *)p, vnunit(dev)); switch (cmd) { case VNIOCATTACH: case VNIOCDETACH: case VNIOCGSET: case VNIOCGCLEAR: case VNIOCUSET: case VNIOCUCLEAR: goto vn_specific; } IFOPT(vn,VN_LABELS) { if (vn->sc_slices != NULL) { error = dsioctl("vn", dev, cmd, data, flag, &vn->sc_slices, vnstrategy, (ds_setgeom_t *)NULL); if (error != ENOIOCTL) return (error); } if (dkslice(dev) != WHOLE_DISK_SLICE || dkpart(dev) != RAW_PART) return (ENOTTY); } vn_specific: error = suser(p); if (error) return (error); vio = (struct vn_ioctl *)data; f = (u_long*)data; switch (cmd) { case VNIOCATTACH: if (vn->sc_flags & VNF_INITED) return(EBUSY); if (vio->vn_file == NULL) error = vniocattach_swap(vn, vio, dev, flag, p); else error = vniocattach_file(vn, vio, dev, flag, p); break; case VNIOCDETACH: if ((vn->sc_flags & VNF_INITED) == 0) return(ENXIO); /* * XXX handle i/o in progress. Return EBUSY, or wait, or * flush the i/o. * XXX handle multiple opens of the device. Return EBUSY, * or revoke the fd's. * How are these problems handled for removable and failing * hardware devices? */ vnclear(vn); IFOPT(vn, VN_FOLLOW) printf("vnioctl: CLRed\n"); break; case VNIOCGSET: vn_options |= *f; *f = vn_options; break; case VNIOCGCLEAR: vn_options &= ~(*f); *f = vn_options; break; case VNIOCUSET: vn->sc_options |= *f; *f = vn->sc_options; break; case VNIOCUCLEAR: vn->sc_options &= ~(*f); *f = vn->sc_options; break; default: error = ENOTTY; break; } return(error); } /* * vniocattach_file: * * Attach a file to a VN partition. Return the size in the vn_size * field. */ static int vniocattach_file(vn, vio, dev, flag, p) struct vn_softc *vn; struct vn_ioctl *vio; dev_t dev; int flag; struct proc *p; { struct vattr vattr; struct nameidata nd; int error; /* * Always open for read and write. * This is probably bogus, but it lets vn_open() * weed out directories, sockets, etc. so we don't * have to worry about them. 
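* Once the file is open, its size is taken from VOP_GETATTR() and truncated down to a whole number of DEV_BSIZE sectors to give the size of the vn device.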
*/ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vn_file, p); error = vn_open(&nd, FREAD|FWRITE, 0); if (error) return(error); error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); if (error) { VOP_UNLOCK(nd.ni_vp, 0, p); (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); return(error); } VOP_UNLOCK(nd.ni_vp, 0, p); vn->sc_secsize = DEV_BSIZE; vn->sc_vp = nd.ni_vp; vn->sc_size = vattr.va_size / vn->sc_secsize; /* note truncation */ error = vnsetcred(vn, p->p_ucred); if (error) { (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); return(error); } if (dev->si_bsize_phys < vn->sc_secsize) dev->si_bsize_phys = vn->sc_secsize; if (dev->si_bsize_best < vn->sc_secsize) dev->si_bsize_best = vn->sc_secsize; vn->sc_flags |= VNF_INITED; IFOPT(vn, VN_LABELS) { /* * Reopen so that `ds' knows which devices are open. * If this is the first VNIOCSET, then we've * guaranteed that the device is the cdev and that * no other slices or labels are open. Otherwise, * we rely on VNIOCCLR not being abused. */ error = vnopen(dev, flag, S_IFCHR, p); if (error) vnclear(vn); } IFOPT(vn, VN_FOLLOW) printf("vnioctl: SET vp %p size %x blks\n", vn->sc_vp, vn->sc_size); return(0); } /* * vniocattach_swap: * * Attach swap backing store to a VN partition of the size specified * in vn_size. */ static int vniocattach_swap(vn, vio, dev, flag, p) struct vn_softc *vn; struct vn_ioctl *vio; dev_t dev; int flag; struct proc *p; { int error; /* * Range check. Disallow negative sizes or any size less than the * size of a page. Then round to a page. */ if (vio->vn_size <= 0) return(EDOM); /* * Allocate an OBJT_SWAP object. * * sc_secsize is PAGE_SIZE'd * * vio->vn_size is in PAGE_SIZE'd chunks. * sc_size must be in PAGE_SIZE'd chunks. * Note the truncation. */ vn->sc_secsize = PAGE_SIZE; vn->sc_size = vio->vn_size; vn->sc_object = vm_pager_allocate(OBJT_SWAP, NULL, vn->sc_secsize * (vm_ooffset_t)vio->vn_size, VM_PROT_DEFAULT, 0); vn->sc_flags |= VNF_INITED; error = vnsetcred(vn, p->p_ucred); if (error == 0) { IFOPT(vn, VN_LABELS) { /* * Reopen so that `ds' knows which devices are open. * If this is the first VNIOCSET, then we've * guaranteed that the device is the cdev and that * no other slices or labels are open. Otherwise, * we rely on VNIOCCLR not being abused. */ error = vnopen(dev, flag, S_IFCHR, p); } } if (error == 0) { IFOPT(vn, VN_FOLLOW) { printf("vnioctl: SET vp %p size %x\n", vn->sc_vp, vn->sc_size); } } if (error) vnclear(vn); return(error); } /* * Duplicate the current process's credentials. Since we are called only * as the result of a SET ioctl and only root can do that, any future access * to this "disk" is essentially as root. Note that credentials may change * if some other uid can write directly to the mapped file (NFS). */ int vnsetcred(struct vn_softc *vn, struct ucred *cred) { struct uio auio; struct iovec aiov; char *tmpbuf; int error = 0; /* * Set credits in our softc */ if (vn->sc_cred) crfree(vn->sc_cred); vn->sc_cred = crdup(cred); /* * Horrible kludge to establish credentials for NFS XXX. 
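* A single throwaway read of the first sector is issued with the new credentials so that the NFS layer sees them before any real I/O is done.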
*/ if (vn->sc_vp) { tmpbuf = malloc(vn->sc_secsize, M_TEMP, M_WAITOK); aiov.iov_base = tmpbuf; aiov.iov_len = vn->sc_secsize; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_resid = aiov.iov_len; vn_lock(vn->sc_vp, LK_EXCLUSIVE | LK_RETRY, curproc); error = VOP_READ(vn->sc_vp, &auio, 0, vn->sc_cred); VOP_UNLOCK(vn->sc_vp, 0, curproc); free(tmpbuf, M_TEMP); } return (error); } void vnclear(struct vn_softc *vn) { struct proc *p = curproc; /* XXX */ IFOPT(vn, VN_FOLLOW) printf("vnclear(%p): vp=%p\n", vn, vn->sc_vp); if (vn->sc_slices != NULL) dsgone(&vn->sc_slices); vn->sc_flags &= ~VNF_INITED; if (vn->sc_vp != NULL) { (void)vn_close(vn->sc_vp, FREAD|FWRITE, vn->sc_cred, p); vn->sc_vp = NULL; } if (vn->sc_cred) { crfree(vn->sc_cred); vn->sc_cred = NULL; } if (vn->sc_object != NULL) { vm_pager_deallocate(vn->sc_object); vn->sc_object = NULL; } vn->sc_size = 0; } static int vnsize(dev_t dev) { int unit = vnunit(dev); struct vn_softc *vn; if (unit < 0 || unit >= NVN) return(-1); vn = vn_softc[unit]; if ((vn->sc_flags & VNF_INITED) == 0) return(-1); return(vn->sc_size); } static int vndump(dev_t dev) { return (ENODEV); } static int vn_modevent(module_t mod, int type, void *data) { int unit; #ifdef DEVFS struct vn_softc *vn; #endif switch (type) { case MOD_LOAD: #ifdef DEVFS for (unit = 0; unit < NVN; unit++) { vn = malloc(sizeof *vn, M_DEVBUF, M_WAITOK); if (!vn) continue; /* "oops" */ bzero(vn, sizeof *vn); vn_softc[unit] = vn; vn->r_devfs_token = devfs_add_devswf(&vn_cdevsw, dkmakeminor(unit, 0, 0), DV_CHR, UID_ROOT, GID_OPERATOR, 0640, "rvn%d", unit); vn->devfs_token = devfs_add_devswf(&vn_cdevsw, dkmakeminor(unit, 0, 0), DV_BLK, UID_ROOT, GID_OPERATOR, 0640, "vn%d", unit); } #endif break; case MOD_UNLOAD: #ifdef DEVFS for (unit = 0; unit < NVN; unit++) { vn = vn_softc[unit]; if (vn->r_devfs_token) { devfs_remove_dev(vn->r_devfs_token); vn->r_devfs_token = 0; } if (vn->devfs_token) { devfs_remove_dev(vn->devfs_token); vn->devfs_token = 0; } } #endif /* fall through */ case MOD_SHUTDOWN: for (unit = 0; unit < NVN; unit++) if (vn_softc[unit] && vn_softc[unit]->sc_flags & VNF_INITED) vnclear(vn_softc[unit]); break; default: break; } return 0; } DEV_MODULE(vn, CDEV_MAJOR, BDEV_MAJOR, vn_cdevsw, vn_modevent, 0); #endif Index: head/sys/dev/xe/if_xe.c =================================================================== --- head/sys/dev/xe/if_xe.c (revision 49534) +++ head/sys/dev/xe/if_xe.c (revision 49535) @@ -1,2507 +1,2507 @@ /*- * Copyright (c) 1998, 1999 Scott Mitchell * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: if_xe.c,v 1.20 1999/06/13 19:17:40 scott Exp $ * $FreeBSD$ */ /* * Portions of this software were derived from Werner Koch's xirc2ps driver * for Linux under the terms of the following license (from v1.30 of the * xirc2ps driver): * * Copyright (c) 1997 by Werner Koch (dd9jn) * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, and the entire permission notice in its entirety, * including the disclaimer of warranties. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote * products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * FreeBSD device driver for Xircom CreditCard PCMCIA Ethernet adapters. The * following cards are currently known to work with the driver: * Xircom CreditCard 10/100 (CE3) * Xircom CreditCard Ethernet + Modem 28 (CEM28) * Xircom CreditCard Ethernet 10/100 + Modem 56 (CEM56) * Xircom RealPort Ethernet 10 * Xircom RealPort Ethernet 10/100 * Xircom RealPort Ethernet 10/100 + Modem 56 (REM56, REM56G) * Intel EtherExpress Pro/100 PC Card Mobile Adapter 16 (Pro/100 M16A) * Compaq Netelligent 10/100 PC Card (CPQ-10/100) * * Some other cards *should* work, but support for them is either broken or in * an unknown state at the moment. I'm always interested in hearing from * people who own any of these cards: * Xircom CreditCard 10Base-T (PS-CE2-10) * Xircom CreditCard Ethernet + ModemII (CEM2) * Xircom CEM28 and CEM33 Ethernet/Modem cards (may be variants of CEM2?) * * Thanks to all who assisted with the development and testing of the driver, * especially: Werner Koch, Duke Kamstra, Duncan Barclay, Jason George, Dru * Nelson, Mike Kephart, Bill Rainey and Douglas Rand. Apologies if I've left * out anyone who deserves a mention here. * * Special thanks to Ade Lovett for both hosting the mailing list and doing * the CEM56/REM56 support code; and the FreeBSD UK Users' Group for hosting * the web pages. 
* * Contact points: * * Driver web page: http://ukug.uk.freebsd.org/~scott/xe_drv/ * * Mailing list: http://www.lovett.com/lists/freebsd-xircom/ * or send "subscribe freebsd-xircom" to * * Author email: */ #ifndef XE_DEBUG #define XE_DEBUG 1 /* Increase for more voluminous output! */ #endif #include "xe.h" #include "card.h" #include "apm.h" #include "bpf.h" #if NXE > 0 #if NCARD > 0 #include #include -#include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #if NBPF > 0 #include #endif /* NBPF > 0 */ #include #include #include #include #if NAPM > 0 #include #endif /* NAPM > 0 */ #include #include #include #include /* * One of these structures per allocated device */ struct xe_softc { struct arpcom arpcom; struct ifmedia ifmedia; struct ifmib_iso_8802_3 mibdata; struct callout_handle chand; struct isa_device *dev; struct pccard_devinfo *crd; struct ifnet *ifp; struct ifmedia *ifm; char *card_type; /* Card model name */ char *vendor; /* Card manufacturer */ int unit; /* Unit number, from dev->id_unit */ int srev; /* Silicon revision */ int tx_queued; /* Packets currently waiting to transmit */ int tx_tpr; /* Last value of TPR reg on card */ int tx_collisions; /* Collisions since last successful send */ int tx_timeouts; /* Count of transmit timeouts */ int autoneg_status; /* Autonegotiation progress state */ int media; /* Private media word */ u_char version; /* Bonding Version register from card */ u_char modem; /* 1 = Card has a modem */ u_char ce2; /* 1 = Card has CE2 silicon */ u_char mohawk; /* 1 = Card has Mohawk (CE3) silicon */ u_char dingo; /* 1 = Card has Dingo (CEM56) silicon */ u_char phy_ok; /* 1 = MII-compliant PHY found and initialised */ u_char gone; /* 1 = Card bailed out */ #if NAPM > 0 struct apmhook suspend_hook; struct apmhook resume_hook; #endif /* NAPM > 0 */ }; static struct xe_softc *sca[MAXSLOT]; /* * MII command structure */ struct xe_mii_frame { u_int8_t mii_stdelim; u_int8_t mii_opcode; u_int8_t mii_phyaddr; u_int8_t mii_regaddr; u_int8_t mii_turnaround; u_int16_t mii_data; }; /* * For accessing card registers */ #define XE_INB(r) inb(scp->dev->id_iobase+(r)) #define XE_INW(r) inw(scp->dev->id_iobase+(r)) #define XE_OUTB(r, b) outb(scp->dev->id_iobase+(r), (b)) #define XE_OUTW(r, w) outw(scp->dev->id_iobase+(r), (w)) #define XE_SELECT_PAGE(p) XE_OUTB(XE_PR, (p)) /* * Horrid stuff for accessing CIS tuples */ #define CARD_MAJOR 50 #define CISTPL_BUFSIZE 512 #define CISTPL_TYPE(tpl) tpl[0] #define CISTPL_LEN(tpl) tpl[2] #define CISTPL_DATA(tpl,pos) tpl[4 + ((pos)<<1)] /* * Media autonegotiation progress constants */ #define XE_AUTONEG_NONE 0 /* No autonegotiation in progress */ #define XE_AUTONEG_WAITING 1 /* Waiting for transmitter to go idle */ #define XE_AUTONEG_STARTED 2 /* Waiting for autonegotiation to complete */ #define XE_AUTONEG_100TX 3 /* Trying to force 100baseTX link */ #define XE_AUTONEG_FAIL 4 /* Autonegotiation failed */ /* * Prototypes start here */ static int xe_probe (struct isa_device *dev); static int xe_card_init (struct pccard_devinfo *devi); static int xe_attach (struct isa_device *dev); static void xe_init (void *xscp); static void xe_start (struct ifnet *ifp); static int xe_ioctl (struct ifnet *ifp, u_long command, caddr_t data); static int xe_card_intr (struct pccard_devinfo *devi); static void xe_watchdog (struct ifnet *ifp); static int xe_media_change (struct ifnet *ifp); static void xe_media_status (struct ifnet *ifp, struct ifmediareq *mrp); 
static timeout_t xe_setmedia; static void xe_hard_reset (struct xe_softc *scp); static void xe_soft_reset (struct xe_softc *scp); static void xe_stop (struct xe_softc *scp); static void xe_enable_intr (struct xe_softc *scp); static void xe_disable_intr (struct xe_softc *scp); static void xe_setmulti (struct xe_softc *scp); static void xe_setaddrs (struct xe_softc *scp); static int xe_pio_write_packet (struct xe_softc *scp, struct mbuf *mbp); static void xe_card_unload (struct pccard_devinfo *devi); static u_int32_t xe_compute_crc (u_int8_t *data, int len); static int xe_compute_hashbit (u_int32_t crc); /* * MII functions */ static void xe_mii_sync (struct xe_softc *scp); static int xe_mii_init (struct xe_softc *scp); static void xe_mii_send (struct xe_softc *scp, u_int32_t bits, int cnt); static int xe_mii_readreg (struct xe_softc *scp, struct xe_mii_frame *frame); static int xe_mii_writereg (struct xe_softc *scp, struct xe_mii_frame *frame); static u_int16_t xe_phy_readreg (struct xe_softc *scp, u_int16_t reg); static void xe_phy_writereg (struct xe_softc *scp, u_int16_t reg, u_int16_t data); /* * Debug functions */ #ifdef XE_DEBUG #define XE_REG_DUMP(scp) xe_reg_dump((scp)) #define XE_MII_DUMP(scp) xe_mii_dump((scp)) static void xe_reg_dump (struct xe_softc *scp); static void xe_mii_dump (struct xe_softc *scp); #else #define XE_REG_DUMP(scp) #define XE_MII_DUMP(scp) #endif #if NAPM > 0 /* * APM hook functions */ static int xe_suspend (void *xunit); static int xe_resume (void *xunit); #endif /* NAPM > 0 */ /* * PCMCIA driver hooks */ #ifdef PCCARD_MODULE PCCARD_MODULE(xe, xe_card_init, xe_card_unload, xe_card_intr, 0, net_imask); #else static struct pccard_device xe_info = { /* For pre 3.1-STABLE code */ "xe", xe_card_init, xe_card_unload, xe_card_intr, 0, &net_imask }; DATA_SET(pccarddrv_set, xe_info); #endif /* PCCARD_MODULE */ /* * ISA driver hooks. I'd like to do without these but the kernel config stuff * seems to require them. */ struct isa_driver xedriver = { xe_probe, xe_attach, "xe" }; /* * ISA probe routine. * All of the supported devices are PCMCIA cards. I have no idea if it's even * possible to successfully probe/attach these at boot time (pccardd normally * does a lot of setup work) so I don't even bother trying. 
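* The ISA probe below therefore only clears the softc table and returns; the real work of identifying and attaching a card is done from xe_card_init() when the slot manager calls it.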
*/ static int xe_probe (struct isa_device *dev) { #ifdef XE_DEBUG printf("xe%d: probe\n", dev->id_unit); #endif bzero(sca, MAXSLOT * sizeof(sca[0])); return 0; } /* * Two routines to read from/write to the attribute memory * the write portion is used only for fixing up the RealPort cards, * the reader portion was needed for debugging info, and duplicated some * code in xe_card_init(), so it appears here instead with suitable * modifications to xe_card_init() * -aDe Lovett */ static int xe_memwrite(struct pccard_devinfo *devi, off_t offset, u_char byte) { struct iovec iov; struct uio uios; iov.iov_base = &byte; iov.iov_len = sizeof(byte); uios.uio_iov = &iov; uios.uio_iovcnt = 1; uios.uio_offset = offset; uios.uio_resid = sizeof(byte); uios.uio_segflg = UIO_SYSSPACE; uios.uio_rw = UIO_WRITE; uios.uio_procp = 0; #if 0 /* THIS IS BOGUS */ return cdevsw[CARD_MAJOR]->d_write(makedev(CARD_MAJOR, devi->slt->slotnum), &uios, 0); #else return (-1); #endif } static int xe_memread(struct pccard_devinfo *devi, off_t offset, u_char *buf, int size) { struct iovec iov; struct uio uios; iov.iov_base = buf; iov.iov_len = size; uios.uio_iov = &iov; uios.uio_iovcnt = 1; uios.uio_offset = offset; uios.uio_resid = size; uios.uio_segflg = UIO_SYSSPACE; uios.uio_rw = UIO_READ; uios.uio_procp = 0; #if 0 /* THIS IS BOGUS */ return cdevsw[CARD_MAJOR]->d_read(makedev(CARD_MAJOR, devi->slt->slotnum), &uios, 0); #else return (-1); #endif } /* * Hacking for RealPort cards */ static int xe_cem56fix(struct xe_softc *scp) { struct pccard_devinfo *devi; struct slot *slt; struct slot_ctrl *ctrl; int ioport, fail; /* initialise a few variables */ devi = scp->crd; slt = devi->slt; ctrl = slt->ctrl; /* allocate a new I/O slot for the ethernet */ /* XXX: ctrl->mapio() always appears to return 0 (success), so * this may cause problems if another device is listening * on 0x300 already. In this case, you should choose a * known free I/O port address in the kernel config line * for the driver. It will be picked up here and used * instead of the autodetected value. */ slt->io[1].window = 1; slt->io[1].flags = IODF_WS|IODF_16BIT|IODF_ZEROWS|IODF_ACTIVE; slt->io[1].size = 0x10; #ifdef XE_IOBASE printf( "xe%d: user requested ioport 0x%x\n", scp->unit, XE_IOBASE ); ioport = XE_IOBASE; slt->io[1].start = ioport; fail = ctrl->mapio(slt, 1); #else for (ioport = 0x300; ioport < 0x400; ioport += 0x10) { slt->io[1].start = ioport; if ((fail = ctrl->mapio( slt, 1 )) == 0) break; } #endif /* did we find one? */ if (fail) { printf( "xe%d: xe_cem56fix: no free address space\n", scp->unit ); return -1; } /* munge the id_iobase entry for use by the rest of the driver */ #if XE_DEBUG > 1 printf( "xe%d: using 0x%x for RealPort ethernet\n", scp->unit, ioport ); #endif scp->dev->id_iobase = ioport; scp->dev->id_alive = 0x10; /* magic to set up the ethernet */ xe_memwrite( devi, DINGO_ECOR, DINGO_ECOR_IRQ_LEVEL|DINGO_ECOR_INT_ENABLE| DINGO_ECOR_IOB_ENABLE|DINGO_ECOR_ETH_ENABLE ); xe_memwrite( devi, DINGO_EBAR0, ioport & 0xff ); xe_memwrite( devi, DINGO_EBAR1, (ioport >> 8) & 0xff ); xe_memwrite( devi, DINGO_DCOR0, DINGO_DCOR0_SF_INT ); xe_memwrite( devi, DINGO_DCOR1, DINGO_DCOR1_INT_LEVEL|DINGO_DCOR1_EEDIO ); xe_memwrite( devi, DINGO_DCOR2, 0x00 ); xe_memwrite( devi, DINGO_DCOR3, 0x00 ); xe_memwrite( devi, DINGO_DCOR4, 0x00 ); /* success! */ return 0; } /* * PCMCIA probe routine. * Probe and identify the device. Called by the slot manager when the card is * inserted or the machine wakes up from suspend mode. 
Assmes that the slot * structure has been initialised already. */ static int xe_card_init(struct pccard_devinfo *devi) { struct xe_softc *scp; struct isa_device *dev; u_char buf[CISTPL_BUFSIZE]; u_char ver_str[CISTPL_BUFSIZE>>1]; off_t offs; int unit, success, rc, i; unit = devi->isahd.id_unit; scp = sca[unit]; dev = &devi->isahd; success = 0; #ifdef XE_DEBUG printf("xe: Probing for unit %d\n", unit); #endif /* Check that unit number is OK */ if (unit > MAXSLOT) { printf("xe%d: bad unit\n", unit); return (ENODEV); } /* Don't attach an active device */ if (scp && !scp->gone) { printf("xe%d: already attached\n", unit); return (EBUSY); } /* Allocate per-instance storage */ if (!scp) { if ((scp = malloc(sizeof(*scp), M_DEVBUF, M_NOWAIT)) == NULL) { printf("xe%d: failed to allocage driver storage\n", unit); return (ENOMEM); } bzero(scp, sizeof(*scp)); } /* Re-attach an existing device */ if (scp->gone) { scp->gone = 0; return 0; } /* Grep through CIS looking for relevant tuples */ offs = 0; do { u_int16_t vendor; u_int8_t rev, media, prod; /* * Read tuples one at a time into buf. Sucks, but it only happens once. * XXX - This assumes that attribute has been mapped by pccardd, which * XXX - seems to be the default situation. If not, we're well and truly * XXX - FUBAR. This is a general PCCARD problem, not our fault :) */ if ((rc = xe_memread( devi, offs, buf, CISTPL_BUFSIZE )) == 0) { switch (CISTPL_TYPE(buf)) { case 0x15: /* Grab version string (needed to ID some weird CE2's) */ #if XE_DEBUG > 1 printf("xe%d: Got version string (0x15)\n", unit); #endif for (i = 0; i < CISTPL_LEN(buf); ver_str[i] = CISTPL_DATA(buf, i++)); ver_str[i] = '\0'; ver_str[(CISTPL_BUFSIZE>>1) - 1] = CISTPL_LEN(buf); success++; break; case 0x20: /* Figure out what type of card we have */ #if XE_DEBUG > 1 printf("xe%d: Got card ID (0x20)\n", unit); #endif vendor = CISTPL_DATA(buf, 0) + (CISTPL_DATA(buf, 1) << 8); rev = CISTPL_DATA(buf, 2); media = CISTPL_DATA(buf, 3); prod = CISTPL_DATA(buf, 4); switch (vendor) { /* Get vendor ID */ case 0x0105: scp->vendor = "Xircom"; break; case 0x0138: case 0x0183: scp->vendor = "Compaq"; break; case 0x0089: scp->vendor = "Intel"; break; default: scp->vendor = "Unknown"; } if (!((prod & 0x40) && (media & 0x01))) { #if XE_DEBUG > 1 printf("xe%d: Not a PCMCIA Ethernet card!\n", unit); #endif rc = ENODEV; /* Not a PCMCIA Ethernet device */ } else { if (media & 0x10) { /* Ethernet/modem cards */ #if XE_DEBUG > 1 printf("xe%d: Card is Ethernet/modem combo\n", unit); #endif scp->modem = 1; switch (prod & 0x0f) { case 1: scp->card_type = "CEM"; break; case 2: scp->ce2 = 1; scp->card_type = "CEM2"; break; case 3: scp->ce2 = 1; scp->card_type = "CEM3"; break; case 4: scp->ce2 = 1; scp->card_type = "CEM33"; break; case 5: scp->mohawk = 1; scp->card_type = "CEM56M"; break; case 6: case 7: /* Some kind of RealPort card */ scp->mohawk = 1; scp->dingo = 1; scp->card_type = "CEM56"; break; default: rc = ENODEV; } } else { /* Ethernet-only cards */ #if XE_DEBUG > 1 printf("xe%d: Card is Ethernet only\n", unit); #endif switch (prod & 0x0f) { case 1: scp->card_type = "CE"; break; case 2: scp->ce2 = 1; scp->card_type = "CE2"; break; case 3: scp->mohawk = 1; scp->card_type = "CE3"; break; default: rc = ENODEV; } } } success++; break; case 0x22: /* Get MAC address */ if ((CISTPL_LEN(buf) == 8) && (CISTPL_DATA(buf, 0) == 0x04) && (CISTPL_DATA(buf, 1) == ETHER_ADDR_LEN)) { #if XE_DEBUG > 1 printf("xe%d: Got MAC address (0x22)\n", unit); #endif for (i = 0; i < ETHER_ADDR_LEN; scp->arpcom.ac_enaddr[i] = 
CISTPL_DATA(buf, i+2), i++); } success++; break; default: } } /* Skip to next tuple */ offs += ((CISTPL_LEN(buf) + 2) << 1); } while ((CISTPL_TYPE(buf) != 0xff) && (CISTPL_LEN(buf) != 0xff) && (rc == 0)); /* Die now if something went wrong above */ if ((rc != 0) || (success < 3)) { free(scp, M_DEVBUF); return rc; } /* Check for certain strange CE2's that look like CE's */ if (strcmp(scp->card_type, "CE") == 0) { u_char *str = ver_str; #if XE_DEBUG > 1 printf("xe%d: Checking for weird CE2 string\n", unit); #endif str += strlen(str) + 1; /* Skip forward to 3rd version string */ str += strlen(str) + 1; str += strlen(str) + 1; for (i = 0; i < strlen(str) - 2; i++) { if (bcmp(&str[i], "CE2", 3) ==0) { /* Look for "CE2" string */ scp->card_type = "CE2"; } } } /* Reject unsupported cards */ if (strcmp(scp->card_type, "CE") == 0 || strcmp(scp->card_type, "CEM") == 0) { printf("xe%d: Sorry, your %s card is not supported :(\n", unit, scp->card_type); free(scp, M_DEVBUF); return ENODEV; } /* Fill in some private data */ sca[unit] = scp; scp->dev = &devi->isahd; scp->crd = devi; scp->ifp = &scp->arpcom.ac_if; scp->ifm = &scp->ifmedia; scp->unit = unit; scp->autoneg_status = 0; /* Hack RealPorts into submission */ if (scp->dingo && xe_cem56fix(scp) < 0) { printf( "xe%d: Unable to fix your RealPort\n", unit ); sca[unit] = 0; free(scp, M_DEVBUF); return ENODEV; } /* Hopefully safe to read this here */ XE_SELECT_PAGE(4); scp->version = XE_INB(XE_BOV); /* Attempt to attach the device */ if (!xe_attach(scp->dev)) { sca[unit] = 0; free(scp, M_DEVBUF); return ENXIO; } #if NAPM > 0 /* Establish APM hooks once device attached */ scp->suspend_hook.ah_name = "xe_suspend"; scp->suspend_hook.ah_fun = xe_suspend; scp->suspend_hook.ah_arg = (void *)unit; scp->suspend_hook.ah_order = APM_MIN_ORDER; apm_hook_establish(APM_HOOK_SUSPEND, &scp->suspend_hook); scp->resume_hook.ah_name = "xe_resume"; scp->resume_hook.ah_fun = xe_resume; scp->resume_hook.ah_arg = (void *)unit; scp->resume_hook.ah_order = APM_MIN_ORDER; apm_hook_establish(APM_HOOK_RESUME, &scp->resume_hook); #endif /* NAPM > 0 */ /* Success */ return 0; } /* * Attach a device (called when xe_card_init succeeds). Assume that the probe * routine has set up the softc structure correctly and that we can trust the * unit number. */ static int xe_attach (struct isa_device *dev) { struct xe_softc *scp = sca[dev->id_unit]; int i; #ifdef XE_DEBUG printf("xe%d: attach\n", scp->unit); #endif /* Initialise the ifnet structure */ if (!scp->ifp->if_name) { scp->ifp->if_softc = scp; scp->ifp->if_name = "xe"; scp->ifp->if_unit = scp->unit; scp->ifp->if_timer = 0; scp->ifp->if_flags = (IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST); scp->ifp->if_linkmib = &scp->mibdata; scp->ifp->if_linkmiblen = sizeof scp->mibdata; scp->ifp->if_output = ether_output; scp->ifp->if_start = xe_start; scp->ifp->if_ioctl = xe_ioctl; scp->ifp->if_watchdog = xe_watchdog; scp->ifp->if_init = xe_init; scp->ifp->if_snd.ifq_maxlen = IFQ_MAXLEN; } /* Initialise the ifmedia structure */ ifmedia_init(scp->ifm, 0, xe_media_change, xe_media_status); callout_handle_init(&scp->chand); /* * Fill in supported media types. Some cards _do_ support full duplex * operation, but this driver doesn't, yet. Therefore we leave those modes * out of the list. We support some form of autoselection in all cases. 
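* Mohawk (CE3) cards are offered 100baseTX and 10baseT below, older cards 10baseT and 10base2; every card also gets an autoselect entry, which is made the default.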
*/ if (scp->mohawk) { ifmedia_add(scp->ifm, IFM_ETHER|IFM_100_TX, 0, NULL); ifmedia_add(scp->ifm, IFM_ETHER|IFM_10_T, 0, NULL); } else { ifmedia_add(scp->ifm, IFM_ETHER|IFM_10_T, 0, NULL); ifmedia_add(scp->ifm, IFM_ETHER|IFM_10_2, 0, NULL); } ifmedia_add(scp->ifm, IFM_ETHER|IFM_AUTO, 0, NULL); /* Default is to autoselect best supported media type */ ifmedia_set(scp->ifm, IFM_ETHER|IFM_AUTO); /* Print some useful information */ printf("\n"); printf("xe%d: %s %s, bonding version %#x%s%s\n", scp->unit, scp->vendor, scp->card_type, scp->version, scp->mohawk ? ", 100Mbps capable" : "", scp->modem ? ", with modem" : ""); if (scp->mohawk) { XE_SELECT_PAGE(0x10); printf("xe%d: DingoID = %#x, RevisionID = %#x, VendorID = %#x\n", scp->unit, XE_INW(XE_DINGOID), XE_INW(XE_RevID), XE_INW(XE_VendorID)); } if (scp->ce2) { XE_SELECT_PAGE(0x45); printf("xe%d: CE2 version = %#x\n", scp->unit, XE_INB(XE_REV)); } /* Print MAC address */ printf("xe%d: Ethernet address %02x", scp->unit, scp->arpcom.ac_enaddr[0]); for (i = 1; i < ETHER_ADDR_LEN; i++) { printf(":%02x", scp->arpcom.ac_enaddr[i]); } printf("\n"); /* Attach the interface */ if_attach(scp->ifp); ether_ifattach(scp->ifp); #if NBPF > 0 /* If BPF is in the kernel, call the attach for it */ #if XE_DEBUG > 1 printf("xe%d: BPF listener attached\n", scp->unit); #endif bpfattach(scp->ifp, DLT_EN10MB, sizeof(struct ether_header)); #endif /* Done */ return 1; } /* * Initialize device. Completes the reset procedure on the card and starts * output. If there's an autonegotiation in progress we DON'T do anything; * the media selection code will call us again when it's done. */ static void xe_init(void *xscp) { struct xe_softc *scp = xscp; int s; #ifdef XE_DEBUG printf("xe%d: init\n", scp->unit); #endif if (scp->gone) return; if (TAILQ_EMPTY(&scp->ifp->if_addrhead)) return; /* Reset transmitter flags */ scp->tx_queued = 0; scp->tx_tpr = 0; scp->tx_collisions = 0; scp->ifp->if_timer = 0; s = splimp(); XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC0, 0x20); /* Disable source insertion (WTF is that?) */ /* * Set the 'local memory dividing line' -- splits the 32K card memory into * 8K for transmit buffers and 24K for receive. This is done automatically * on newer revision cards. */ if (scp->srev != 1) { XE_SELECT_PAGE(2); XE_OUTW(XE_RBS, 0x2000); } /* Set up multicast addresses */ xe_setmulti(scp); /* Fix the data offset register -- reset leaves it off-by-one */ XE_SELECT_PAGE(0); XE_OUTW(XE_DO, 0x2000); /* * Set MAC interrupt masks and clear status regs. The bit names are direct * from the Linux code; I have no idea what most of them do. */ XE_SELECT_PAGE(0x40); /* Bit 7..0 */ XE_OUTB(XE_RX0Msk, 0xff); /* ROK, RAB, rsv, RO, CRC, AE, PTL, MP */ XE_OUTB(XE_TX0Msk, 0xff); /* TOK, TAB, SQE, LL, TU, JAB, EXC, CRS */ XE_OUTB(XE_TX0Msk+1, 0xb0); /* rsv, rsv, PTD, EXT, rsv, rsv, rsv, rsv */ XE_OUTB(XE_RST0, 0x00); /* ROK, RAB, REN, RO, CRC, AE, PTL, MP */ XE_OUTB(XE_TXST0, 0x00); /* TOK, TAB, SQE, LL, TU, JAB, EXC, CRS */ XE_OUTB(XE_TXST1, 0x00); /* TEN, rsv, PTD, EXT, retry_counter:4 */ /* * Check for an in-progress autonegotiation. If one is active, just set * IFF_RUNNING and return. The media selection code will call us again when * it's done. 
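 *
 * (A sketch of the expected flow, for illustration: xe_setmedia() clears
 * autoneg_status back to XE_AUTONEG_NONE once it has settled on a medium
 * and then calls xe_init() again, so on that second pass we fall through
 * to the "enable receiver" branch below.)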
*/ if (scp->autoneg_status) { scp->ifp->if_flags |= IFF_RUNNING; } else { /* Enable receiver, put MAC online */ XE_SELECT_PAGE(0x40); XE_OUTB(XE_CMD0, XE_CMD0_RX_ENABLE|XE_CMD0_ONLINE); /* Set up IMR, enable interrupts */ xe_enable_intr(scp); /* Attempt to start output */ scp->ifp->if_flags |= IFF_RUNNING; scp->ifp->if_flags &= ~IFF_OACTIVE; xe_start(scp->ifp); } (void)splx(s); } /* * Start output on interface. We make two assumptions here: * 1) that the current priority is set to splimp _before_ this code * is called *and* is returned to the appropriate priority after * return * 2) that the IFF_OACTIVE flag is checked before this code is called * (i.e. that the output part of the interface is idle) */ static void xe_start(struct ifnet *ifp) { struct xe_softc *scp = ifp->if_softc; struct mbuf *mbp; if (scp->gone) return; /* * Loop while there are packets to be sent, and space to send them. */ while (1) { IF_DEQUEUE(&ifp->if_snd, mbp); /* Suck a packet off the send queue */ if (mbp == NULL) { /* * We are using the !OACTIVE flag to indicate to the outside world that * we can accept an additional packet rather than that the transmitter * is _actually_ active. Indeed, the transmitter may be active, but if * we haven't filled all the buffers with data then we still want to * accept more. */ ifp->if_flags &= ~IFF_OACTIVE; return; } if (xe_pio_write_packet(scp, mbp) != 0) { IF_PREPEND(&ifp->if_snd, mbp); /* Push the packet back onto the queue */ ifp->if_flags |= IFF_OACTIVE; return; } #if NBPF > 0 /* Tap off here if there is a bpf listener */ if (ifp->if_bpf) { #if XE_DEBUG > 1 printf("xe%d: sending output packet to BPF\n", scp->unit); #endif bpf_mtap(ifp, mbp); } #endif /* NBPF > 0 */ ifp->if_timer = 5; /* In case we don't hear from the card again */ scp->tx_queued++; m_freem(mbp); } } /* * Process an ioctl request. Adapted from the ed driver. */ static int xe_ioctl (register struct ifnet *ifp, u_long command, caddr_t data) { struct xe_softc *scp; int s, error; scp = ifp->if_softc; error = 0; if (scp->gone) { return ENXIO; } s = splimp(); switch (command) { case SIOCSIFADDR: case SIOCGIFADDR: case SIOCSIFMTU: error = ether_ioctl(ifp, command, data); break; case SIOCSIFFLAGS: /* * If the interface is marked up and stopped, then start it. If it is * marked down and running, then stop it. */ if (ifp->if_flags & IFF_UP) { if (!(ifp->if_flags & IFF_RUNNING)) { xe_hard_reset(scp); xe_setmedia(scp); xe_init(scp); } } else { if (ifp->if_flags & IFF_RUNNING) xe_stop(scp); } case SIOCADDMULTI: case SIOCDELMULTI: /* * Multicast list has (maybe) changed; set the hardware filter * accordingly. This also serves to deal with promiscuous mode if we have * a BPF listener active. */ xe_setmulti(scp); error = 0; break; case SIOCSIFMEDIA: case SIOCGIFMEDIA: /* * Someone wants to get/set media options. */ error = ifmedia_ioctl(ifp, (struct ifreq *)data, &scp->ifmedia, command); break; default: error = EINVAL; } (void)splx(s); return error; } /* * Card interrupt handler: should return true if the interrupt was for us, in * case we are sharing our IRQ line with other devices (this will probably be * the case for multifunction cards). * * This function is probably more complicated than it needs to be, as it * attempts to deal with the case where multiple packets get sent between * interrupts. This is especially annoying when working out the collision * stats. Not sure whether this case ever really happens or not (maybe on a * slow/heavily loaded machine?) so it's probably best to leave this like it * is. 
* * Note that the crappy PIO used to get packets on and off the card means that * you will spend a lot of time in this routine -- I can get my P150 to spend * 90% of its time servicing interrupts if I really hammer the network. Could * fix this, but then you'd start dropping/losing packets. The moral of this * story? If you want good network performance _and_ some cycles left over to * get your work done, don't buy a Xircom card. Or convince them to tell me * how to do memory-mapped I/O :) */ static int xe_card_intr(struct pccard_devinfo *devi) { struct xe_softc *scp; struct ifnet *ifp; int unit, result; u_int16_t rx_bytes, rxs, txs; u_int8_t psr, isr, esr, rsr; unit = devi->isahd.id_unit; scp = sca[unit]; ifp = &scp->arpcom.ac_if; rx_bytes = 0; /* Bytes received on this interrupt */ result = 0; /* Set true if the interrupt is for us */ if (scp->gone) return 0; if (scp->mohawk) { XE_OUTB(XE_CR, 0); /* Disable interrupts */ } psr = XE_INB(XE_PR); /* Stash the current register page */ /* * Read ISR to see what caused this interrupt. Note that this clears the * ISR on CE2 type cards. */ if ((isr = XE_INB(XE_ISR)) && isr != 0xff) { result = 1; /* This device did generate an int */ esr = XE_INB(XE_ESR); /* Read the other status registers */ XE_SELECT_PAGE(0x40); rxs = XE_INB(XE_RST0); XE_OUTB(XE_RST0, ~rxs & 0xff); txs = XE_INB(XE_TXST0); txs |= XE_INB(XE_TXST1) << 8; XE_OUTB(XE_TXST0, 0); XE_OUTB(XE_TXST1, 0); XE_SELECT_PAGE(0); #if XE_DEBUG > 2 printf("xe%d: ISR=%#2.2x ESR=%#2.2x RST=%#2.2x TXST=%#4.4x\n", unit, isr, esr, rxs, txs); #endif /* * Handle transmit interrupts */ if (isr & XE_ISR_TX_PACKET) { u_int8_t new_tpr, sent; if ((new_tpr = XE_INB(XE_TPR)) < scp->tx_tpr) /* Update packet count */ sent = (0xff - scp->tx_tpr) + new_tpr; /* TPR rolled over */ else sent = new_tpr - scp->tx_tpr; if (sent > 0) { /* Packets sent since last interrupt */ scp->tx_tpr = new_tpr; scp->tx_queued -= sent; ifp->if_opackets += sent; ifp->if_collisions += scp->tx_collisions; /* * Collision stats are a PITA. If multiples frames have been sent, we * distribute any outstanding collision count equally amongst them. * However, if we're missing interrupts we're quite likely to also * miss some collisions; thus the total count will be off anyway. * Likewise, if we miss a frame dropped due to excessive collisions * any outstanding collisions count will be held against the next * frame to be successfully sent. Hopefully it averages out in the * end! * XXX - This will screw up if tx_collisions/sent > 14. FIX IT! */ switch (scp->tx_collisions) { case 0: break; case 1: scp->mibdata.dot3StatsSingleCollisionFrames++; scp->mibdata.dot3StatsCollFrequencies[0]++; break; default: if (sent == 1) { scp->mibdata.dot3StatsMultipleCollisionFrames++; scp->mibdata.dot3StatsCollFrequencies[scp->tx_collisions-1]++; } else { /* Distribute across multiple frames */ scp->mibdata.dot3StatsMultipleCollisionFrames += sent; scp->mibdata. dot3StatsCollFrequencies[scp->tx_collisions/sent] += sent - scp->tx_collisions%sent; scp->mibdata. 
dot3StatsCollFrequencies[scp->tx_collisions/sent + 1] += scp->tx_collisions%sent; } } scp->tx_collisions = 0; } ifp->if_timer = 0; ifp->if_flags &= ~IFF_OACTIVE; } if (txs & 0x0002) { /* Excessive collisions (packet dropped) */ ifp->if_collisions += 16; ifp->if_oerrors++; scp->tx_collisions = 0; scp->mibdata.dot3StatsExcessiveCollisions++; scp->mibdata.dot3StatsMultipleCollisionFrames++; scp->mibdata.dot3StatsCollFrequencies[15]++; XE_OUTB(XE_CR, XE_CR_RESTART_TX); } if (txs & 0x0040) /* Transmit aborted -- probably collisions */ scp->tx_collisions++; /* * Handle receive interrupts */ while ((esr = XE_INB(XE_ESR)) & XE_ESR_FULL_PACKET_RX) { if ((rsr = XE_INB(XE_RSR)) & XE_RSR_RX_OK) { struct ether_header *ehp; struct mbuf *mbp; u_int16_t len; len = XE_INW(XE_RBC); if (len == 0) continue; #if 0 /* * Limit the amount of time we spend in this loop, dropping packets if * necessary. The Linux code does this with considerably more * finesse, adjusting the threshold dynamically. */ if ((rx_bytes += len) > 22000) { ifp->if_iqdrops++; scp->mibData.dot3StatsMissedFrames++; XE_OUTW(XE_DO, 0x8000); continue; } #endif if (len & 0x01) len++; MGETHDR(mbp, M_DONTWAIT, MT_DATA); /* Allocate a header mbuf */ if (mbp != NULL) { mbp->m_pkthdr.rcvif = ifp; mbp->m_pkthdr.len = mbp->m_len = len; /* * If the mbuf header isn't big enough for the packet, attach an * mbuf cluster to hold it. The +2 is to allow for the nasty little * alignment hack below. */ if (len + 2 > MHLEN) { MCLGET(mbp, M_DONTWAIT); if ((mbp->m_flags & M_EXT) == 0) { m_freem(mbp); mbp = NULL; } } } if (mbp != NULL) { /* * The Ethernet header is 14 bytes long; thus the actual packet data * won't be 32-bit aligned when it's dumped into the mbuf. We * offset everything by 2 bytes to fix this. Apparently the * alignment is important for NFS, damn its eyes. */ mbp->m_data += 2; ehp = mtod(mbp, struct ether_header *); /* * Now get the packet, including the Ethernet header and trailer (?) * We use programmed I/O, because we don't know how to do shared * memory with these cards. So yes, it's real slow, and heavy on * the interrupts (CPU on my P150 maxed out at ~950KBps incoming). */ if (scp->srev == 0) { /* Workaround a bug in old cards */ u_short rhs; XE_SELECT_PAGE(5); rhs = XE_INW(XE_RHSA); XE_SELECT_PAGE(0); rhs += 3; /* Skip control info */ if (rhs >= 0x8000) rhs = 0; if (rhs + len > 0x8000) { int i; /* * XXX - This i-- seems very wrong, but it's what the Linux guys * XXX - do. Need someone with an old CE2 to test this for me. * XXX - 99/3/28: Changed the first i-- to an i++, maybe that'll * XXX - fix it? It seems as though the previous version would * XXX - have caused an infinite loop (what, another one?). */ for (i = 0; i < len; i++, rhs++) { ((char *)ehp)[i] = XE_INB(XE_EDP); if (rhs == 0x8000) { rhs = 0; i--; } } } else insw(scp->dev->id_iobase+XE_EDP, ehp, len >> 1); } else insw(scp->dev->id_iobase+XE_EDP, ehp, len >> 1); #if NBPF > 0 /* * Check if there's a BPF listener on this interface. If so, hand * off the raw packet to bpf. */ if (ifp->if_bpf) { #if XE_DEBUG > 1 printf("xe%d: passing input packet to BPF\n", scp->unit); #endif bpf_mtap(ifp, mbp); /* * Note that the interface cannot be in promiscuous mode if there * are no BPF listeners. And if we are in promiscuous mode, we * have to check if this packet is really ours. 
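 *
 * In other words (an illustrative restatement of the test below): a frame
 * whose destination MAC is not ours -- and which, judging by the
 * XE_RSR_PHYS_PACKET check, is not broadcast/multicast -- was only seen
 * because the receiver is promiscuous, so it is freed here after the BPF
 * tap instead of being handed to ether_input().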
*/ if ((ifp->if_flags & IFF_PROMISC) && bcmp(ehp->ether_dhost, scp->arpcom.ac_enaddr, sizeof(ehp->ether_dhost)) != 0 && (rsr & XE_RSR_PHYS_PACKET)) { m_freem(mbp); mbp = NULL; } } #endif /* NBPF > 0 */ if (mbp != NULL) { mbp->m_pkthdr.len = mbp->m_len = len - ETHER_HDR_LEN; mbp->m_data += ETHER_HDR_LEN; /* Strip off Ethernet header */ ether_input(ifp, ehp, mbp); /* Send the packet on its way */ ifp->if_ipackets++; /* Success! */ } XE_OUTW(XE_DO, 0x8000); /* skip_rx_packet command */ } } else if (rsr & XE_RSR_LONG_PACKET) { /* Packet length >1518 bytes */ scp->mibdata.dot3StatsFrameTooLongs++; ifp->if_ierrors++; } else if (rsr & XE_RSR_CRC_ERROR) { /* Bad checksum on packet */ scp->mibdata.dot3StatsFCSErrors++; ifp->if_ierrors++; } else if (rsr & XE_RSR_ALIGN_ERROR) { /* Packet alignment error */ scp->mibdata.dot3StatsAlignmentErrors++; ifp->if_ierrors++; } } if (rxs & 0x10) { /* Receiver overrun */ scp->mibdata.dot3StatsInternalMacReceiveErrors++; ifp->if_ierrors++; XE_OUTB(XE_CR, XE_CR_CLEAR_OVERRUN); } } XE_SELECT_PAGE(psr); /* Restore saved page */ XE_OUTB(XE_CR, XE_CR_ENABLE_INTR); /* Re-enable interrupts */ /* Could force an int here, instead of dropping packets? */ /* XE_OUTB(XE_CR, XE_CR_ENABLE_INTR|XE_CE_FORCE_INTR); */ return result; } /* * Device timeout/watchdog routine. Called automatically if we queue a packet * for transmission but don't get an interrupt within a specified timeout * (usually 5 seconds). When this happens we assume the worst and reset the * card. */ static void xe_watchdog(struct ifnet *ifp) { struct xe_softc *scp = ifp->if_softc; if (scp->gone) return; printf("xe%d: watchdog timeout; resetting card\n", scp->unit); scp->tx_timeouts++; ifp->if_oerrors += scp->tx_queued; xe_stop(scp); xe_hard_reset(scp); xe_setmedia(scp); xe_init(scp); } /* * Change media selection. */ static int xe_media_change(struct ifnet *ifp) { struct xe_softc *scp = ifp->if_softc; #ifdef XE_DEBUG printf("xe%d: media_change\n", ifp->if_unit); #endif if (IFM_TYPE(scp->ifm->ifm_media) != IFM_ETHER) return(EINVAL); /* * Some card/media combos aren't always possible -- filter those out here. */ if ((IFM_SUBTYPE(scp->ifm->ifm_media) == IFM_AUTO || IFM_SUBTYPE(scp->ifm->ifm_media) == IFM_100_TX) && !scp->phy_ok) return (EINVAL); xe_setmedia(scp); return 0; } /* * Return current media selection. */ static void xe_media_status(struct ifnet *ifp, struct ifmediareq *mrp) { #ifdef XE_DEBUG printf("xe%d: media_status\n", ifp->if_unit); #endif mrp->ifm_active = ((struct xe_softc *)ifp->if_softc)->media; return; } /* * Select active media. */ static void xe_setmedia(void *xscp) { struct xe_softc *scp = xscp; u_int16_t bmcr, bmsr, anar, lpar; #ifdef XE_DEBUG printf("xe%d: setmedia\n", scp->unit); #endif /* Cancel any pending timeout */ untimeout(xe_setmedia, scp, scp->chand); xe_disable_intr(scp); /* Select media */ scp->media = IFM_ETHER; switch (IFM_SUBTYPE(scp->ifm->ifm_media)) { case IFM_AUTO: /* Autoselect media */ scp->media = IFM_ETHER|IFM_AUTO; /* * Autoselection is really awful. It goes something like this: * * Wait until the transmitter goes idle (2sec timeout). 
* Reset card * IF a 100Mbit PHY exists * Start NWAY autonegotiation (3.5sec timeout) * IF that succeeds * Select 100baseTX or 10baseT, whichever was detected * ELSE * Reset card * IF a 100Mbit PHY exists * Try to force a 100baseTX link (3sec timeout) * IF that succeeds * Select 100baseTX * ELSE * Disable the PHY * ENDIF * ENDIF * ENDIF * ENDIF * IF nothing selected so far * IF a 100Mbit PHY exists * Select 10baseT * ELSE * Select 10baseT or 10base2, whichever is connected * ENDIF * ENDIF */ switch (scp->autoneg_status) { case XE_AUTONEG_NONE: #if XE_DEBUG > 1 printf("xe%d: Waiting for idle transmitter\n", scp->unit); #endif scp->arpcom.ac_if.if_flags |= IFF_OACTIVE; scp->autoneg_status = XE_AUTONEG_WAITING; scp->chand = timeout(xe_setmedia, scp, hz * 2); return; case XE_AUTONEG_WAITING: xe_soft_reset(scp); if (scp->phy_ok) { #if XE_DEBUG > 1 printf("xe%d: Starting autonegotiation\n", scp->unit); #endif bmcr = xe_phy_readreg(scp, PHY_BMCR); bmcr &= ~(PHY_BMCR_AUTONEGENBL); xe_phy_writereg(scp, PHY_BMCR, bmcr); anar = xe_phy_readreg(scp, PHY_ANAR); anar &= ~(PHY_ANAR_100BT4|PHY_ANAR_100BTXFULL|PHY_ANAR_10BTFULL); anar |= PHY_ANAR_100BTXHALF|PHY_ANAR_10BTHALF; xe_phy_writereg(scp, PHY_ANAR, anar); bmcr |= PHY_BMCR_AUTONEGENBL|PHY_BMCR_AUTONEGRSTR; xe_phy_writereg(scp, PHY_BMCR, bmcr); scp->autoneg_status = XE_AUTONEG_STARTED; scp->chand = timeout(xe_setmedia, scp, hz * 7/2); return; } else { scp->autoneg_status = XE_AUTONEG_FAIL; } break; case XE_AUTONEG_STARTED: bmsr = xe_phy_readreg(scp, PHY_BMSR); lpar = xe_phy_readreg(scp, PHY_LPAR); if (bmsr & (PHY_BMSR_AUTONEGCOMP|PHY_BMSR_LINKSTAT)) { #if XE_DEBUG > 1 printf("xe%d: Autonegotiation complete!\n", scp->unit); #endif /* * XXX - Shouldn't have to do this, but (on my hub at least) the * XXX - transmitter won't work after a successful autoneg. So we see * XXX - what the negotiation result was and force that mode. I'm * XXX - sure there is an easy fix for this. */ if (lpar & PHY_LPAR_100BTXHALF) { xe_phy_writereg(scp, PHY_BMCR, PHY_BMCR_SPEEDSEL); XE_MII_DUMP(scp); XE_SELECT_PAGE(2); XE_OUTB(XE_MSR, XE_INB(XE_MSR) | 0x08); scp->media = IFM_ETHER|IFM_100_TX; scp->autoneg_status = XE_AUTONEG_NONE; } else { /* * XXX - Bit of a hack going on in here. * XXX - This is derived from Ken Hughes patch to the Linux driver * XXX - to make it work with 10Mbit _autonegotiated_ links on CE3B * XXX - cards. What's a CE3B and how's it differ from a plain CE3? * XXX - these are the things we need to find out. */ xe_phy_writereg(scp, PHY_BMCR, 0x0000); XE_SELECT_PAGE(2); /* BEGIN HACK */ XE_OUTB(XE_MSR, XE_INB(XE_MSR) | 0x08); XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0x80); scp->media = IFM_ETHER|IFM_10_T; scp->autoneg_status = XE_AUTONEG_NONE; /* END HACK */ /*XE_OUTB(XE_MSR, XE_INB(XE_MSR) & ~0x08);*/ /* Disable PHY? 
*/ /*scp->autoneg_status = XE_AUTONEG_FAIL;*/ } } else { #if XE_DEBUG > 1 printf("xe%d: Autonegotiation failed; trying 100baseTX\n", scp->unit); #endif XE_MII_DUMP(scp); xe_soft_reset(scp); if (scp->phy_ok) { xe_phy_writereg(scp, PHY_BMCR, PHY_BMCR_SPEEDSEL); scp->autoneg_status = XE_AUTONEG_100TX; scp->chand = timeout(xe_setmedia, scp, hz * 3); return; } else { scp->autoneg_status = XE_AUTONEG_FAIL; } } break; case XE_AUTONEG_100TX: (void)xe_phy_readreg(scp, PHY_BMSR); bmsr = xe_phy_readreg(scp, PHY_BMSR); if (bmsr & PHY_BMSR_LINKSTAT) { #if XE_DEBUG > 1 printf("xe%d: Got 100baseTX link!\n", scp->unit); #endif XE_MII_DUMP(scp); XE_SELECT_PAGE(2); XE_OUTB(XE_MSR, XE_INB(XE_MSR) | 0x08); scp->media = IFM_ETHER|IFM_100_TX; scp->autoneg_status = XE_AUTONEG_NONE; } else { #if XE_DEBUG > 1 printf("xe%d: Autonegotiation failed; disabling PHY\n", scp->unit); #endif XE_MII_DUMP(scp); xe_phy_writereg(scp, PHY_BMCR, 0x0000); XE_SELECT_PAGE(2); XE_OUTB(XE_MSR, XE_INB(XE_MSR) & ~0x08); /* Disable PHY? */ scp->autoneg_status = XE_AUTONEG_FAIL; } break; } /* * If we got down here _and_ autoneg_status is XE_AUTONEG_FAIL, then * either autonegotiation failed, or never got started to begin with. In * either case, select a suitable 10Mbit media and hope it works. We * don't need to reset the card again, since it will have been done * already by the big switch above. */ if (scp->autoneg_status == XE_AUTONEG_FAIL) { #if XE_DEBUG > 1 printf("xe%d: Selecting 10baseX\n", scp->unit); #endif if (scp->mohawk) { XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0x80); scp->media = IFM_ETHER|IFM_10_T; scp->autoneg_status = XE_AUTONEG_NONE; } else { XE_SELECT_PAGE(4); XE_OUTB(XE_GPR0, 4); DELAY(50000); XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, (XE_INB(XE_ESR) & XE_ESR_MEDIA_SELECT) ? 0x80 : 0xc0); scp->media = IFM_ETHER|((XE_INB(XE_ESR) & XE_ESR_MEDIA_SELECT) ? IFM_10_T : IFM_10_2); scp->autoneg_status = XE_AUTONEG_NONE; } } break; /* * If a specific media has been requested, we just reset the card and * select it (one small exception -- if 100baseTX is requested by there is * no PHY, we fall back to 10baseT operation). */ case IFM_100_TX: /* Force 100baseTX */ xe_soft_reset(scp); if (scp->phy_ok) { #if XE_DEBUG > 1 printf("xe%d: Selecting 100baseTX\n", scp->unit); #endif XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0); xe_phy_writereg(scp, PHY_BMCR, PHY_BMCR_SPEEDSEL); XE_SELECT_PAGE(2); XE_OUTB(XE_MSR, XE_INB(XE_MSR) | 0x08); scp->media |= IFM_100_TX; break; } /* FALLTHROUGH */ case IFM_10_T: /* Force 10baseT */ xe_soft_reset(scp); #if XE_DEBUG > 1 printf("xe%d: Selecting 10baseT\n", scp->unit); #endif if (scp->phy_ok) { xe_phy_writereg(scp, PHY_BMCR, 0x0000); XE_SELECT_PAGE(2); XE_OUTB(XE_MSR, XE_INB(XE_MSR) & ~0x08); /* Disable PHY */ } XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0x80); scp->media |= IFM_10_T; break; case IFM_10_2: xe_soft_reset(scp); #if XE_DEBUG > 1 printf("xe%d: Selecting 10base2\n", scp->unit); #endif XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0xc0); scp->media |= IFM_10_2; break; } /* * Finally, the LEDs are set to match whatever media was chosen and the * transmitter is unblocked. */ #if XE_DEBUG > 1 printf("xe%d: Setting LEDs\n", scp->unit); #endif XE_SELECT_PAGE(2); switch (IFM_SUBTYPE(scp->media)) { case IFM_100_TX: case IFM_10_T: XE_OUTB(XE_LED, 0x3b); if (scp->dingo) XE_OUTB(0x0b, 0x04); /* 100Mbit LED */ break; case IFM_10_2: XE_OUTB(XE_LED, 0x3a); break; } /* Restart output? */ scp->ifp->if_flags &= ~IFF_OACTIVE; xe_init(scp); } /* * Hard reset (power cycle) the card. 
*/ static void xe_hard_reset(struct xe_softc *scp) { int s; #ifdef XE_DEBUG printf("xe%d: hard_reset\n", scp->unit); #endif if (scp->gone) return; s = splimp(); /* * Power cycle the card. */ XE_SELECT_PAGE(4); XE_OUTB(XE_GPR1, 0); /* Power off */ DELAY(40000); if (scp->mohawk) XE_OUTB(XE_GPR1, 1); /* And back on again */ else XE_OUTB(XE_GPR1, 5); /* Also set AIC bit, whatever that is */ DELAY(40000); XE_SELECT_PAGE(0); (void)splx(s); } /* * Soft reset the card. Also makes sure that the ML6692 and 10Mbit controller * are powered up, sets the silicon revision number in softc, disables * interrupts and checks for the prescence of a 100Mbit PHY. This should * leave us in a position where we can access the PHY and do media * selection. The function imposes a 0.5s delay while the hardware powers up. */ static void xe_soft_reset(struct xe_softc *scp) { int s; #ifdef XE_DEBUG printf("xe%d: soft_reset\n", scp->unit); #endif if (scp->gone) return; s = splimp(); /* * Reset the card, (again). */ XE_SELECT_PAGE(0); XE_OUTB(XE_CR, XE_CR_SOFT_RESET); DELAY(40000); XE_OUTB(XE_CR, 0); DELAY(40000); if (scp->mohawk) { /* * set GP1 and GP2 as outputs (bits 2 & 3) * set GP1 low to power on the ML6692 (bit 0) * set GP2 high to power on the 10Mhz chip (bit 1) */ XE_SELECT_PAGE(4); XE_OUTB(XE_GPR0, 0x0e); } /* * Wait for everything to wake up. */ DELAY(500000); /* * Get silicon revision number. */ XE_SELECT_PAGE(4); if (scp->mohawk) scp->srev = (XE_INB(XE_BOV) & 0x70) >> 4; else scp->srev = (XE_INB(XE_BOV) & 0x30) >> 4; #ifdef XE_DEBUG printf("xe%d: silicon revision = %d\n", scp->unit, scp->srev); #endif /* * Shut off interrupts. */ xe_disable_intr(scp); /* * Check for PHY. */ if (scp->mohawk) { scp->phy_ok = xe_mii_init(scp); } XE_SELECT_PAGE(0); (void)splx(s); } /* * Take interface offline. This is done by powering down the device, which I * assume means just shutting down the transceiver and Ethernet logic. This * requires a _hard_ reset to recover from, as we need to power up again. */ static void xe_stop(struct xe_softc *scp) { int s; #ifdef XE_DEBUG printf("xe%d: stop\n", scp->unit); #endif if (scp->gone) return; s = splimp(); /* * Shut off interrupts. */ xe_disable_intr(scp); /* * Power down. */ XE_SELECT_PAGE(4); XE_OUTB(XE_GPR1, 0); XE_SELECT_PAGE(0); /* * ~IFF_RUNNING == interface down. */ scp->ifp->if_flags &= ~IFF_RUNNING; scp->ifp->if_flags &= ~IFF_OACTIVE; scp->ifp->if_timer = 0; (void)splx(s); } /* * Enable Ethernet interrupts from the card. */ static void xe_enable_intr(struct xe_softc *scp) { #ifdef XE_DEBUG printf("xe%d: enable_intr\n", scp->unit); #endif XE_SELECT_PAGE(1); XE_OUTB(XE_IMR0, 0xff); /* Unmask everything */ XE_OUTB(XE_IMR1, 0x01); /* Unmask TX underrun detection */ DELAY(1); XE_SELECT_PAGE(0); XE_OUTB(XE_CR, XE_CR_ENABLE_INTR); /* Enable interrupts */ if (scp->modem && !scp->dingo) { /* This bit is just magic */ if (!(XE_INB(0x10) & 0x01)) { XE_OUTB(0x10, 0x11); /* Unmask master int enable bit */ } } } /* * Disable all Ethernet interrupts from the card. */ static void xe_disable_intr(struct xe_softc *scp) { #ifdef XE_DEBUG printf("xe%d: disable_intr\n", scp->unit); #endif XE_SELECT_PAGE(0); XE_OUTB(XE_CR, 0); /* Disable interrupts */ if (scp->modem && !scp->dingo) { /* More magic (does this work?) 
*/ XE_OUTB(0x10, 0x10); /* Mask the master int enable bit */ } XE_SELECT_PAGE(1); XE_OUTB(XE_IMR0, 0); /* Forbid all interrupts */ XE_OUTB(XE_IMR1, 0); XE_SELECT_PAGE(0); } /* * Set up multicast filter and promiscuous mode */ static void xe_setmulti(struct xe_softc *scp) { struct ifnet *ifp; struct ifmultiaddr *maddr; int count; ifp = &scp->arpcom.ac_if; maddr = ifp->if_multiaddrs.lh_first; /* Get length of multicast list */ for (count = 0; maddr != NULL; maddr = maddr->ifma_link.le_next, count++); if ((ifp->if_flags & IFF_PROMISC) || (ifp->if_flags & IFF_ALLMULTI) || (count > 9)) { /* * Go into promiscuous mode if either of the PROMISC or ALLMULTI flags are * set, or if we have been asked to deal with more than 9 multicast * addresses. To do this: set MPE and PME in SWC1 */ XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0x06); } else if ((ifp->if_flags & IFF_MULTICAST) && (count > 0)) { /* * Program the filters for up to 9 addresses */ XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0x01); XE_SELECT_PAGE(0x40); XE_OUTB(XE_CMD0, XE_CMD0_OFFLINE); /*xe_reg_dump(scp);*/ xe_setaddrs(scp); /*xe_reg_dump(scp);*/ XE_SELECT_PAGE(0x40); XE_OUTB(XE_CMD0, XE_CMD0_RX_ENABLE|XE_CMD0_ONLINE); } else { /* * No multicast operation (default) */ XE_SELECT_PAGE(0x42); XE_OUTB(XE_SWC1, 0); } XE_SELECT_PAGE(0); } /* * Set up all on-chip addresses (for multicast). AFAICS, there are 10 * of these things; the first is our MAC address, the other 9 are mcast * addresses, padded with the MAC address if there aren't enough. * XXX - This doesn't work right, but I'm not sure why yet. We seem to be * XXX - doing much the same as the Linux code, which is weird enough that * XXX - it's probably right (despite my earlier comments to the contrary). */ static void xe_setaddrs(struct xe_softc *scp) { struct ifmultiaddr *maddr; u_int8_t *addr; u_int8_t page, slot, byte, i; maddr = scp->arpcom.ac_if.if_multiaddrs.lh_first; XE_SELECT_PAGE(page = 0x50); for (slot = 0, byte = 8; slot < 10; slot++) { if (slot == 0) addr = (u_int8_t *)(&scp->arpcom.ac_enaddr); else { while (maddr != NULL && maddr->ifma_addr->sa_family != AF_LINK) maddr = maddr->ifma_link.le_next; if (maddr != NULL) addr = LLADDR((struct sockaddr_dl *)maddr->ifma_addr); else addr = (u_int8_t *)(&scp->arpcom.ac_enaddr); } for (i = 0; i < 6; i++, byte++) { #if XE_DEBUG > 2 if (i) printf(":%x", addr[i]); else printf("xe%d: individual addresses %d: %x", scp->unit, slot, addr[0]); #endif if (byte > 15) { page++; byte = 8; XE_SELECT_PAGE(page); } if (scp->mohawk) XE_OUTB(byte, addr[5 - i]); else XE_OUTB(byte, addr[i]); } #if XE_DEBUG > 2 printf("\n"); #endif } XE_SELECT_PAGE(0); } /* * Write an outgoing packet to the card using programmed I/O. 
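 *
 * Worked example of the padding arithmetic used below (assuming the
 * stock ETHER_MIN_LEN/ETHER_CRC_LEN values of 64 and 4): a 20-byte
 * frame is shorter than the 60-byte minimum, so
 *     pad = (60 - 20 + 1) >> 1 = 20 filler words
 * and len is bumped to 60 before being written to the card; the filler
 * words themselves are only pushed out explicitly on non-mohawk cards.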
*/ static int xe_pio_write_packet(struct xe_softc *scp, struct mbuf *mbp) { struct mbuf *mbp2; u_int16_t len, pad, free, ok; u_int8_t *data; u_int8_t savebyte[2], wantbyte; /* Get total packet length */ for (len = 0, mbp2 = mbp; mbp2 != NULL; len += mbp2->m_len, mbp2 = mbp2->m_next); /* Packets < minimum length may need to be padded out */ pad = 0; if (len < ETHER_MIN_LEN - ETHER_CRC_LEN) { pad = (ETHER_MIN_LEN - ETHER_CRC_LEN - len + 1) >> 1; len = ETHER_MIN_LEN - ETHER_CRC_LEN; } /* Check transmit buffer space */ XE_SELECT_PAGE(0); XE_OUTW(XE_TRS, len+2); free = XE_INW(XE_TSO); ok = free & 0x8000; free &= 0x7fff; if (free <= len + 2) return 1; /* Send packet length to card */ XE_OUTW(XE_EDP, len); /* * Write packet to card using PIO (code stolen from the ed driver) */ wantbyte = 0; while (mbp != NULL) { len = mbp->m_len; if (len > 0) { data = mtod(mbp, caddr_t); if (wantbyte) { /* Finish the last word */ savebyte[1] = *data; XE_OUTW(XE_EDP, *(u_short *)savebyte); data++; len--; wantbyte = 0; } if (len > 1) { /* Output contiguous words */ outsw(scp->dev->id_iobase+XE_EDP, data, len >> 1); data += len & ~1; len &= 1; } if (len == 1) { /* Save last byte, if necessary */ savebyte[0] = *data; wantbyte = 1; } } mbp = mbp->m_next; } if (wantbyte) /* Last byte for odd-length packets */ XE_OUTW(XE_EDP, *(u_short *)savebyte); /* * For CE3 cards, just tell 'em to send -- apparently the card will pad out * short packets with random cruft. Otherwise, write nonsense words to fill * out the packet. I guess it is then sent automatically (?) */ if (scp->mohawk) XE_OUTB(XE_CR, XE_CR_TX_PACKET|XE_CR_ENABLE_INTR); else while (pad > 0) { XE_OUTW(XE_EDP, 0xdead); pad--; } return 0; } /* * The device entry is being removed, probably because someone ejected the * card. The interface should have been brought down manually before calling * this function; if not you may well lose packets. In any case, I shut down * the card and the interface, and hope for the best. The 'gone' flag is set, * so hopefully no-one else will try to access the missing card. */ static void xe_card_unload(struct pccard_devinfo *devi) { struct xe_softc *scp; struct ifnet *ifp; int unit; unit = devi->isahd.id_unit; scp = sca[unit]; ifp = &scp->arpcom.ac_if; if (scp->gone) { printf("xe%d: already unloaded\n", unit); return; } if_down(ifp); ifp->if_flags &= ~(IFF_RUNNING|IFF_OACTIVE); xe_stop(scp); scp->gone = 1; } /* * Compute the 32-bit Ethernet CRC for the given buffer. */ static u_int32_t xe_compute_crc(u_int8_t *data, int len) { u_int32_t crc = 0xffffffff; u_int32_t poly = 0x04c11db6; u_int8_t current, crc31, bit; int i, k; for (i = 0; i < len; i++) { current = data[i]; for (k = 1; k <= 8; k++) { if (crc & 0x80000000) { crc31 = 0x01; } else { crc31 = 0; } bit = crc31 ^ (current & 0x01); crc <<= 1; current >>= 1; if (bit) { crc = (crc ^ poly)|1; } } } return crc; } /* * Convert a CRC into an index into the multicast hash table. What we do is * take the most-significant 6 bits of the CRC, reverse them, and use that as * the bit number in the hash table. Bits 5:3 of the result give the byte * within the table (0-7); bits 2:0 give the bit number within that byte (also * 0-7), ie. the number of shifts needed to get it into the lsb position. 
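 *
 * A worked example of that mapping (illustrative only -- check it against
 * xe_compute_hashbit() itself): a CRC of 0xd2xxxxxx has top six bits
 * 110100; reversed that is 001011 = 11, i.e. byte 1 of the hash table
 * (bits 5:3 = 001) and bit 3 within that byte (bits 2:0 = 011).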
*/ static int xe_compute_hashbit(u_int32_t crc) { u_int8_t hashbit = 0; int i; for (i = 0; i < 6; i++) { hashbit >>= 1; if (crc & 0x80000000) { hashbit &= 0x80; } crc <<= 1; } return (hashbit >> 2); } /************************************************************** * * * M I I F U N C T I O N S * * * **************************************************************/ /* * Alternative MII/PHY handling code adapted from the xl driver. It doesn't * seem to work any better than the xirc2_ps stuff, but it's cleaner code. * XXX - this stuff shouldn't be here. It should all be abstracted off to * XXX - some kind of common MII-handling code, shared by all drivers. But * XXX - that's a whole other mission. */ #define XE_MII_SET(x) XE_OUTB(XE_GPR2, (XE_INB(XE_GPR2) | 0x04) | (x)) #define XE_MII_CLR(x) XE_OUTB(XE_GPR2, (XE_INB(XE_GPR2) | 0x04) & ~(x)) /* * Sync the PHYs by setting data bit and strobing the clock 32 times. */ static void xe_mii_sync(struct xe_softc *scp) { register int i; XE_SELECT_PAGE(2); XE_MII_SET(XE_MII_DIR|XE_MII_WRD); for (i = 0; i < 32; i++) { XE_MII_SET(XE_MII_CLK); DELAY(1); XE_MII_CLR(XE_MII_CLK); DELAY(1); } } /* * Look for a MII-compliant PHY. If we find one, reset it. */ static int xe_mii_init(struct xe_softc *scp) { u_int16_t status; status = xe_phy_readreg(scp, PHY_BMSR); if ((status & 0xff00) != 0x7800) { #if XE_DEBUG > 1 printf("xe%d: no PHY found, %0x\n", scp->unit, status); #endif return 0; } else { #if XE_DEBUG > 1 printf("xe%d: PHY OK!\n", scp->unit); #endif /* Reset the PHY */ xe_phy_writereg(scp, PHY_BMCR, PHY_BMCR_RESET); DELAY(500); while(xe_phy_readreg(scp, PHY_BMCR) & PHY_BMCR_RESET); XE_MII_DUMP(scp); return 1; } } /* * Clock a series of bits through the MII. */ static void xe_mii_send(struct xe_softc *scp, u_int32_t bits, int cnt) { int i; XE_SELECT_PAGE(2); XE_MII_CLR(XE_MII_CLK); for (i = (0x1 << (cnt - 1)); i; i >>= 1) { if (bits & i) { XE_MII_SET(XE_MII_WRD); } else { XE_MII_CLR(XE_MII_WRD); } DELAY(1); XE_MII_CLR(XE_MII_CLK); DELAY(1); XE_MII_SET(XE_MII_CLK); } } /* * Read an PHY register through the MII. */ static int xe_mii_readreg(struct xe_softc *scp, struct xe_mii_frame *frame) { int i, ack, s; s = splimp(); /* * Set up frame for RX. */ frame->mii_stdelim = XE_MII_STARTDELIM; frame->mii_opcode = XE_MII_READOP; frame->mii_turnaround = 0; frame->mii_data = 0; XE_SELECT_PAGE(2); XE_OUTB(XE_GPR2, 0); /* * Turn on data xmit. */ XE_MII_SET(XE_MII_DIR); xe_mii_sync(scp); /* * Send command/address info. */ xe_mii_send(scp, frame->mii_stdelim, 2); xe_mii_send(scp, frame->mii_opcode, 2); xe_mii_send(scp, frame->mii_phyaddr, 5); xe_mii_send(scp, frame->mii_regaddr, 5); /* Idle bit */ XE_MII_CLR((XE_MII_CLK|XE_MII_WRD)); DELAY(1); XE_MII_SET(XE_MII_CLK); DELAY(1); /* Turn off xmit. */ XE_MII_CLR(XE_MII_DIR); /* Check for ack */ XE_MII_CLR(XE_MII_CLK); DELAY(1); XE_MII_SET(XE_MII_CLK); DELAY(1); ack = XE_INB(XE_GPR2) & XE_MII_RDD; /* * Now try reading data bits. If the ack failed, we still * need to clock through 16 cycles to keep the PHY(s) in sync. */ if (ack) { for(i = 0; i < 16; i++) { XE_MII_CLR(XE_MII_CLK); DELAY(1); XE_MII_SET(XE_MII_CLK); DELAY(1); } goto fail; } for (i = 0x8000; i; i >>= 1) { XE_MII_CLR(XE_MII_CLK); DELAY(1); if (!ack) { if (XE_INB(XE_GPR2) & XE_MII_RDD) frame->mii_data |= i; DELAY(1); } XE_MII_SET(XE_MII_CLK); DELAY(1); } fail: XE_MII_CLR(XE_MII_CLK); DELAY(1); XE_MII_SET(XE_MII_CLK); DELAY(1); splx(s); if (ack) return(1); return(0); } /* * Write to a PHY register through the MII. 
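 *
 * For reference, the frame clocked out by the routine below looks like
 * (field widths in bits, most significant bit first):
 *     <start:2><opcode:2><phyaddr:5><regaddr:5><turnaround:2><data:16>
 * i.e. the usual MII management-frame layout, with the 32-bit preamble
 * supplied beforehand by xe_mii_sync().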
*/ static int xe_mii_writereg(struct xe_softc *scp, struct xe_mii_frame *frame) { int s; s = splimp(); /* * Set up frame for TX. */ frame->mii_stdelim = XE_MII_STARTDELIM; frame->mii_opcode = XE_MII_WRITEOP; frame->mii_turnaround = XE_MII_TURNAROUND; XE_SELECT_PAGE(2); /* * Turn on data output. */ XE_MII_SET(XE_MII_DIR); xe_mii_sync(scp); xe_mii_send(scp, frame->mii_stdelim, 2); xe_mii_send(scp, frame->mii_opcode, 2); xe_mii_send(scp, frame->mii_phyaddr, 5); xe_mii_send(scp, frame->mii_regaddr, 5); xe_mii_send(scp, frame->mii_turnaround, 2); xe_mii_send(scp, frame->mii_data, 16); /* Idle bit. */ XE_MII_SET(XE_MII_CLK); DELAY(1); XE_MII_CLR(XE_MII_CLK); DELAY(1); /* * Turn off xmit. */ XE_MII_CLR(XE_MII_DIR); splx(s); return(0); } /* * Read a register from the PHY. */ static u_int16_t xe_phy_readreg(struct xe_softc *scp, u_int16_t reg) { struct xe_mii_frame frame; bzero((char *)&frame, sizeof(frame)); frame.mii_phyaddr = 0; frame.mii_regaddr = reg; xe_mii_readreg(scp, &frame); return(frame.mii_data); } /* * Write to a PHY register. */ static void xe_phy_writereg(struct xe_softc *scp, u_int16_t reg, u_int16_t data) { struct xe_mii_frame frame; bzero((char *)&frame, sizeof(frame)); frame.mii_phyaddr = 0; frame.mii_regaddr = reg; frame.mii_data = data; xe_mii_writereg(scp, &frame); return; } #ifdef XE_DEBUG /* * A bit of debugging code. */ static void xe_mii_dump(struct xe_softc *scp) { int i, s; s = splimp(); printf("xe%d: MII registers: ", scp->unit); for (i = 0; i < 2; i++) { printf(" %d:%04x", i, xe_phy_readreg(scp, i)); } for (i = 4; i < 7; i++) { printf(" %d:%04x", i, xe_phy_readreg(scp, i)); } printf("\n"); (void)splx(s); } static void xe_reg_dump(struct xe_softc *scp) { int page, i, s; s = splimp(); printf("xe%d: Common registers: ", scp->unit); for (i = 0; i < 8; i++) { printf(" %2.2x", XE_INB(i)); } printf("\n"); for (page = 0; page <= 8; page++) { printf("xe%d: Register page %2.2x: ", scp->unit, page); XE_SELECT_PAGE(page); for (i = 8; i < 16; i++) { printf(" %2.2x", XE_INB(i)); } printf("\n"); } for (page = 0x10; page < 0x5f; page++) { if ((page >= 0x11 && page <= 0x3f) || (page == 0x41) || (page >= 0x43 && page <= 0x4f) || (page >= 0x59)) continue; printf("xe%d: Register page %2.2x: ", scp->unit, page); XE_SELECT_PAGE(page); for (i = 8; i < 16; i++) { printf(" %2.2x", XE_INB(i)); } printf("\n"); } (void)splx(s); } #endif #if NAPM > 0 /************************************************************** * * * A P M F U N C T I O N S * * * **************************************************************/ /* * This is called when we go into suspend/standby mode */ static int xe_suspend(void *xunit) { #ifdef XE_DEBUG struct xe_softc *scp = sca[(int)xunit]; printf("xe%d: APM suspend\n", scp->unit); #endif return 0; } /* * This is called when we wake up again */ static int xe_resume(void *xunit) { #ifdef XE_DEBUG struct xe_softc *scp = sca[(int)xunit]; printf("xe%d: APM resume\n", scp->unit); #endif return 0; } #endif /* NAPM > 0 */ #endif /* NCARD > 0 */ #endif /* NXE > 0 */ Index: head/sys/fs/cd9660/cd9660_vfsops.c =================================================================== --- head/sys/fs/cd9660/cd9660_vfsops.c (revision 49534) +++ head/sys/fs/cd9660/cd9660_vfsops.c (revision 49535) @@ -1,956 +1,955 @@ /*- * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). 
The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 - * $Id: cd9660_vfsops.c,v 1.55 1999/05/08 06:39:32 phk Exp $ + * $Id: cd9660_vfsops.c,v 1.56 1999/05/31 11:27:21 phk Exp $ */ #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_ISOFSMNT, "ISOFS mount", "ISOFS mount structure"); MALLOC_DEFINE(M_ISOFSNODE, "ISOFS node", "ISOFS vnode private part"); static int cd9660_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int cd9660_start __P((struct mount *, int, struct proc *)); static int cd9660_unmount __P((struct mount *, int, struct proc *)); static int cd9660_root __P((struct mount *, struct vnode **)); static int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *)); static int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int cd9660_vget __P((struct mount *, ino_t, struct vnode **)); static int cd9660_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int cd9660_vptofh __P((struct vnode *, struct fid *)); static struct vfsops cd9660_vfsops = { cd9660_mount, cd9660_start, cd9660_unmount, cd9660_root, cd9660_quotactl, cd9660_statfs, cd9660_sync, cd9660_vget, cd9660_fhtovp, cd9660_vptofh, cd9660_init }; VFS_SET(cd9660_vfsops, cd9660, VFCF_READONLY); /* * Called by vfs_mountroot when iso is going to be mounted as root. 
*/ static int iso_get_ssector __P((dev_t dev, struct proc *p)); static int iso_mountfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct iso_args *argp)); /* * Try to find the start of the last data track on this CD-ROM. This * is used to mount the last session of a multi-session CD. Bail out * and return 0 if we fail, this is always a safe bet. */ static int iso_get_ssector(dev, p) dev_t dev; struct proc *p; { struct ioc_toc_header h; struct ioc_read_toc_single_entry t; int i; struct cdevsw *bd; d_ioctl_t *ioctlp; bd = bdevsw(dev); ioctlp = bd->d_ioctl; if (ioctlp == NULL) return 0; if (ioctlp(dev, CDIOREADTOCHEADER, (caddr_t)&h, FREAD, p) != 0) return 0; for (i = h.ending_track; i >= 0; i--) { t.address_format = CD_LBA_FORMAT; t.track = i; if (ioctlp(dev, CDIOREADTOCENTRY, (caddr_t)&t, FREAD, p) != 0) return 0; if ((t.entry.control & 4) != 0) /* found a data track */ break; } if (i < 0) return 0; return ntohl(t.entry.addr.lba); } static int iso_mountroot __P((struct mount *mp, struct proc *p)); static int iso_mountroot(mp, p) struct mount *mp; struct proc *p; { struct iso_args args; int error; if ((error = bdevvp(rootdev, &rootvp))) { printf("iso_mountroot: can't find rootvp"); return (error); } args.flags = ISOFSMNT_ROOT; args.ssector = iso_get_ssector(rootdev, p); if (bootverbose) printf("iso_mountroot(): using session at block %d\n", args.ssector); if ((error = iso_mountfs(rootvp, mp, p, &args)) != 0) return (error); (void)cd9660_statfs(mp, &mp->mnt_stat, p); return (0); } /* * VFS Operations. * * mount system call */ static int cd9660_mount(mp, path, data, ndp, p) register struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; struct iso_args args; size_t size; int error; mode_t accessmode; struct iso_mnt *imp = 0; if ((mp->mnt_flag & MNT_ROOTFS) != 0) { if (bdevsw(rootdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; return (iso_mountroot(mp, p)); } if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))) return (error); if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EROFS); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. * Disallow clearing MNT_NOCLUSTERR flag, if block device requests. */ if (mp->mnt_flag & MNT_UPDATE) { imp = VFSTOISOFS(mp); if (bdevsw(imp->im_devvp->v_rdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (args.fspec == 0) return (vfs_export(mp, &imp->im_export, &args.export)); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. 
*/ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp))) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return ENOTBLK; } if (bdevsw(devvp->v_rdev) == NULL) { vrele(devvp); return ENXIO; } /* * Verify that user has necessary permissions on the device, * or has superuser abilities */ accessmode = VREAD; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) error = suser(p); if (error) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); if ((mp->mnt_flag & MNT_UPDATE) == 0) { if (bdevsw(devvp->v_rdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; error = iso_mountfs(devvp, mp, p, &args); } else { if (devvp != imp->im_devvp) error = EINVAL; /* needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return error; } imp = VFSTOISOFS(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) cd9660_statfs(mp, &mp->mnt_stat, p); return 0; } /* * Common code for mount and mountroot */ static int iso_mountfs(devvp, mp, p, argp) register struct vnode *devvp; struct mount *mp; struct proc *p; struct iso_args *argp; { register struct iso_mnt *isomp = (struct iso_mnt *)0; struct buf *bp = NULL; struct buf *pribp = NULL, *supbp = NULL; dev_t dev = devvp->v_rdev; int error = EINVAL; int needclose = 0; int high_sierra = 0; int iso_bsize; int iso_blknum; int joliet_level; struct iso_volume_descriptor *vdp = 0; struct iso_primary_descriptor *pri = NULL; struct iso_sierra_primary_descriptor *pri_sierra = NULL; struct iso_supplementary_descriptor *sup = NULL; struct iso_directory_record *rootp; int logical_block_size; if (!(mp->mnt_flag & MNT_RDONLY)) return EROFS; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp))) return error; if (vcount(devvp) > 1 && devvp != rootvp) return EBUSY; if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))) return (error); if ((error = VOP_OPEN(devvp, FREAD, FSCRED, p))) return error; needclose = 1; /* This is the "logical sector size". The standard says this * should be 2048 or the physical sector size on the device, * whichever is greater. For now, we'll just use a constant. */ iso_bsize = ISO_DEFAULT_BLOCK_SIZE; joliet_level = 0; for (iso_blknum = 16 + argp->ssector; iso_blknum < 100 + argp->ssector; iso_blknum++) { if ((error = bread(devvp, iso_blknum * btodb(iso_bsize), iso_bsize, NOCRED, &bp)) != 0) goto out; vdp = (struct iso_volume_descriptor *)bp->b_data; if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) { if (bcmp (vdp->id_sierra, ISO_SIERRA_ID, sizeof vdp->id) != 0) { error = EINVAL; goto out; } else high_sierra = 1; } switch (isonum_711 (high_sierra? 
vdp->type_sierra: vdp->type)){ case ISO_VD_PRIMARY: if (pribp == NULL) { pribp = bp; bp = NULL; pri = (struct iso_primary_descriptor *)vdp; pri_sierra = (struct iso_sierra_primary_descriptor *)vdp; } break; case ISO_VD_SUPPLEMENTARY: if (supbp == NULL) { supbp = bp; bp = NULL; sup = (struct iso_supplementary_descriptor *)vdp; if (!(argp->flags & ISOFSMNT_NOJOLIET)) { if (bcmp(sup->escape, "%/@", 3) == 0) joliet_level = 1; if (bcmp(sup->escape, "%/C", 3) == 0) joliet_level = 2; if (bcmp(sup->escape, "%/E", 3) == 0) joliet_level = 3; if (isonum_711 (sup->flags) & 1) joliet_level = 0; } } break; case ISO_VD_END: goto vd_end; default: break; } if (bp) { brelse(bp); bp = NULL; } } vd_end: if (bp) { brelse(bp); bp = NULL; } if (pri == NULL) { error = EINVAL; goto out; } logical_block_size = isonum_723 (high_sierra? pri_sierra->logical_block_size: pri->logical_block_size); if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE || (logical_block_size & (logical_block_size - 1)) != 0) { error = EINVAL; goto out; } rootp = (struct iso_directory_record *) (high_sierra? pri_sierra->root_directory_record: pri->root_directory_record); isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK); bzero((caddr_t)isomp, sizeof *isomp); isomp->logical_block_size = logical_block_size; isomp->volume_space_size = isonum_733 (high_sierra? pri_sierra->volume_space_size: pri->volume_space_size); isomp->joliet_level = 0; /* * Since an ISO9660 multi-session CD can also access previous * sessions, we have to include them into the space consider- * ations. This doesn't yield a very accurate number since * parts of the old sessions might be inaccessible now, but we * can't do much better. This is also important for the NFS * filehandle validation. */ isomp->volume_space_size += argp->ssector; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->im_bmask = logical_block_size - 1; isomp->im_bshift = ffs(logical_block_size) - 1; pribp->b_flags |= B_AGE; brelse(pribp); pribp = NULL; mp->mnt_data = (qaddr_t)isomp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; devvp->v_specmountpoint = mp; /* Check the Rock Ridge Extention support */ if (!(argp->flags & ISOFSMNT_NORRIP)) { if ((error = bread(isomp->im_devvp, (isomp->root_extent + isonum_711(rootp->ext_attr_length)) << (isomp->im_bshift - DEV_BSHIFT), isomp->logical_block_size, NOCRED, &bp)) != 0) goto out; rootp = (struct iso_directory_record *)bp->b_data; if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) { argp->flags |= ISOFSMNT_NORRIP; } else { argp->flags &= ~ISOFSMNT_GENS; } /* * The contents are valid, * but they will get reread as part of another vnode, so... 
*/ bp->b_flags |= B_AGE; brelse(bp); bp = NULL; } isomp->im_flags = argp->flags & (ISOFSMNT_NORRIP | ISOFSMNT_GENS | ISOFSMNT_EXTATT | ISOFSMNT_NOJOLIET); if (high_sierra) { /* this effectively ignores all the mount flags */ log(LOG_INFO, "cd9660: High Sierra Format\n"); isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA; } else switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) { default: isomp->iso_ftype = ISO_FTYPE_DEFAULT; break; case ISOFSMNT_GENS|ISOFSMNT_NORRIP: isomp->iso_ftype = ISO_FTYPE_9660; break; case 0: log(LOG_INFO, "cd9660: RockRidge Extension\n"); isomp->iso_ftype = ISO_FTYPE_RRIP; break; } /* Decide whether to use the Joliet descriptor */ if (isomp->iso_ftype != ISO_FTYPE_RRIP && joliet_level) { log(LOG_INFO, "cd9660: Joliet Extension\n"); rootp = (struct iso_directory_record *) sup->root_directory_record; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->joliet_level = joliet_level; supbp->b_flags |= B_AGE; } if (supbp) { brelse(supbp); supbp = NULL; } return 0; out: devvp->v_specmountpoint = NULL; if (bp) brelse(bp); if (pribp) brelse(pribp); if (supbp) brelse(supbp); if (needclose) (void)VOP_CLOSE(devvp, FREAD, NOCRED, p); if (isomp) { free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; } return error; } /* * Make a filesystem operational. * Nothing to do at the moment. */ /* ARGSUSED */ static int cd9660_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return 0; } /* * unmount system call */ static int cd9660_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct iso_mnt *isomp; int error, flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; #if 0 mntflushbuf(mp, 0); if (mntinvalbuf(mp)) return EBUSY; #endif if ((error = vflush(mp, NULLVP, flags))) return (error); isomp = VFSTOISOFS(mp); isomp->im_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); vrele(isomp->im_devvp); free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Return root of a filesystem */ static int cd9660_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct iso_mnt *imp = VFSTOISOFS(mp); struct iso_directory_record *dp = (struct iso_directory_record *)imp->root; ino_t ino = isodirino(dp, imp); /* * With RRIP we must use the `.' entry of the root directory. * Simply tell vget, that it's a relocated directory. */ return (cd9660_vget_internal(mp, ino, vpp, imp->iso_ftype == ISO_FTYPE_RRIP, dp)); } /* * Do operations associated with quotas, not supported */ /* ARGSUSED */ static int cd9660_quotactl(mp, cmd, uid, arg, p) struct mount *mp; int cmd; uid_t uid; caddr_t arg; struct proc *p; { return (EOPNOTSUPP); } /* * Get file system statistics. 
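 *
 * (Since cd9660 is read-only there is nothing useful to report for the
 * free-space and free-inode fields; the code below simply returns zero
 * for those, and only f_bsize/f_iosize and the total block count carry
 * real information.)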
*/ int cd9660_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct iso_mnt *isomp; isomp = VFSTOISOFS(mp); sbp->f_bsize = isomp->logical_block_size; sbp->f_iosize = sbp->f_bsize; /* XXX */ sbp->f_blocks = isomp->volume_space_size; sbp->f_bfree = 0; /* total free blocks */ sbp->f_bavail = 0; /* blocks free for non superuser */ sbp->f_files = 0; /* total files */ sbp->f_ffree = 0; /* free file nodes */ if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } return 0; } /* ARGSUSED */ static int cd9660_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is in range * - call iget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the generation number matches */ struct ifid { ushort ifid_len; ushort ifid_pad; int ifid_ino; long ifid_start; }; /* ARGSUSED */ int cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct ifid *ifhp = (struct ifid *)fhp; register struct iso_node *ip; register struct netcred *np; register struct iso_mnt *imp = VFSTOISOFS(mp); struct vnode *nvp; int error; #ifdef ISOFS_DBG printf("fhtovp: ino %d, start %ld\n", ifhp->ifid_ino, ifhp->ifid_start); #endif /* * Get the export permission structure for this tuple. */ np = vfs_export_lookup(mp, &imp->im_export, nam); if (np == NULL) return (EACCES); if ((error = VFS_VGET(mp, ifhp->ifid_ino, &nvp)) != 0) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); if (ip->inode.iso_mode == 0) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } int cd9660_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { /* * XXXX * It would be nice if we didn't always set the `relocated' flag * and force the extra read, but I don't want to think about fixing * that right now. */ return (cd9660_vget_internal(mp, ino, vpp, #if 0 VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP, #else 0, #endif (struct iso_directory_record *)0)); } int cd9660_vget_internal(mp, ino, vpp, relocated, isodir) struct mount *mp; ino_t ino; struct vnode **vpp; int relocated; struct iso_directory_record *isodir; { struct iso_mnt *imp; struct iso_node *ip; struct buf *bp; struct vnode *vp, *nvp; dev_t dev; int error; imp = VFSTOISOFS(mp); dev = imp->im_dev; if ((*vpp = cd9660_ihashget(dev, ino)) != NULLVP) return (0); /* Allocate a new vnode/iso_node. */ if ((error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) != 0) { *vpp = NULLVP; return (error); } MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE, M_WAITOK); bzero((caddr_t)ip, sizeof(struct iso_node)); lockinit(&ip->i_lock, PINOD, "isonode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_dev = dev; ip->i_number = ino; /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. 
*/ cd9660_ihashins(ip); if (isodir == 0) { int lbn, off; lbn = lblkno(imp, ino); if (lbn >= imp->volume_space_size) { vput(vp); printf("fhtovp: lbn exceed volume space %d\n", lbn); return (ESTALE); } off = blkoff(imp, ino); if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) { vput(vp); printf("fhtovp: crosses block boundary %d\n", off + ISO_DIRECTORY_RECORD_SIZE); return (ESTALE); } error = bread(imp->im_devvp, lbn << (imp->im_bshift - DEV_BSHIFT), imp->logical_block_size, NOCRED, &bp); if (error) { vput(vp); brelse(bp); printf("fhtovp: bread error %d\n",error); return (error); } isodir = (struct iso_directory_record *)(bp->b_data + off); if (off + isonum_711(isodir->length) > imp->logical_block_size) { vput(vp); if (bp != 0) brelse(bp); printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n", off +isonum_711(isodir->length), off, isonum_711(isodir->length)); return (ESTALE); } #if 0 if (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) { if (bp != 0) brelse(bp); printf("fhtovp: file start miss %d vs %d\n", isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length), ifhp->ifid_start); return (ESTALE); } #endif } else bp = 0; ip->i_mnt = imp; ip->i_devvp = imp->im_devvp; VREF(ip->i_devvp); if (relocated) { /* * On relocated directories we must * read the `.' entry out of a dir. */ ip->iso_start = ino >> imp->im_bshift; if (bp != 0) brelse(bp); if ((error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) != 0) { vput(vp); return (error); } isodir = (struct iso_directory_record *)bp->b_data; } ip->iso_extent = isonum_733(isodir->extent); ip->i_size = isonum_733(isodir->size); ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent; /* * Setup time stamp, attribute */ vp->v_type = VNON; switch (imp->iso_ftype) { default: /* ISO_FTYPE_9660 */ { struct buf *bp2; int off; if ((imp->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL, &bp2); else bp2 = NULL; cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660); cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660); if (bp2) brelse(bp2); break; } case ISO_FTYPE_RRIP: cd9660_rrip_analyze(isodir, ip, imp); break; } if (bp != 0) brelse(bp); /* * Initialize the associated vnode */ switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) { case VFIFO: vp->v_op = cd9660_fifoop_p; break; case VCHR: case VBLK: /* * if device, look at device number table for translation */ vp->v_op = cd9660_specop_p; if ((nvp = checkalias(vp, ip->inode.iso_rdev, mp)) != NULL) { /* * Discard unneeded vnode, but save its iso_node. * Note that the lock is carried over in the iso_node * to the replacement vnode. */ nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased inode. */ vp = nvp; ip->i_vnode = vp; } break; default: break; } if (ip->iso_extent == imp->root_extent) vp->v_flag |= VROOT; /* * XXX need generation number? 
*/ *vpp = vp; return (0); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int cd9660_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct iso_node *ip = VTOI(vp); register struct ifid *ifhp; ifhp = (struct ifid *)fhp; ifhp->ifid_len = sizeof(struct ifid); ifhp->ifid_ino = ip->i_number; ifhp->ifid_start = ip->iso_start; #ifdef ISOFS_DBG printf("vptofh: ino %d, start %ld\n", ifhp->ifid_ino,ifhp->ifid_start); #endif return 0; } Index: head/sys/fs/cd9660/cd9660_vnops.c =================================================================== --- head/sys/fs/cd9660/cd9660_vnops.c (revision 49534) +++ head/sys/fs/cd9660/cd9660_vnops.c (revision 49535) @@ -1,917 +1,917 @@ /*- * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)cd9660_vnops.c 8.19 (Berkeley) 5/27/95 - * $Id: cd9660_vnops.c,v 1.55 1999/04/18 10:58:02 dcs Exp $ + * $Id: cd9660_vnops.c,v 1.56 1999/05/11 19:54:25 phk Exp $ */ #include #include #include #include #include #include #include #include -#include #include #include #include #include +#include #include #include #include #include #include #include static int cd9660_setattr __P((struct vop_setattr_args *)); static int cd9660_access __P((struct vop_access_args *)); static int cd9660_getattr __P((struct vop_getattr_args *)); static int cd9660_pathconf __P((struct vop_pathconf_args *)); static int cd9660_read __P((struct vop_read_args *)); struct isoreaddir; static int iso_uiodir __P((struct isoreaddir *idp, struct dirent *dp, off_t off)); static int iso_shipdir __P((struct isoreaddir *idp)); static int cd9660_readdir __P((struct vop_readdir_args *)); static int cd9660_readlink __P((struct vop_readlink_args *ap)); static int cd9660_abortop __P((struct vop_abortop_args *)); static int cd9660_strategy __P((struct vop_strategy_args *)); static int cd9660_print __P((struct vop_print_args *)); static int cd9660_getpages __P((struct vop_getpages_args *)); static int cd9660_putpages __P((struct vop_putpages_args *)); /* * Setattr call. Only allowed for block and character special devices. */ int cd9660_setattr(ap) struct vop_setattr_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; if (vap->va_flags != (u_long)VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) return (EROFS); if (vap->va_size != (u_quad_t)VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: return (EROFS); case VCHR: case VBLK: case VSOCK: case VFIFO: case VNON: case VBAD: return (0); } } return (0); } /* * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC. * The mode is shifted to select the owner/group/other fields. The * super user is granted all permissions. */ /* ARGSUSED */ static int cd9660_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct iso_node *ip = VTOI(vp); struct ucred *cred = ap->a_cred; mode_t mask, mode = ap->a_mode; gid_t *gp; int i; /* * Disallow write attempts unless the file is a socket, * fifo, or a block or character device resident on the * file system. */ if (mode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: return (EROFS); /* NOT REACHED */ default: break; } } /* User id 0 always gets access. */ if (cred->cr_uid == 0) return (0); mask = 0; /* Otherwise, check the owner. */ if (cred->cr_uid == ip->inode.iso_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return ((ip->inode.iso_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (ip->inode.iso_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return ((ip->inode.iso_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return ((ip->inode.iso_mode & mask) == mask ? 
0 : EACCES); } static int cd9660_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; register struct vattr *vap = ap->a_vap; register struct iso_node *ip = VTOI(vp); vap->va_fsid = dev2udev(ip->i_dev); vap->va_fileid = ip->i_number; vap->va_mode = ip->inode.iso_mode; vap->va_nlink = ip->inode.iso_links; vap->va_uid = ip->inode.iso_uid; vap->va_gid = ip->inode.iso_gid; vap->va_atime = ip->inode.iso_atime; vap->va_mtime = ip->inode.iso_mtime; vap->va_ctime = ip->inode.iso_ctime; vap->va_rdev = ip->inode.iso_rdev; vap->va_size = (u_quad_t) ip->i_size; if (ip->i_size == 0 && (vap->va_mode & S_IFMT) == S_IFLNK) { struct vop_readlink_args rdlnk; struct iovec aiov; struct uio auio; char *cp; MALLOC(cp, char *, MAXPATHLEN, M_TEMP, M_WAITOK); aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = ap->a_p; auio.uio_resid = MAXPATHLEN; rdlnk.a_uio = &auio; rdlnk.a_vp = ap->a_vp; rdlnk.a_cred = ap->a_cred; if (cd9660_readlink(&rdlnk) == 0) vap->va_size = MAXPATHLEN - auio.uio_resid; FREE(cp, M_TEMP); } vap->va_flags = 0; vap->va_gen = 1; vap->va_blocksize = ip->i_mnt->logical_block_size; vap->va_bytes = (u_quad_t) ip->i_size; vap->va_type = vp->v_type; vap->va_filerev = 0; return (0); } /* * Vnode op for reading. */ static int cd9660_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; register struct iso_node *ip = VTOI(vp); register struct iso_mnt *imp; struct buf *bp; daddr_t lbn, rablock; off_t diff; int rasize, error = 0; long size, n, on; if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); ip->i_flag |= IN_ACCESS; imp = ip->i_mnt; do { lbn = lblkno(imp, uio->uio_offset); on = blkoff(imp, uio->uio_offset); n = min((u_int)(imp->logical_block_size - on), uio->uio_resid); diff = (off_t)ip->i_size - uio->uio_offset; if (diff <= 0) return (0); if (diff < n) n = diff; size = blksize(imp, ip, lbn); rablock = lbn + 1; if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { if (lblktosize(imp, rablock) < ip->i_size) error = cluster_read(vp, (off_t)ip->i_size, lbn, size, NOCRED, uio->uio_resid, (ap->a_ioflag >> 16), &bp); else error = bread(vp, lbn, size, NOCRED, &bp); } else { if (vp->v_lastr + 1 == lbn && lblktosize(imp, rablock) < ip->i_size) { rasize = blksize(imp, ip, rablock); error = breadn(vp, lbn, size, &rablock, &rasize, 1, NOCRED, &bp); } else error = bread(vp, lbn, size, NOCRED, &bp); } vp->v_lastr = lbn; n = min(n, size - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove(bp->b_data + on, (int)n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); } /* * Structure for reading directories */ struct isoreaddir { struct dirent saveent; struct dirent assocent; struct dirent current; off_t saveoff; off_t assocoff; off_t curroff; struct uio *uio; off_t uio_off; int eofflag; u_long *cookies; int ncookies; }; int iso_uiodir(idp,dp,off) struct isoreaddir *idp; struct dirent *dp; off_t off; { int error; dp->d_name[dp->d_namlen] = 0; dp->d_reclen = GENERIC_DIRSIZ(dp); if (idp->uio->uio_resid < dp->d_reclen) { idp->eofflag = 0; return (-1); } if (idp->cookies) { if (idp->ncookies <= 0) { idp->eofflag = 0; return (-1); } *idp->cookies++ = off; 
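/* Each cookie records the directory offset at which a later readdir should resume after this entry; one slot is consumed per entry returned. */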
--idp->ncookies; } if ((error = uiomove((caddr_t) dp,dp->d_reclen,idp->uio)) != 0) return (error); idp->uio_off = off; return (0); } int iso_shipdir(idp) struct isoreaddir *idp; { struct dirent *dp; int cl, sl, assoc; int error; char *cname, *sname; cl = idp->current.d_namlen; cname = idp->current.d_name; assoc = (cl > 1) && (*cname == ASSOCCHAR); if (assoc) { cl--; cname++; } dp = &idp->saveent; sname = dp->d_name; if (!(sl = dp->d_namlen)) { dp = &idp->assocent; sname = dp->d_name + 1; sl = dp->d_namlen - 1; } if (sl > 0) { if (sl != cl || bcmp(sname,cname,sl)) { if (idp->assocent.d_namlen) { if ((error = iso_uiodir(idp,&idp->assocent,idp->assocoff)) != 0) return (error); idp->assocent.d_namlen = 0; } if (idp->saveent.d_namlen) { if ((error = iso_uiodir(idp,&idp->saveent,idp->saveoff)) != 0) return (error); idp->saveent.d_namlen = 0; } } } idp->current.d_reclen = GENERIC_DIRSIZ(&idp->current); if (assoc) { idp->assocoff = idp->curroff; bcopy(&idp->current,&idp->assocent,idp->current.d_reclen); } else { idp->saveoff = idp->curroff; bcopy(&idp->current,&idp->saveent,idp->current.d_reclen); } return (0); } /* * Vnode op for readdir */ static int cd9660_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long *a_cookies; } */ *ap; { register struct uio *uio = ap->a_uio; struct isoreaddir *idp; struct vnode *vdp = ap->a_vp; struct iso_node *dp; struct iso_mnt *imp; struct buf *bp = NULL; struct iso_directory_record *ep; int entryoffsetinblock; doff_t endsearch; u_long bmask; int error = 0; int reclen; u_short namelen; int ncookies = 0; u_long *cookies = NULL; dp = VTOI(vdp); imp = dp->i_mnt; bmask = imp->im_bmask; MALLOC(idp, struct isoreaddir *, sizeof(*idp), M_TEMP, M_WAITOK); idp->saveent.d_namlen = idp->assocent.d_namlen = 0; /* * XXX * Is it worth trying to figure out the type? */ idp->saveent.d_type = idp->assocent.d_type = idp->current.d_type = DT_UNKNOWN; idp->uio = uio; if (ap->a_ncookies == NULL) { idp->cookies = NULL; } else { /* * Guess the number of cookies needed. */ ncookies = uio->uio_resid / 16; MALLOC(cookies, u_long *, ncookies * sizeof(u_int), M_TEMP, M_WAITOK); idp->cookies = cookies; idp->ncookies = ncookies; } idp->eofflag = 1; idp->curroff = uio->uio_offset; if ((entryoffsetinblock = idp->curroff & bmask) && (error = cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp))) { FREE(idp, M_TEMP); return (error); } endsearch = dp->i_size; while (idp->curroff < endsearch) { /* * If offset is on a block boundary, * read the next directory block. * Release previous if it exists. */ if ((idp->curroff & bmask) == 0) { if (bp != NULL) brelse(bp); if ((error = cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp)) != 0) break; entryoffsetinblock = 0; } /* * Get pointer to next entry. 
*/ ep = (struct iso_directory_record *) ((char *)bp->b_data + entryoffsetinblock); reclen = isonum_711(ep->length); if (reclen == 0) { /* skip to next block, if any */ idp->curroff = (idp->curroff & ~bmask) + imp->logical_block_size; continue; } if (reclen < ISO_DIRECTORY_RECORD_SIZE) { error = EINVAL; /* illegal entry, stop */ break; } if (entryoffsetinblock + reclen > imp->logical_block_size) { error = EINVAL; /* illegal directory, so stop looking */ break; } idp->current.d_namlen = isonum_711(ep->name_len); if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) { error = EINVAL; /* illegal entry, stop */ break; } if (isonum_711(ep->flags)&2) idp->current.d_fileno = isodirino(ep, imp); else idp->current.d_fileno = dbtob(bp->b_blkno) + entryoffsetinblock; idp->curroff += reclen; switch (imp->iso_ftype) { case ISO_FTYPE_RRIP: cd9660_rrip_getname(ep,idp->current.d_name, &namelen, &idp->current.d_fileno,imp); idp->current.d_namlen = (u_char)namelen; if (idp->current.d_namlen) error = iso_uiodir(idp,&idp->current,idp->curroff); break; default: /* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 || ISO_FTYPE_HIGH_SIERRA*/ strcpy(idp->current.d_name,".."); if (idp->current.d_namlen == 1 && ep->name[0] == 0) { idp->current.d_namlen = 1; error = iso_uiodir(idp,&idp->current,idp->curroff); } else if (idp->current.d_namlen == 1 && ep->name[0] == 1) { idp->current.d_namlen = 2; error = iso_uiodir(idp,&idp->current,idp->curroff); } else { isofntrans(ep->name,idp->current.d_namlen, idp->current.d_name, &namelen, imp->iso_ftype == ISO_FTYPE_9660, isonum_711(ep->flags)&4, imp->joliet_level); idp->current.d_namlen = (u_char)namelen; if (imp->iso_ftype == ISO_FTYPE_DEFAULT) error = iso_shipdir(idp); else error = iso_uiodir(idp,&idp->current,idp->curroff); } } if (error) break; entryoffsetinblock += reclen; } if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) { idp->current.d_namlen = 0; error = iso_shipdir(idp); } if (error < 0) error = 0; if (ap->a_ncookies != NULL) { if (error) free(cookies, M_TEMP); else { /* * Work out the number of cookies actually used. */ *ap->a_ncookies = ncookies - idp->ncookies; *ap->a_cookies = cookies; } } if (bp) brelse (bp); uio->uio_offset = idp->uio_off; *ap->a_eofflag = idp->eofflag; FREE(idp, M_TEMP); return (error); } /* * Return target name of a symbolic link * Shouldn't we get the parent vnode and read the data from there? * This could eventually result in deadlocks in cd9660_lookup. * But otherwise the block read here is in the block buffer two times. */ typedef struct iso_directory_record ISODIR; typedef struct iso_node ISONODE; typedef struct iso_mnt ISOMNT; static int cd9660_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { ISONODE *ip; ISODIR *dirp; ISOMNT *imp; struct buf *bp; struct uio *uio; u_short symlen; int error; char *symname; ip = VTOI(ap->a_vp); imp = ip->i_mnt; uio = ap->a_uio; if (imp->iso_ftype != ISO_FTYPE_RRIP) return (EINVAL); /* * Get parents directory record block that this inode included. */ error = bread(imp->im_devvp, (ip->i_number >> imp->im_bshift) << (imp->im_bshift - DEV_BSHIFT), imp->logical_block_size, NOCRED, &bp); if (error) { brelse(bp); return (EINVAL); } /* * Setup the directory pointer for this inode */ dirp = (ISODIR *)(bp->b_data + (ip->i_number & imp->im_bmask)); /* * Just make sure, we have a right one.... 
* 1: Check not cross boundary on block */ if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length) > (unsigned)imp->logical_block_size) { brelse(bp); return (EINVAL); } /* * Now get a buffer * Abuse a namei buffer for now. */ if (uio->uio_segflg == UIO_SYSSPACE) symname = uio->uio_iov->iov_base; else symname = zalloc(namei_zone); /* * Ok, we just gathering a symbolic name in SL record. */ if (cd9660_rrip_getsymname(dirp, symname, &symlen, imp) == 0) { if (uio->uio_segflg != UIO_SYSSPACE) zfree(namei_zone, symname); brelse(bp); return (EINVAL); } /* * Don't forget before you leave from home ;-) */ brelse(bp); /* * return with the symbolic name to caller's. */ if (uio->uio_segflg != UIO_SYSSPACE) { error = uiomove(symname, symlen, uio); zfree(namei_zone, symname); return (error); } uio->uio_resid -= symlen; uio->uio_iov->iov_base += symlen; uio->uio_iov->iov_len -= symlen; return (0); } /* * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually * done. If a buffer has been saved in anticipation of a CREATE, delete it. */ static int cd9660_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (0); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. */ static int cd9660_strategy(ap) struct vop_strategy_args /* { struct buf *a_vp; struct buf *a_bp; } */ *ap; { register struct buf *bp = ap->a_bp; register struct vnode *vp = bp->b_vp; register struct iso_node *ip; int error; ip = VTOI(vp); if (vp->v_type == VBLK || vp->v_type == VCHR) panic("cd9660_strategy: spec"); if (bp->b_blkno == bp->b_lblkno) { if ((error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL))) { bp->b_error = error; bp->b_flags |= B_ERROR; biodone(bp); return (error); } if ((long)bp->b_blkno == -1) clrbuf(bp); } if ((long)bp->b_blkno == -1) { biodone(bp); return (0); } vp = ip->i_devvp; bp->b_dev = vp->v_rdev; VOP_STRATEGY(vp, bp); return (0); } /* * Print out the contents of an inode. */ static int cd9660_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_ISOFS, isofs vnode\n"); return (0); } /* * Return POSIX pathconf information applicable to cd9660 filesystems. */ static int cd9660_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; register_t *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = 1; return (0); case _PC_NAME_MAX: if (VTOI(ap->a_vp)->i_mnt->iso_ftype == ISO_FTYPE_RRIP) *ap->a_retval = NAME_MAX; else *ap->a_retval = 37; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_NO_TRUNC: *ap->a_retval = 1; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * get page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int cd9660_getpages(ap) struct vop_getpages_args *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage); } /* * put page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). 
*/ int cd9660_putpages(ap) struct vop_putpages_args *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } /* * Global vfs data structures for cd9660 */ vop_t **cd9660_vnodeop_p; static struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) cd9660_abortop }, { &vop_access_desc, (vop_t *) cd9660_access }, { &vop_bmap_desc, (vop_t *) cd9660_bmap }, { &vop_cachedlookup_desc, (vop_t *) cd9660_lookup }, { &vop_getattr_desc, (vop_t *) cd9660_getattr }, { &vop_inactive_desc, (vop_t *) cd9660_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, { &vop_pathconf_desc, (vop_t *) cd9660_pathconf }, { &vop_print_desc, (vop_t *) cd9660_print }, { &vop_read_desc, (vop_t *) cd9660_read }, { &vop_readdir_desc, (vop_t *) cd9660_readdir }, { &vop_readlink_desc, (vop_t *) cd9660_readlink }, { &vop_reclaim_desc, (vop_t *) cd9660_reclaim }, { &vop_setattr_desc, (vop_t *) cd9660_setattr }, { &vop_strategy_desc, (vop_t *) cd9660_strategy }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { &vop_getpages_desc, (vop_t *) cd9660_getpages }, { &vop_putpages_desc, (vop_t *) cd9660_putpages }, { NULL, NULL } }; static struct vnodeopv_desc cd9660_vnodeop_opv_desc = { &cd9660_vnodeop_p, cd9660_vnodeop_entries }; VNODEOP_SET(cd9660_vnodeop_opv_desc); /* * Special device vnode ops */ vop_t **cd9660_specop_p; static struct vnodeopv_entry_desc cd9660_specop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_access_desc, (vop_t *) cd9660_access }, { &vop_getattr_desc, (vop_t *) cd9660_getattr }, { &vop_inactive_desc, (vop_t *) cd9660_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_print_desc, (vop_t *) cd9660_print }, { &vop_reclaim_desc, (vop_t *) cd9660_reclaim }, { &vop_setattr_desc, (vop_t *) cd9660_setattr }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { NULL, NULL } }; static struct vnodeopv_desc cd9660_specop_opv_desc = { &cd9660_specop_p, cd9660_specop_entries }; VNODEOP_SET(cd9660_specop_opv_desc); vop_t **cd9660_fifoop_p; static struct vnodeopv_entry_desc cd9660_fifoop_entries[] = { { &vop_default_desc, (vop_t *) fifo_vnoperate }, { &vop_access_desc, (vop_t *) cd9660_access }, { &vop_getattr_desc, (vop_t *) cd9660_getattr }, { &vop_inactive_desc, (vop_t *) cd9660_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_print_desc, (vop_t *) cd9660_print }, { &vop_reclaim_desc, (vop_t *) cd9660_reclaim }, { &vop_setattr_desc, (vop_t *) cd9660_setattr }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { NULL, NULL } }; static struct vnodeopv_desc cd9660_fifoop_opv_desc = { &cd9660_fifoop_p, cd9660_fifoop_entries }; VNODEOP_SET(cd9660_fifoop_opv_desc); Index: head/sys/fs/coda/coda_vfsops.c =================================================================== --- head/sys/fs/coda/coda_vfsops.c (revision 49534) +++ head/sys/fs/coda/coda_vfsops.c (revision 49535) @@ -1,589 +1,587 @@ /* * * Coda: an Experimental Distributed File System * Release 3.1 * * Copyright (c) 1987-1998 Carnegie Mellon University * All Rights Reserved * * Permission to use, copy, modify and distribute this software and its * documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or 
modified versions, and any portions * thereof, and that both notices appear in supporting documentation, and * that credit is given to Carnegie Mellon University in all documents * and publicity pertaining to direct or indirect use of this code or its * derivatives. * * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF * ANY DERIVATIVE WORK. * * Carnegie Mellon encourages users of this software to return any * improvements or extensions that they make, and to grant Carnegie * Mellon the rights to redistribute these changes without encumbrance. * * @(#) src/sys/cfs/coda_vfsops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ - * $Id: coda_vfsops.c,v 1.15 1999/07/20 07:18:17 phk Exp $ + * $Id: coda_vfsops.c,v 1.16 1999/07/21 12:51:36 phk Exp $ * */ /* * Mach Operating System * Copyright (c) 1989 Carnegie-Mellon University * All rights reserved. The CMU software License Agreement specifies * the terms and conditions for use and redistribution. */ /* * This code was written for the Coda file system at Carnegie Mellon * University. Contributers include David Steere, James Kistler, and * M. Satyanarayanan. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include MALLOC_DEFINE(M_CODA, "CODA storage", "Various Coda Structures"); int codadebug = 0; int coda_vfsop_print_entry = 0; #define ENTRY if(coda_vfsop_print_entry) myprintf(("Entered %s\n",__FUNCTION__)) struct vnode *coda_ctlvp; struct coda_mntinfo coda_mnttbl[NVCODA]; /* indexed by minor device number */ /* structure to keep statistics of internally generated/satisfied calls */ struct coda_op_stats coda_vfsopstats[CODA_VFSOPS_SIZE]; #define MARK_ENTRY(op) (coda_vfsopstats[op].entries++) #define MARK_INT_SAT(op) (coda_vfsopstats[op].sat_intrn++) #define MARK_INT_FAIL(op) (coda_vfsopstats[op].unsat_intrn++) #define MRAK_INT_GEN(op) (coda_vfsopstats[op].gen_intrn++) extern int coda_nc_initialized; /* Set if cache has been initialized */ extern int vc_nb_open __P((dev_t, int, int, struct proc *)); int coda_vfsopstats_init(void) { register int i; for (i=0;ini_vp; if (error) { MARK_INT_FAIL(CODA_MOUNT_STATS); return (error); } if (dvp->v_type != VCHR) { MARK_INT_FAIL(CODA_MOUNT_STATS); vrele(dvp); return(ENXIO); } dev = dvp->v_rdev; vrele(dvp); /* * See if the device table matches our expectations. */ if (devsw(dev)->d_open != vc_nb_open) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENXIO); } if (minor(dev) >= NVCODA || minor(dev) < 0) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENXIO); } /* * Initialize the mount record and link it to the vfs struct */ mi = &coda_mnttbl[minor(dev)]; if (!VC_OPEN(&mi->mi_vcomm)) { MARK_INT_FAIL(CODA_MOUNT_STATS); return(ENODEV); } /* No initialization (here) of mi_vcomm! */ vfsp->mnt_data = (qaddr_t)mi; vfs_getnewfsid (vfsp); mi->mi_vfsp = vfsp; /* * Make a root vnode to placate the Vnode interface, but don't * actually make the CODA_ROOT call to venus until the first call * to coda_root in case a server is down while venus is starting. 
*/ rootfid.Volume = 0; rootfid.Vnode = 0; rootfid.Unique = 0; cp = make_coda_node(&rootfid, vfsp, VDIR); rootvp = CTOV(cp); rootvp->v_flag |= VROOT; ctlfid.Volume = CTL_VOL; ctlfid.Vnode = CTL_VNO; ctlfid.Unique = CTL_UNI; /* cp = make_coda_node(&ctlfid, vfsp, VCHR); The above code seems to cause a loop in the cnode links. I don't totally understand when it happens, it is caught when closing down the system. */ cp = make_coda_node(&ctlfid, 0, VCHR); coda_ctlvp = CTOV(cp); /* Add vfs and rootvp to chain of vfs hanging off mntinfo */ mi->mi_vfsp = vfsp; mi->mi_rootvp = rootvp; /* set filesystem block size */ vfsp->mnt_stat.f_bsize = 8192; /* XXX -JJK */ /* Set f_iosize. XXX -- inamura@isl.ntt.co.jp. For vnode_pager_haspage() references. The value should be obtained from underlying UFS. */ /* Checked UFS. iosize is set as 8192 */ vfsp->mnt_stat.f_iosize = 8192; /* error is currently guaranteed to be zero, but in case some code changes... */ CODADEBUG(1, myprintf(("coda_mount returned %d\n",error));); if (error) MARK_INT_FAIL(CODA_MOUNT_STATS); else MARK_INT_SAT(CODA_MOUNT_STATS); return(error); } int coda_start(vfsp, flags, p) struct mount *vfsp; int flags; struct proc *p; { ENTRY; return (0); } int coda_unmount(vfsp, mntflags, p) struct mount *vfsp; int mntflags; struct proc *p; { struct coda_mntinfo *mi = vftomi(vfsp); int active, error = 0; ENTRY; MARK_ENTRY(CODA_UMOUNT_STATS); if (!CODA_MOUNTED(vfsp)) { MARK_INT_FAIL(CODA_UMOUNT_STATS); return(EINVAL); } if (mi->mi_vfsp == vfsp) { /* We found the victim */ if (!IS_UNMOUNTING(VTOC(mi->mi_rootvp))) return (EBUSY); /* Venus is still running */ #ifdef DEBUG printf("coda_unmount: ROOT: vp %p, cp %p\n", mi->mi_rootvp, VTOC(mi->mi_rootvp)); #endif vrele(mi->mi_rootvp); active = coda_kill(vfsp, NOT_DOWNCALL); mi->mi_rootvp->v_flag &= ~VROOT; error = vflush(mi->mi_vfsp, NULLVP, FORCECLOSE); printf("coda_unmount: active = %d, vflush active %d\n", active, error); error = 0; /* I'm going to take this out to allow lookups to go through. I'm * not sure it's important anyway. -- DCS 2/2/94 */ /* vfsp->VFS_DATA = NULL; */ /* No more vfsp's to hold onto */ mi->mi_vfsp = NULL; mi->mi_rootvp = NULL; if (error) MARK_INT_FAIL(CODA_UMOUNT_STATS); else MARK_INT_SAT(CODA_UMOUNT_STATS); return(error); } return (EINVAL); } /* * find root of cfs */ int coda_root(vfsp, vpp) struct mount *vfsp; struct vnode **vpp; { struct coda_mntinfo *mi = vftomi(vfsp); struct vnode **result; int error; struct proc *p = curproc; /* XXX - bnoble */ ViceFid VFid; ENTRY; MARK_ENTRY(CODA_ROOT_STATS); result = NULL; if (vfsp == mi->mi_vfsp) { if ((VTOC(mi->mi_rootvp)->c_fid.Volume != 0) || (VTOC(mi->mi_rootvp)->c_fid.Vnode != 0) || (VTOC(mi->mi_rootvp)->c_fid.Unique != 0)) { /* Found valid root. */ *vpp = mi->mi_rootvp; /* On Mach, this is vref. On NetBSD, VOP_LOCK */ #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_SAT(CODA_ROOT_STATS); return(0); } } error = venus_root(vftomi(vfsp), p->p_cred->pc_ucred, p, &VFid); if (!error) { /* * Save the new rootfid in the cnode, and rehash the cnode into the * cnode hash with the new fid key. */ coda_unsave(VTOC(mi->mi_rootvp)); VTOC(mi->mi_rootvp)->c_fid = VFid; coda_save(VTOC(mi->mi_rootvp)); *vpp = mi->mi_rootvp; #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_SAT(CODA_ROOT_STATS); goto exit; } else if (error == ENODEV || error == EINTR) { /* Gross hack here! 
*/ /* * If Venus fails to respond to the CODA_ROOT call, coda_call returns * ENODEV. Return the uninitialized root vnode to allow vfs * operations such as unmount to continue. Without this hack, * there is no way to do an unmount if Venus dies before a * successful CODA_ROOT call is done. All vnode operations * will fail. */ *vpp = mi->mi_rootvp; #if 1 vref(*vpp); vn_lock(*vpp, LK_EXCLUSIVE, p); #else vget(*vpp, LK_EXCLUSIVE, p); #endif MARK_INT_FAIL(CODA_ROOT_STATS); error = 0; goto exit; } else { CODADEBUG( CODA_ROOT, myprintf(("error %d in CODA_ROOT\n", error)); ); MARK_INT_FAIL(CODA_ROOT_STATS); goto exit; } exit: return(error); } int coda_quotactl(vfsp, cmd, uid, arg, p) struct mount *vfsp; int cmd; uid_t uid; caddr_t arg; struct proc *p; { ENTRY; return (EOPNOTSUPP); } /* * Get file system statistics. */ int coda_nb_statfs(vfsp, sbp, p) register struct mount *vfsp; struct statfs *sbp; struct proc *p; { ENTRY; /* MARK_ENTRY(CODA_STATFS_STATS); */ if (!CODA_MOUNTED(vfsp)) { /* MARK_INT_FAIL(CODA_STATFS_STATS);*/ return(EINVAL); } bzero(sbp, sizeof(struct statfs)); /* XXX - what to do about f_flags, others? --bnoble */ /* Below This is what AFS does #define NB_SFS_SIZ 0x895440 */ /* Note: Normal fs's have a bsize of 0x400 == 1024 */ sbp->f_type = vfsp->mnt_vfc->vfc_typenum; sbp->f_bsize = 8192; /* XXX */ sbp->f_iosize = 8192; /* XXX */ #define NB_SFS_SIZ 0x8AB75D sbp->f_blocks = NB_SFS_SIZ; sbp->f_bfree = NB_SFS_SIZ; sbp->f_bavail = NB_SFS_SIZ; sbp->f_files = NB_SFS_SIZ; sbp->f_ffree = NB_SFS_SIZ; bcopy((caddr_t)&(vfsp->mnt_stat.f_fsid), (caddr_t)&(sbp->f_fsid), sizeof (fsid_t)); snprintf(sbp->f_mntonname, sizeof(sbp->f_mntonname), "/coda"); snprintf(sbp->f_mntfromname, sizeof(sbp->f_mntfromname), "CODA"); /* MARK_INT_SAT(CODA_STATFS_STATS); */ return(0); } /* * Flush any pending I/O. */ int coda_sync(vfsp, waitfor, cred, p) struct mount *vfsp; int waitfor; struct ucred *cred; struct proc *p; { ENTRY; MARK_ENTRY(CODA_SYNC_STATS); MARK_INT_SAT(CODA_SYNC_STATS); return(0); } int coda_vget(vfsp, ino, vpp) struct mount *vfsp; ino_t ino; struct vnode **vpp; { ENTRY; return (EOPNOTSUPP); } /* * fhtovp is now what vget used to be in 4.3-derived systems. For * some silly reason, vget is now keyed by a 32 bit ino_t, rather than * a type-specific fid. */ int coda_fhtovp(vfsp, fhp, nam, vpp, exflagsp, creadanonp) register struct mount *vfsp; struct fid *fhp; struct mbuf *nam; struct vnode **vpp; int *exflagsp; struct ucred **creadanonp; { struct cfid *cfid = (struct cfid *)fhp; struct cnode *cp = 0; int error; struct proc *p = curproc; /* XXX -mach */ ViceFid VFid; int vtype; ENTRY; MARK_ENTRY(CODA_VGET_STATS); /* Check for vget of control object. */ if (IS_CTL_FID(&cfid->cfid_fid)) { *vpp = coda_ctlvp; vref(coda_ctlvp); MARK_INT_SAT(CODA_VGET_STATS); return(0); } error = venus_fhtovp(vftomi(vfsp), &cfid->cfid_fid, p->p_cred->pc_ucred, p, &VFid, &vtype); if (error) { CODADEBUG(CODA_VGET, myprintf(("vget error %d\n",error));) *vpp = (struct vnode *)0; } else { CODADEBUG(CODA_VGET, myprintf(("vget: vol %lx vno %lx uni %lx type %d result %d\n", VFid.Volume, VFid.Vnode, VFid.Unique, vtype, error)); ) cp = make_coda_node(&VFid, vfsp, vtype); *vpp = CTOV(cp); } return(error); } int coda_vptofh(vnp, fidp) struct vnode *vnp; struct fid *fidp; { ENTRY; return (EOPNOTSUPP); } int coda_init(struct vfsconf *vfsp) { ENTRY; return 0; } /* * To allow for greater ease of use, some vnodes may be orphaned when * Venus dies. 
Certain operations should still be allowed to go * through, but without propagating ophan-ness. So this function will * get a new vnode for the file from the current run of Venus. */ int getNewVnode(vpp) struct vnode **vpp; { struct cfid cfid; struct coda_mntinfo *mi = vftomi((*vpp)->v_mount); ENTRY; cfid.cfid_len = (short)sizeof(ViceFid); cfid.cfid_fid = VTOC(*vpp)->c_fid; /* Structure assignment. */ /* XXX ? */ /* We're guessing that if set, the 1st element on the list is a * valid vnode to use. If not, return ENODEV as venus is dead. */ if (mi->mi_vfsp == NULL) return ENODEV; return coda_fhtovp(mi->mi_vfsp, (struct fid*)&cfid, NULL, vpp, NULL, NULL); } #include #include /* get the mount structure corresponding to a given device. Assume * device corresponds to a UFS. Return NULL if no device is found. */ struct mount *devtomp(dev) dev_t dev; { struct mount *mp, *nmp; for (mp = mountlist.cqh_first; mp != (void*)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_next; if (((VFSTOUFS(mp))->um_dev == dev)) { /* mount corresponds to UFS and the device matches one we want */ return(mp); } } /* mount structure wasn't found */ return(NULL); } struct vfsops coda_vfsops = { coda_mount, coda_start, coda_unmount, coda_root, coda_quotactl, coda_nb_statfs, coda_sync, coda_vget, (int (*) (struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)) eopnotsupp, (int (*) (struct vnode *, struct fid *)) eopnotsupp, coda_init, }; VFS_SET(coda_vfsops, coda, VFCF_NETWORK); Index: head/sys/fs/msdosfs/msdosfs_vfsops.c =================================================================== --- head/sys/fs/msdosfs/msdosfs_vfsops.c (revision 49534) +++ head/sys/fs/msdosfs/msdosfs_vfsops.c (revision 49535) @@ -1,1017 +1,1016 @@ -/* $Id: msdosfs_vfsops.c,v 1.44 1999/05/08 06:40:00 phk Exp $ */ +/* $Id: msdosfs_vfsops.c,v 1.45 1999/05/31 11:28:02 phk Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include #include #include #include #include #include #include -#include /* XXX */ /* defines v_rdev */ #include #include #include #include #include /* defines ALLPERMS */ #include #include #include #include #include #include MALLOC_DEFINE(M_MSDOSFSMNT, "MSDOSFS mount", "MSDOSFS mount structure"); static MALLOC_DEFINE(M_MSDOSFSFAT, "MSDOSFS FAT", "MSDOSFS file allocation table"); static int update_mp __P((struct mount *mp, struct msdosfs_args *argp)); static int mountmsdosfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct msdosfs_args *argp)); static int msdosfs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int msdosfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int msdosfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int msdosfs_root __P((struct mount *, struct vnode **)); static int msdosfs_start __P((struct mount *, int, struct proc *)); static int msdosfs_statfs __P((struct mount *, struct statfs *, struct proc *)); static int msdosfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int msdosfs_unmount __P((struct mount *, int, struct proc *)); static int msdosfs_vget __P((struct mount *mp, ino_t ino, struct vnode **vpp)); static int msdosfs_vptofh __P((struct vnode *, struct fid *)); static int update_mp(mp, argp) struct mount *mp; struct msdosfs_args *argp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error; pmp->pm_gid = argp->gid; pmp->pm_uid = argp->uid; pmp->pm_mask = argp->mask & ALLPERMS; pmp->pm_flags |= argp->flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_U2WTABLE) { bcopy(argp->u2w, pmp->pm_u2w, sizeof(pmp->pm_u2w)); bcopy(argp->d2u, pmp->pm_d2u, sizeof(pmp->pm_d2u)); bcopy(argp->u2d, pmp->pm_u2d, sizeof(pmp->pm_u2d)); } if (pmp->pm_flags & MSDOSFSMNT_ULTABLE) { bcopy(argp->ul, pmp->pm_ul, sizeof(pmp->pm_ul)); bcopy(argp->lu, pmp->pm_lu, sizeof(pmp->pm_lu)); } #ifndef __FreeBSD__ /* * GEMDOS knows nothing (yet) about win95 */ if (pmp->pm_flags & MSDOSFSMNT_GEMDOSFS) pmp->pm_flags |= MSDOSFSMNT_NOWIN95; #endif if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; else if (!(pmp->pm_flags & (MSDOSFSMNT_SHORTNAME | MSDOSFSMNT_LONGNAME))) { struct vnode *rootvp; /* * Try to divine whether to support Win'95 long filenames */ if (FAT32(pmp)) pmp->pm_flags |= MSDOSFSMNT_LONGNAME; else { if ((error = msdosfs_root(mp, &rootvp)) != 0) return error; pmp->pm_flags |= findwin95(VTODE(rootvp)) ? 
MSDOSFSMNT_LONGNAME : MSDOSFSMNT_SHORTNAME; vput(rootvp); } } return 0; } #ifndef __FreeBSD__ int msdosfs_mountroot() { register struct mount *mp; struct proc *p = curproc; /* XXX */ size_t size; int error; struct msdosfs_args args; if (root_device->dv_class != DV_DISK) return (ENODEV); /* * Get vnodes for swapdev and rootdev. */ if (bdevvp(rootdev, &rootvp)) panic("msdosfs_mountroot: can't setup rootvp"); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &msdosfs_vfsops; mp->mnt_flag = 0; LIST_INIT(&mp->mnt_vnodelist); args.flags = 0; args.uid = 0; args.gid = 0; args.mask = 0777; if ((error = mountmsdosfs(rootvp, mp, p, &args)) != 0) { free(mp, M_MOUNT); return (error); } if ((error = update_mp(mp, &args)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } if ((error = vfs_lock(mp)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); mp->mnt_vnodecovered = NULLVP; (void) copystr("/", mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)msdosfs_statfs(mp, &mp->mnt_stat, p); vfs_unlock(mp); return (0); } #endif /* * mp - path - addr in user space of mount point (ie /usr or whatever) * data - addr in user space of mount params including the name of the block * special file to treat as a filesystem. */ static int msdosfs_mount(mp, path, data, ndp, p) struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; /* vnode for blk device to mount */ struct msdosfs_args args; /* will hold data from mount request */ /* msdosfs specific mount control block */ struct msdosfsmount *pmp = NULL; size_t size; int error, flags; mode_t accessmode; error = copyin(data, (caddr_t)&args, sizeof(struct msdosfs_args)); if (error) return (error); if (args.magic != MSDOSFS_ARGSMAGIC) args.flags = 0; /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { pmp = VFSTOMSDOSFS(mp); error = 0; if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); } if (!error && (mp->mnt_flag & MNT_RELOAD)) /* not yet implemented */ error = EOPNOTSUPP; if (error) return (error); if ((pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (p->p_ucred->cr_uid != 0) { devvp = pmp->pm_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p); if (error) { VOP_UNLOCK(devvp, 0, p); return (error); } VOP_UNLOCK(devvp, 0, p); } pmp->pm_flags &= ~MSDOSFSMNT_RONLY; } if (args.fspec == 0) { #ifdef __notyet__ /* doesn't work correctly with current mountd XXX */ if (args.flags & MSDOSFSMNT_MNTOPT) { pmp->pm_flags &= ~MSDOSFSMNT_MNTOPT; pmp->pm_flags |= args.flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; } #endif /* * Process export requests. 
*/ return (vfs_export(mp, &pmp->pm_export, &args.export)); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); error = namei(ndp); if (error) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return (ENOTBLK); } if (bdevsw(devvp->v_rdev) == NULL) { vrele(devvp); return (ENXIO); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (p->p_ucred->cr_uid != 0) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = mountmsdosfs(devvp, mp, p, &args); #ifdef MSDOSFS_DEBUG /* only needed for the printf below */ pmp = VFSTOMSDOSFS(mp); #endif } else { if (devvp != pmp->pm_devvp) error = EINVAL; /* XXX needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return (error); } error = update_mp(mp, &args); if (error) { msdosfs_unmount(mp, MNT_FORCE, p); return error; } (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) msdosfs_statfs(mp, &mp->mnt_stat, p); #ifdef MSDOSFS_DEBUG printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap); #endif return (0); } static int mountmsdosfs(devvp, mp, p, argp) struct vnode *devvp; struct mount *mp; struct proc *p; struct msdosfs_args *argp; { struct msdosfsmount *pmp; struct buf *bp; dev_t dev = devvp->v_rdev; #ifndef __FreeBSD__ struct partinfo dpart; int bsize = 0, dtype = 0, tmp; #endif union bootsector *bsp; struct byte_bpb33 *b33; struct byte_bpb50 *b50; struct byte_bpb710 *b710; u_int8_t SecPerClust; int ronly, error; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); VOP_UNLOCK(devvp, 0, p); if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); bp = NULL; /* both used in error_exit */ pmp = NULL; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { /* * We need the disklabel to calculate the size of a FAT entry * later on. Also make sure the partition contains a filesystem * of type FS_MSDOS. This doesn't work for floppies, so we have * to check for them too. * * At least some parts of the msdos fs driver seem to assume * that the size of a disk block will always be 512 bytes. * Let's check it... 
*/ error = VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p); if (error) goto error_exit; tmp = dpart.part->p_fstype; dtype = dpart.disklab->d_type; bsize = dpart.disklab->d_secsize; if (bsize != 512 || (dtype!=DTYPE_FLOPPY && tmp!=FS_MSDOS)) { error = EINVAL; goto error_exit; } } #endif /* * Read the boot sector of the filesystem, and then check the * boot signature. If not a dos boot sector then error out. */ #ifdef PC98 error = bread(devvp, 0, 1024, NOCRED, &bp); #else error = bread(devvp, 0, 512, NOCRED, &bp); #endif if (error) goto error_exit; bp->b_flags |= B_AGE; bsp = (union bootsector *)bp->b_data; b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB; b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB; b710 = (struct byte_bpb710 *)bsp->bs710.bsPBP; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif #ifdef PC98 if ((bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) && (bsp->bs50.bsBootSectSig0 != 0 /* PC98 DOS 3.3x */ || bsp->bs50.bsBootSectSig1 != 0) && (bsp->bs50.bsBootSectSig0 != 0x90 /* PC98 DOS 5.0 */ || bsp->bs50.bsBootSectSig1 != 0x3d) && (bsp->bs50.bsBootSectSig0 != 0x46 /* PC98 DOS 3.3B */ || bsp->bs50.bsBootSectSig1 != 0xfa)) { #else if (bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif pmp = malloc(sizeof *pmp, M_MSDOSFSMNT, M_WAITOK); bzero((caddr_t)pmp, sizeof *pmp); pmp->pm_mountp = mp; /* * Compute several useful quantities from the bpb in the * bootsector. Copy in the dos 5 variant of the bpb then fix up * the fields that are different between dos 5 and dos 3.3. */ SecPerClust = b50->bpbSecPerClust; pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec); pmp->pm_ResSectors = getushort(b50->bpbResSectors); pmp->pm_FATs = b50->bpbFATs; pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts); pmp->pm_Sectors = getushort(b50->bpbSectors); pmp->pm_FATsecs = getushort(b50->bpbFATsecs); pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack); pmp->pm_Heads = getushort(b50->bpbHeads); pmp->pm_Media = b50->bpbMedia; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif /* XXX - We should probably check more values here */ if (!pmp->pm_BytesPerSec || !SecPerClust || !pmp->pm_Heads || pmp->pm_Heads > 255 #ifdef PC98 || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 255) { #else || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 63) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif if (pmp->pm_Sectors == 0) { pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs); pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors); } else { pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs); pmp->pm_HugeSectors = pmp->pm_Sectors; } if (pmp->pm_HugeSectors > 0xffffffff / (pmp->pm_BytesPerSec / sizeof(struct direntry)) + 1) { /* * We cannot deal currently with this size of disk * due to fileid limitations (see msdosfs_getattr and * msdosfs_readdir) */ error = EINVAL; printf("mountmsdosfs(): disk too big, sorry\n"); goto error_exit; } if (pmp->pm_RootDirEnts == 0) { if (bsp->bs710.bsBootSectSig2 != BOOTSIG2 || bsp->bs710.bsBootSectSig3 != BOOTSIG3 || pmp->pm_Sectors || pmp->pm_FATsecs || getushort(b710->bpbFSVers)) { error = EINVAL; printf("mountmsdosfs(): bad FAT32 filesystem\n"); goto error_exit; } pmp->pm_fatmask = FAT32_MASK; pmp->pm_fatmult = 4; pmp->pm_fatdiv = 1; pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs); if (getushort(b710->bpbExtFlags) & FATMIRROR) pmp->pm_curfat = getushort(b710->bpbExtFlags) & FATNUM; else 
pmp->pm_flags |= MSDOSFS_FATMIRROR; } else pmp->pm_flags |= MSDOSFS_FATMIRROR; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if (FAT32(pmp)) { /* * GEMDOS doesn't know fat32. */ error = EINVAL; goto error_exit; } /* * Check a few values (could do some more): * - logical sector size: power of 2, >= block size * - sectors per cluster: power of 2, >= 1 * - number of sectors: >= 1, <= size of partition */ if ( (SecPerClust == 0) || (SecPerClust & (SecPerClust - 1)) || (pmp->pm_BytesPerSec < bsize) || (pmp->pm_BytesPerSec & (pmp->pm_BytesPerSec - 1)) || (pmp->pm_HugeSectors == 0) || (pmp->pm_HugeSectors * (pmp->pm_BytesPerSec / bsize) > dpart.part->p_size) ) { error = EINVAL; goto error_exit; } /* * XXX - Many parts of the msdos fs driver seem to assume that * the number of bytes per logical sector (BytesPerSec) will * always be the same as the number of bytes per disk block * Let's pretend it is. */ tmp = pmp->pm_BytesPerSec / bsize; pmp->pm_BytesPerSec = bsize; pmp->pm_HugeSectors *= tmp; pmp->pm_HiddenSects *= tmp; pmp->pm_ResSectors *= tmp; pmp->pm_Sectors *= tmp; pmp->pm_FATsecs *= tmp; SecPerClust *= tmp; } #endif pmp->pm_fatblk = pmp->pm_ResSectors; if (FAT32(pmp)) { pmp->pm_rootdirblk = getulong(b710->bpbRootClust); pmp->pm_firstcluster = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_fsinfo = getushort(b710->bpbFSInfo); } else { pmp->pm_rootdirblk = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_rootdirsize = (pmp->pm_RootDirEnts * sizeof(struct direntry) + pmp->pm_BytesPerSec - 1) / pmp->pm_BytesPerSec;/* in sectors */ pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize; } pmp->pm_nmbrofclusters = (pmp->pm_HugeSectors - pmp->pm_firstcluster) / SecPerClust; pmp->pm_maxcluster = pmp->pm_nmbrofclusters + 1; pmp->pm_fatsize = pmp->pm_FATsecs * pmp->pm_BytesPerSec; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if ((pmp->pm_nmbrofclusters <= (0xff0 - 2)) && ((dtype == DTYPE_FLOPPY) || ((dtype == DTYPE_VNODE) && ((pmp->pm_Heads == 1) || (pmp->pm_Heads == 2)))) ) { pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } else #endif if (pmp->pm_fatmask == 0) { if (pmp->pm_maxcluster <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) { /* * This will usually be a floppy disk. This size makes * sure that one fat entry will not be split across * multiple blocks. */ pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } if (FAT12(pmp)) pmp->pm_fatblocksize = 3 * pmp->pm_BytesPerSec; else pmp->pm_fatblocksize = DFLTBSIZE; pmp->pm_fatblocksec = pmp->pm_fatblocksize / pmp->pm_BytesPerSec; pmp->pm_bnshift = ffs(pmp->pm_BytesPerSec) - 1; /* * Compute mask and shift value for isolating cluster relative byte * offsets and cluster numbers from a file offset. */ pmp->pm_bpcluster = SecPerClust * pmp->pm_BytesPerSec; pmp->pm_crbomask = pmp->pm_bpcluster - 1; pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1; /* * Check for valid cluster size * must be a power of 2 */ if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) { error = EINVAL; goto error_exit; } /* * Release the bootsector buffer. */ brelse(bp); bp = NULL; /* * Check FSInfo. 
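 * The FSInfo block is trusted only if all four signature fields match;
 * otherwise pm_fsinfo is cleared and the next-free-cluster hint is ignored.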
*/ if (pmp->pm_fsinfo) { struct fsinfo *fp; if ((error = bread(devvp, pmp->pm_fsinfo, 1024, NOCRED, &bp)) != 0) goto error_exit; fp = (struct fsinfo *)bp->b_data; if (!bcmp(fp->fsisig1, "RRaA", 4) && !bcmp(fp->fsisig2, "rrAa", 4) && !bcmp(fp->fsisig3, "\0\0\125\252", 4) && !bcmp(fp->fsisig4, "\0\0\125\252", 4)) pmp->pm_nxtfree = getulong(fp->fsinxtfree); else pmp->pm_fsinfo = 0; brelse(bp); bp = NULL; } /* * Check and validate (or perhaps invalidate?) the fsinfo structure? XXX */ /* * Allocate memory for the bitmap of allocated clusters, and then * fill it in. */ pmp->pm_inusemap = malloc(((pmp->pm_maxcluster + N_INUSEBITS - 1) / N_INUSEBITS) * sizeof(*pmp->pm_inusemap), M_MSDOSFSFAT, M_WAITOK); /* * fillinusemap() needs pm_devvp. */ pmp->pm_dev = dev; pmp->pm_devvp = devvp; /* * Have the inuse map filled in. */ if ((error = fillinusemap(pmp)) != 0) goto error_exit; /* * If they want fat updates to be synchronous then let them suffer * the performance degradation in exchange for the on disk copy of * the fat being correct just about all the time. I suppose this * would be a good thing to turn on if the kernel is still flakey. */ if (mp->mnt_flag & MNT_SYNCHRONOUS) pmp->pm_flags |= MSDOSFSMNT_WAITONFAT; /* * Finish up. */ if (ronly) pmp->pm_flags |= MSDOSFSMNT_RONLY; else pmp->pm_fmod = 1; mp->mnt_data = (qaddr_t) pmp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_flag |= MNT_LOCAL; devvp->v_specmountpoint = mp; return 0; error_exit: if (bp) brelse(bp); (void) VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NOCRED, p); if (pmp) { if (pmp->pm_inusemap) free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } static int msdosfs_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return (0); } /* * Unmount the filesystem described by mp. */ static int msdosfs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { struct msdosfsmount *pmp; int error, flags; flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); if (error) return error; pmp = VFSTOMSDOSFS(mp); pmp->pm_devvp->v_specmountpoint = NULL; #ifdef MSDOSFS_DEBUG { struct vnode *vp = pmp->pm_devvp; printf("msdosfs_umount(): just before calling VOP_CLOSE()\n"); printf("flag %08lx, usecount %d, writecount %d, holdcnt %ld\n", vp->v_flag, vp->v_usecount, vp->v_writecount, vp->v_holdcnt); printf("lastr %d, id %lu, mount %p, op %p\n", vp->v_lastr, vp->v_id, vp->v_mount, vp->v_op); printf("freef %p, freeb %p, mount %p\n", vp->v_freelist.tqe_next, vp->v_freelist.tqe_prev, vp->v_mount); printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n", TAILQ_FIRST(&vp->v_cleanblkhd), TAILQ_FIRST(&vp->v_dirtyblkhd), vp->v_numoutput, vp->v_type); printf("union %p, tag %d, data[0] %08x, data[1] %08x\n", vp->v_socket, vp->v_tag, ((u_int *)vp->v_data)[0], ((u_int *)vp->v_data)[1]); } #endif error = VOP_CLOSE(pmp->pm_devvp, (pmp->pm_flags&MSDOSFSMNT_RONLY) ? 
FREAD : FREAD | FWRITE, NOCRED, p); vrele(pmp->pm_devvp); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } static int msdosfs_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct denode *ndep; int error; #ifdef MSDOSFS_DEBUG printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp); #endif error = deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, &ndep); if (error) return (error); *vpp = DETOV(ndep); return (0); } static int msdosfs_quotactl(mp, cmds, uid, arg, p) struct mount *mp; int cmds; uid_t uid; caddr_t arg; struct proc *p; { return EOPNOTSUPP; } static int msdosfs_statfs(mp, sbp, p) struct mount *mp; struct statfs *sbp; struct proc *p; { struct msdosfsmount *pmp; pmp = VFSTOMSDOSFS(mp); sbp->f_bsize = pmp->pm_bpcluster; sbp->f_iosize = pmp->pm_bpcluster; sbp->f_blocks = pmp->pm_nmbrofclusters; sbp->f_bfree = pmp->pm_freeclustercount; sbp->f_bavail = pmp->pm_freeclustercount; sbp->f_files = pmp->pm_RootDirEnts; /* XXX */ sbp->f_ffree = 0; /* what to put in here? */ if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } static int msdosfs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *vp, *nvp; struct denode *dep; struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error, allerror = 0; /* * If we ever switch to not updating all of the fats all the time, * this would be the place to update them from the first one. */ if (pmp->pm_fmod != 0) { if (pmp->pm_flags & MSDOSFSMNT_RONLY) panic("msdosfs_sync: rofs mod"); else { /* update fats here */ } } /* * Write back each (modified) denode. */ simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; dep = VTODE(vp); if (vp->v_type == VNON || ((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 && (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { simple_unlock(&vp->v_interlock); continue; } simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; } error = VOP_FSYNC(vp, cred, waitfor, p); if (error) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); /* * Flush filesystem control info. 
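msdosfs_statfs() above reports capacity in whole clusters (f_bsize is pm_bpcluster, f_blocks is pm_nmbrofclusters, f_bfree is pm_freeclustercount); a rough sketch of turning those counts into byte figures, with stand-in field names:

#include <stdio.h>

/* Illustrative stand-ins for pm_bpcluster, pm_nmbrofclusters, pm_freeclustercount. */
struct fat_space {
	unsigned long bytes_per_cluster;
	unsigned long total_clusters;
	unsigned long free_clusters;
};

static void
fat_space_in_bytes(const struct fat_space *fs,
    unsigned long long *totalp, unsigned long long *freep)
{
	*totalp = (unsigned long long)fs->total_clusters * fs->bytes_per_cluster;
	*freep  = (unsigned long long)fs->free_clusters  * fs->bytes_per_cluster;
}

int
main(void)
{
	struct fat_space fs = { 4096, 65518, 1200 };
	unsigned long long total, freeb;

	fat_space_in_bytes(&fs, &total, &freeb);
	printf("total %llu bytes, free %llu bytes\n", total, freeb);
	return (0);
}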
*/ if (waitfor != MNT_LAZY) { vn_lock(pmp->pm_devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_FSYNC(pmp->pm_devvp, cred, waitfor, p); if (error) allerror = error; VOP_UNLOCK(pmp->pm_devvp, 0, p); } return (allerror); } static int msdosfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct defid *defhp = (struct defid *) fhp; struct denode *dep; struct netcred *np; int error; np = vfs_export_lookup(mp, &pmp->pm_export, nam); if (np == NULL) return (EACCES); error = deget(pmp, defhp->defid_dirclust, defhp->defid_dirofs, &dep); if (error) { *vpp = NULLVP; return (error); } *vpp = DETOV(dep); *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } static int msdosfs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { struct denode *dep; struct defid *defhp; dep = VTODE(vp); defhp = (struct defid *)fhp; defhp->defid_len = sizeof(struct defid); defhp->defid_dirclust = dep->de_dirclust; defhp->defid_dirofs = dep->de_diroffset; /* defhp->defid_gen = dep->de_gen; */ return (0); } static int msdosfs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { return EOPNOTSUPP; } static struct vfsops msdosfs_vfsops = { msdosfs_mount, msdosfs_start, msdosfs_unmount, msdosfs_root, msdosfs_quotactl, msdosfs_statfs, msdosfs_sync, msdosfs_vget, msdosfs_fhtovp, msdosfs_vptofh, msdosfs_init }; VFS_SET(msdosfs_vfsops, msdos, 0); Index: head/sys/fs/msdosfs/msdosfs_vnops.c =================================================================== --- head/sys/fs/msdosfs/msdosfs_vnops.c (revision 49534) +++ head/sys/fs/msdosfs/msdosfs_vnops.c (revision 49535) @@ -1,1986 +1,1985 @@ -/* $Id: msdosfs_vnops.c,v 1.86 1999/06/26 02:46:26 mckusick Exp $ */ +/* $Id: msdosfs_vnops.c,v 1.87 1999/07/25 04:01:32 bde Exp $ */ /* $NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include #include #include #include /* defines plimit structure in proc struct */ #include #include #include #include #include #include #include -#include /* XXX */ /* defines v_rdev */ #include #include #include #include #include #include #include #include #include #include #include #include /* * Prototypes for MSDOSFS vnode operations */ static int msdosfs_create __P((struct vop_create_args *)); static int msdosfs_mknod __P((struct vop_mknod_args *)); static int msdosfs_close __P((struct vop_close_args *)); static int msdosfs_access __P((struct vop_access_args *)); static int msdosfs_getattr __P((struct vop_getattr_args *)); static int msdosfs_setattr __P((struct vop_setattr_args *)); static int msdosfs_read __P((struct vop_read_args *)); static int msdosfs_write __P((struct vop_write_args *)); static int msdosfs_fsync __P((struct vop_fsync_args *)); static int msdosfs_remove __P((struct vop_remove_args *)); static int msdosfs_link __P((struct vop_link_args *)); static int msdosfs_rename __P((struct vop_rename_args *)); static int msdosfs_mkdir __P((struct vop_mkdir_args *)); static int msdosfs_rmdir __P((struct vop_rmdir_args *)); static int msdosfs_symlink __P((struct vop_symlink_args *)); static int msdosfs_readdir __P((struct vop_readdir_args *)); static int msdosfs_abortop __P((struct vop_abortop_args *)); static int msdosfs_bmap __P((struct vop_bmap_args *)); static int msdosfs_strategy __P((struct vop_strategy_args *)); static int msdosfs_print __P((struct vop_print_args *)); static int msdosfs_pathconf __P((struct vop_pathconf_args *ap)); static int msdosfs_getpages __P((struct vop_getpages_args *)); static int msdosfs_putpages __P((struct vop_putpages_args *)); /* * Some general notes: * * In the ufs filesystem the inodes, superblocks, and indirect blocks are * read/written using the vnode for the filesystem. Blocks that represent * the contents of a file are read/written using the vnode for the file * (including directories when they are read/written as files). This * presents problems for the dos filesystem because data that should be in * an inode (if dos had them) resides in the directory itself. Since we * must update directory entries without the benefit of having the vnode * for the directory we must use the vnode for the filesystem. This means * that when a directory is actually read/written (via read, write, or * readdir, or seek) we must use the vnode for the filesystem instead of * the vnode for the directory as would happen in ufs. 
This is to insure we * retreive the correct block from the buffer cache since the hash value is * based upon the vnode address and the desired block number. */ /* * Create a regular file. On entry the directory to contain the file being * created is locked. We must release before we return. We must also free * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or * only if the SAVESTART bit in cn_flags is clear on success. */ static int msdosfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct denode ndirent; struct denode *dep; struct denode *pdep = VTODE(ap->a_dvp); struct timespec ts; int error; #ifdef MSDOSFS_DEBUG printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap); #endif /* * If this is the root directory and there is no space left we * can't do anything. This is because the root directory can not * change size. */ if (pdep->de_StartCluster == MSDOSFSROOT && pdep->de_fndoffset >= pdep->de_FileSize) { error = ENOSPC; goto bad; } /* * Create a directory entry for the file, then call createde() to * have it installed. NOTE: DOS files are always executable. We * use the absence of the owner write bit to make the file * readonly. */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("msdosfs_create: no name"); #endif bzero(&ndirent, sizeof(ndirent)); error = uniqdosname(pdep, cnp, ndirent.de_Name); if (error) goto bad; ndirent.de_Attributes = (ap->a_vap->va_mode & VWRITE) ? ATTR_ARCHIVE : ATTR_ARCHIVE | ATTR_READONLY; ndirent.de_LowerCase = 0; ndirent.de_StartCluster = 0; ndirent.de_FileSize = 0; ndirent.de_dev = pdep->de_dev; ndirent.de_devvp = pdep->de_devvp; ndirent.de_pmp = pdep->de_pmp; ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; getnanotime(&ts); DETIMES(&ndirent, &ts, &ts, &ts); error = createde(&ndirent, pdep, &dep, cnp); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); *ap->a_vpp = DETOV(dep); return (0); bad: zfree(namei_zone, cnp->cn_pnbuf); return (error); } static int msdosfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { switch (ap->a_vap->va_type) { case VDIR: return (msdosfs_mkdir((struct vop_mkdir_args *)ap)); break; case VREG: return (msdosfs_create((struct vop_create_args *)ap)); break; default: zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (EINVAL); } /* NOTREACHED */ } static int msdosfs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); struct timespec ts; simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { getnanotime(&ts); DETIMES(dep, &ts, &ts, &ts); } simple_unlock(&vp->v_interlock); return 0; } static int msdosfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct ucred *cred = ap->a_cred; mode_t mask, file_mode, mode = ap->a_mode; register gid_t *gp; int i; file_mode = (S_IXUSR|S_IXGRP|S_IXOTH) | (S_IRUSR|S_IRGRP|S_IROTH) | ((dep->de_Attributes & ATTR_READONLY) ? 
0 : (S_IWUSR|S_IWGRP|S_IWOTH)); file_mode &= pmp->pm_mask; /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ if (mode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; default: break; } } /* User id 0 always gets access. */ if (cred->cr_uid == 0) return 0; mask = 0; /* Otherwise, check the owner. */ if (cred->cr_uid == pmp->pm_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return (file_mode & mask) == mask ? 0 : EACCES; } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (pmp->pm_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return (file_mode & mask) == mask ? 0 : EACCES; } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return (file_mode & mask) == mask ? 0 : EACCES; } static int msdosfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct vattr *vap = ap->a_vap; mode_t mode; struct timespec ts; u_long dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry); u_long fileid; getnanotime(&ts); DETIMES(dep, &ts, &ts, &ts); vap->va_fsid = dev2udev(dep->de_dev); /* * The following computation of the fileid must be the same as that * used in msdosfs_readdir() to compute d_fileno. If not, pwd * doesn't work. */ if (dep->de_Attributes & ATTR_DIRECTORY) { fileid = cntobn(pmp, dep->de_StartCluster) * dirsperblk; if (dep->de_StartCluster == MSDOSFSROOT) fileid = 1; } else { fileid = cntobn(pmp, dep->de_dirclust) * dirsperblk; if (dep->de_dirclust == MSDOSFSROOT) fileid = roottobn(pmp, 0) * dirsperblk; fileid += dep->de_diroffset / sizeof(struct direntry); } vap->va_fileid = fileid; if ((dep->de_Attributes & ATTR_READONLY) == 0) mode = S_IRWXU|S_IRWXG|S_IRWXO; else mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; vap->va_mode = mode & pmp->pm_mask; vap->va_uid = pmp->pm_uid; vap->va_gid = pmp->pm_gid; vap->va_nlink = 1; vap->va_rdev = 0; vap->va_size = dep->de_FileSize; dos2unixtime(dep->de_MDate, dep->de_MTime, 0, &vap->va_mtime); if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) { dos2unixtime(dep->de_ADate, 0, 0, &vap->va_atime); dos2unixtime(dep->de_CDate, dep->de_CTime, dep->de_CHun, &vap->va_ctime); } else { vap->va_atime = vap->va_mtime; vap->va_ctime = vap->va_mtime; } vap->va_flags = 0; if ((dep->de_Attributes & ATTR_ARCHIVE) == 0) vap->va_flags |= SF_ARCHIVED; vap->va_gen = 0; vap->va_blocksize = pmp->pm_bpcluster; vap->va_bytes = (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask; vap->va_type = ap->a_vp->v_type; vap->va_filerev = dep->de_modrev; return (0); } static int msdosfs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; int error = 0; #ifdef MSDOSFS_DEBUG printf("msdosfs_setattr(): vp %p, vap %p, cred %p, p %p\n", ap->a_vp, vap, cred, ap->a_p); #endif /* * Check for unsettable attributes. 
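msdosfs_getattr() and msdosfs_readdir() have to agree on the synthesized file number because FAT has no inodes; both derive it from where the directory entry sits on disk. A stand-alone sketch of that arithmetic; the sector number would come from the driver's cntobn(), which is not reproduced here:

#include <stdio.h>

#define DIRENTRY_SIZE	32	/* bytes per on-disk FAT directory entry */

/*
 * Illustrative only: build a "file id" from the sector holding the
 * directory entry plus the entry's index within that sector, in the
 * spirit of the fileid computation in msdosfs_getattr().
 */
static unsigned long
fat_fileid(unsigned long entry_sector, unsigned long entry_offset,
    unsigned int bytes_per_sector)
{
	unsigned long dirs_per_sector = bytes_per_sector / DIRENTRY_SIZE;

	return (entry_sector * dirs_per_sector +
	    entry_offset / DIRENTRY_SIZE);
}

int
main(void)
{
	/* entry at byte offset 96 of sector 2050, 512-byte sectors */
	printf("fileid = %lu\n", fat_fileid(2050, 96, 512));
	return (0);
}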
*/ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { #ifdef MSDOSFS_DEBUG printf("msdosfs_setattr(): returning EINVAL\n"); printf(" va_type %d, va_nlink %x, va_fsid %lx, va_fileid %lx\n", vap->va_type, vap->va_nlink, vap->va_fsid, vap->va_fileid); printf(" va_blocksize %lx, va_rdev %x, va_bytes %qx, va_gen %lx\n", vap->va_blocksize, vap->va_rdev, vap->va_bytes, vap->va_gen); printf(" va_uid %x, va_gid %x\n", vap->va_uid, vap->va_gid); #endif return (EINVAL); } if (vap->va_flags != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser_xxx(cred, ap->a_p, PRISON_ROOT))) return (error); /* * We are very inconsistent about handling unsupported * attributes. We ignored the access time and the * read and execute bits. We were strict for the other * attributes. * * Here we are strict, stricter than ufs in not allowing * users to attempt to set SF_SETTABLE bits or anyone to * set unsupported bits. However, we ignore attempts to * set ATTR_ARCHIVE for directories `cp -pr' from a more * sensible file system attempts it a lot. */ if (cred->cr_uid != 0) { if (vap->va_flags & SF_SETTABLE) return EPERM; } if (vap->va_flags & ~SF_ARCHIVED) return EOPNOTSUPP; if (vap->va_flags & SF_ARCHIVED) dep->de_Attributes &= ~ATTR_ARCHIVE; else if (!(dep->de_Attributes & ATTR_DIRECTORY)) dep->de_Attributes |= ATTR_ARCHIVE; dep->de_flag |= DE_MODIFIED; } if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { uid_t uid; gid_t gid; if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); uid = vap->va_uid; if (uid == (uid_t)VNOVAL) uid = pmp->pm_uid; gid = vap->va_gid; if (gid == (gid_t)VNOVAL) gid = pmp->pm_gid; if ((cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid || (gid != pmp->pm_gid && !groupmember(gid, cred))) && (error = suser_xxx(cred, ap->a_p, PRISON_ROOT))) return error; if (uid != pmp->pm_uid || gid != pmp->pm_gid) return EINVAL; } if (vap->va_size != VNOVAL) { /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ switch (vp->v_type) { case VDIR: return (EISDIR); /* NOT REACHED */ case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; default: break; } error = detrunc(dep, vap->va_size, 0, cred, ap->a_p); if (error) return error; } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser_xxx(cred, ap->a_p, PRISON_ROOT)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(ap->a_vp, VWRITE, cred, ap->a_p)))) return (error); if (vp->v_type != VDIR) { if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 && vap->va_atime.tv_sec != VNOVAL) unix2dostime(&vap->va_atime, &dep->de_ADate, NULL, NULL); if (vap->va_mtime.tv_sec != VNOVAL) unix2dostime(&vap->va_mtime, &dep->de_MDate, &dep->de_MTime, NULL); dep->de_Attributes |= ATTR_ARCHIVE; dep->de_flag |= DE_MODIFIED; } } /* * DOS files only have the ability to have their writability * attribute set, so we use the owner write bit to set the readonly * attribute. 
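As the comment above says, the only permission FAT can persist is the read-only attribute, so setattr reduces a full mode to the owner write bit. A small sketch of that mapping, assuming the usual S_IWUSR bit stands in for VWRITE:

#include <stdio.h>
#include <sys/stat.h>

#define ATTR_READONLY	0x01	/* DOS read-only attribute bit */

/*
 * Sketch of the mapping described above: a chmod-style mode is
 * reduced to the presence or absence of the owner write bit.
 */
static unsigned char
mode_to_fat_attr(mode_t mode, unsigned char attr)
{
	if (mode & S_IWUSR)
		attr &= ~ATTR_READONLY;	/* writable */
	else
		attr |= ATTR_READONLY;	/* read-only */
	return (attr);
}

int
main(void)
{
	printf("0644 -> attr 0x%02x\n", (unsigned)mode_to_fat_attr(0644, 0));
	printf("0444 -> attr 0x%02x\n", (unsigned)mode_to_fat_attr(0444, 0));
	return (0);
}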
*/ if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser_xxx(cred, ap->a_p, PRISON_ROOT))) return (error); if (vp->v_type != VDIR) { /* We ignore the read and execute bits. */ if (vap->va_mode & VWRITE) dep->de_Attributes &= ~ATTR_READONLY; else dep->de_Attributes |= ATTR_READONLY; dep->de_flag |= DE_MODIFIED; } } return (deupdat(dep, 1)); } static int msdosfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error = 0; int diff; int blsize; int isadir; int orig_resid; long n; long on; daddr_t lbn; daddr_t rablock; int rasize; struct buf *bp; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct uio *uio = ap->a_uio; if (uio->uio_offset < 0) return (EINVAL); /* * If they didn't ask for any data, then we are done. */ orig_resid = uio->uio_resid; if (orig_resid <= 0) return (0); isadir = dep->de_Attributes & ATTR_DIRECTORY; do { lbn = de_cluster(pmp, uio->uio_offset); on = uio->uio_offset & pmp->pm_crbomask; n = min((u_long) (pmp->pm_bpcluster - on), uio->uio_resid); diff = dep->de_FileSize - uio->uio_offset; if (diff <= 0) break; if (diff < n) n = diff; /* convert cluster # to block # if a directory */ if (isadir) { error = pcbmap(dep, lbn, &lbn, 0, &blsize); if (error) break; } /* * If we are operating on a directory file then be sure to * do i/o with the vnode for the filesystem instead of the * vnode for the directory. */ if (isadir) { error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp); } else { rablock = lbn + 1; if (vp->v_lastr + 1 == lbn && de_cn2off(pmp, rablock) < dep->de_FileSize) { rasize = pmp->pm_bpcluster; error = breadn(vp, lbn, pmp->pm_bpcluster, &rablock, &rasize, 1, NOCRED, &bp); } else error = bread(vp, lbn, pmp->pm_bpcluster, NOCRED, &bp); vp->v_lastr = lbn; } n = min(n, pmp->pm_bpcluster - bp->b_resid); if (error) { brelse(bp); break; } error = uiomove(bp->b_data + on, (int) n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); if (!isadir && (error == 0 || uio->uio_resid != orig_resid) && (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) dep->de_flag |= DE_ACCESS; return (error); } /* * Write data to a file or directory. */ static int msdosfs_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int n; int croffset; int resid; u_long osize; int error = 0; u_long count; daddr_t bn, lastcn; struct buf *bp; int ioflag = ap->a_ioflag; struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct vnode *vp = ap->a_vp; struct vnode *thisvp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct ucred *cred = ap->a_cred; #ifdef MSDOSFS_DEBUG printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n", vp, uio, ioflag, cred); printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n", dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster); #endif switch (vp->v_type) { case VREG: if (ioflag & IO_APPEND) uio->uio_offset = dep->de_FileSize; thisvp = vp; break; case VDIR: return EISDIR; default: panic("msdosfs_write(): bad file type"); } if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); /* * If they've exceeded their filesize limit, tell them about it. 
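The read loop earlier sizes each transfer three ways: to the end of the current cluster, to what the caller asked for, and to end of file. One iteration of that sizing, with the driver's de_cluster()/pm_crbomask macros replaced by plain masks and made-up numbers:

#include <stdio.h>

static unsigned long
min_ul(unsigned long a, unsigned long b)
{
	return (a < b ? a : b);
}

/*
 * One iteration of the sizing logic in msdosfs_read(): transfer at
 * most to the end of the current cluster, at most what the caller
 * asked for, and never past end of file.
 */
int
main(void)
{
	unsigned long bpcluster = 4096;
	unsigned long filesize = 10000;
	unsigned long offset = 9000, resid = 4000;

	unsigned long on = offset & (bpcluster - 1);	  /* offset in cluster */
	unsigned long n = min_ul(bpcluster - on, resid);  /* room left in cluster */
	unsigned long diff = filesize - offset;		  /* bytes before EOF */

	if (diff < n)
		n = diff;
	printf("copy %lu bytes starting at in-cluster offset %lu\n", n, on);
	return (0);
}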
*/ if (p && ((uio->uio_offset + uio->uio_resid) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { psignal(p, SIGXFSZ); return (EFBIG); } /* * If the offset we are starting the write at is beyond the end of * the file, then they've done a seek. Unix filesystems allow * files with holes in them, DOS doesn't so we must fill the hole * with zeroed blocks. */ if (uio->uio_offset > dep->de_FileSize) { error = deextend(dep, uio->uio_offset, cred); if (error) return (error); } /* * Remember some values in case the write fails. */ resid = uio->uio_resid; osize = dep->de_FileSize; /* * If we write beyond the end of the file, extend it to its ultimate * size ahead of the time to hopefully get a contiguous area. */ if (uio->uio_offset + resid > osize) { count = de_clcount(pmp, uio->uio_offset + resid) - de_clcount(pmp, osize); error = extendfile(dep, count, NULL, NULL, 0); if (error && (error != ENOSPC || (ioflag & IO_UNIT))) goto errexit; lastcn = dep->de_fc[FC_LASTFC].fc_frcn; } else lastcn = de_clcount(pmp, osize) - 1; do { if (de_cluster(pmp, uio->uio_offset) > lastcn) { error = ENOSPC; break; } croffset = uio->uio_offset & pmp->pm_crbomask; n = min(uio->uio_resid, pmp->pm_bpcluster - croffset); if (uio->uio_offset + n > dep->de_FileSize) { dep->de_FileSize = uio->uio_offset + n; /* The object size needs to be set before buffer is allocated */ vnode_pager_setsize(vp, dep->de_FileSize); } bn = de_cluster(pmp, uio->uio_offset); if ((uio->uio_offset & pmp->pm_crbomask) == 0 && (de_cluster(pmp, uio->uio_offset + uio->uio_resid) > de_cluster(pmp, uio->uio_offset) || uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) { /* * If either the whole cluster gets written, * or we write the cluster from its start beyond EOF, * then no need to read data from disk. */ bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0); clrbuf(bp); /* * Do the bmap now, since pcbmap needs buffers * for the fat table. (see msdosfs_strategy) */ if (bp->b_blkno == bp->b_lblkno) { error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); if (error) bp->b_blkno = -1; } if (bp->b_blkno == -1) { brelse(bp); if (!error) error = EIO; /* XXX */ break; } } else { /* * The block we need to write into exists, so read it in. */ error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp); if (error) { brelse(bp); break; } } /* * Should these vnode_pager_* functions be done on dir * files? */ /* * Copy the data from user space into the buf header. */ error = uiomove(bp->b_data + croffset, n, uio); /* * If they want this synchronous then write it and wait for * it. Otherwise, if on a cluster boundary write it * asynchronously so we can move on to the next block * without delay. Otherwise do a delayed write because we * may want to write somemore into the block later. */ if (ioflag & IO_SYNC) (void) bwrite(bp); else if (n + croffset == pmp->pm_bpcluster) bawrite(bp); else bdwrite(bp); dep->de_flag |= DE_UPDATE; } while (error == 0 && uio->uio_resid > 0); /* * If the write failed and they want us to, truncate the file back * to the size it was before the write was attempted. */ errexit: if (error) { if (ioflag & IO_UNIT) { detrunc(dep, osize, ioflag & IO_SYNC, NOCRED, NULL); uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } else { detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL); if (uio->uio_resid != resid) error = 0; } } else if (ioflag & IO_SYNC) error = deupdat(dep, 1); return (error); } /* * Flush the blocks of a file to disk. * * This function is worthless for vnodes that represent directories. 
Maybe we * could just do a sync if they try an fsync on a directory file. */ static int msdosfs_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; int s; struct buf *bp, *nbp; /* * Flush all dirty buffers associated with a vnode. */ loop: s = splbio(); for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("msdosfs_fsync: not dirty"); bremfree(bp); splx(s); (void) bwrite(bp); goto loop; } while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "msdosfsn", 0); } #ifdef DIAGNOSTIC if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { vprint("msdosfs_fsync: dirty", vp); goto loop; } #endif splx(s); return (deupdat(VTODE(vp), ap->a_waitfor == MNT_WAIT)); } static int msdosfs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); struct denode *ddep = VTODE(ap->a_dvp); int error; if (ap->a_vp->v_type == VDIR) error = EPERM; else error = removede(ddep, dep); #ifdef MSDOSFS_DEBUG printf("msdosfs_remove(), dep %p, v_usecount %d\n", dep, ap->a_vp->v_usecount); #endif return (error); } /* * DOS filesystems don't know what links are. But since we already called * msdosfs_lookup() with create and lockparent, the parent is locked so we * have to free it before we return the error. */ static int msdosfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { VOP_ABORTOP(ap->a_tdvp, ap->a_cnp); return (EOPNOTSUPP); } /* * Renames on files require moving the denode to a new hash queue since the * denode's location is used to compute which hash queue to put the file * in. Unless it is a rename in place. For example "mv a b". * * What follows is the basic algorithm: * * if (file move) { * if (dest file exists) { * remove dest file * } * if (dest and src in same directory) { * rewrite name in existing directory slot * } else { * write new entry in dest directory * update offset and dirclust in denode * move denode to new hash chain * clear old directory entry * } * } else { * directory move * if (dest directory exists) { * if (dest is not empty) { * return ENOTEMPTY * } * remove dest directory * } * if (dest and src in same directory) { * rewrite name in existing entry * } else { * be sure dest is not a child of src directory * write entry in dest directory * update "." and ".." in moved directory * clear old directory entry for moved directory * } * } * * On entry: * source's parent directory is unlocked * source file or directory is unlocked * destination's parent directory is locked * destination file or directory is locked if it exists * * On exit: * all denodes should be released * * Notes: * I'm not sure how the memory containing the pathnames pointed at by the * componentname structures is freed, there may be some memory bleeding * for each rename done. 
*/ static int msdosfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tvp = ap->a_tvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct denode *ip, *xp, *dp, *zp; u_char toname[11], oldname[11]; u_long from_diroffset, to_diroffset; u_char to_count; int doingdirectory = 0, newparent = 0; int error; u_long cn; daddr_t bn; struct denode *fddep; /* from file's parent directory */ struct denode *fdep; /* from file or directory */ struct denode *tddep; /* to file's parent directory */ struct denode *tdep; /* to file or directory */ struct msdosfsmount *pmp; struct direntry *dotdotp; struct buf *bp; fddep = VTODE(ap->a_fdvp); fdep = VTODE(ap->a_fvp); tddep = VTODE(ap->a_tdvp); tdep = tvp ? VTODE(tvp) : NULL; pmp = fddep->de_pmp; pmp = VFSTOMSDOSFS(fdvp->v_mount); #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("msdosfs_rename: no name"); #endif /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); vrele(fdvp); vrele(fvp); return (error); } /* * If source and dest are the same, do nothing. */ if (tvp == fvp) { error = 0; goto abortit; } error = vn_lock(fvp, LK_EXCLUSIVE, p); if (error) goto abortit; dp = VTODE(fdvp); ip = VTODE(fvp); /* * Be sure we are not renaming ".", "..", or an alias of ".". This * leads to a crippled directory tree. It's pretty tough to do a * "ls" or "pwd" with the "." directory entry missing, and "cd .." * doesn't work if the ".." entry is missing. */ if (ip->de_Attributes & ATTR_DIRECTORY) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) || (ip->de_flag & DE_RENAME)) { VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } ip->de_flag |= DE_RENAME; doingdirectory++; } /* * When the target exists, both the directory * and target vnodes are returned locked. */ dp = VTODE(tdvp); xp = tvp ? VTODE(tvp) : NULL; /* * Remember direntry place to use for destination */ to_diroffset = dp->de_fndoffset; to_count = dp->de_fndcnt; /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to doscheckpath(). 
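doscheckpath(), called below, guards against moving a directory underneath one of its own descendants. A toy model of that ancestor walk on a parent-pointer structure (purely illustrative, not the driver's on-disk walk over ".." entries):

#include <stdio.h>
#include <stddef.h>

struct dir {
	const struct dir *parent;	/* NULL for the root */
	const char *name;
};

/*
 * Walk from the destination's parent up toward the root; if the
 * source directory is found on the way, the rename would orphan a
 * subtree and must be refused.
 */
static int
is_ancestor(const struct dir *src, const struct dir *dstparent)
{
	const struct dir *d;

	for (d = dstparent; d != NULL; d = d->parent)
		if (d == src)
			return (1);	/* src is above dst: disallow */
	return (0);
}

int
main(void)
{
	struct dir root = { NULL, "/" };
	struct dir a = { &root, "a" };
	struct dir b = { &a, "b" };

	printf("mv a into b allowed? %s\n", is_ancestor(&a, &b) ? "no" : "yes");
	return (0);
}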
*/ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); VOP_UNLOCK(fvp, 0, p); if (VTODE(fdvp)->de_StartCluster != VTODE(tdvp)->de_StartCluster) newparent = 1; vrele(fdvp); if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); /* * doscheckpath() vput()'s dp, * so we have to do a relookup afterwards */ error = doscheckpath(ip, dp); if (error) goto out; if ((tcnp->cn_flags & SAVESTART) == 0) panic("msdosfs_rename: lost to startdir"); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; dp = VTODE(tdvp); xp = tvp ? VTODE(tvp) : NULL; } if (xp != NULL) { /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if (xp->de_Attributes & ATTR_DIRECTORY) { if (!dosdirempty(xp)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } error = removede(dp, xp); if (error) goto bad; vput(tvp); xp = NULL; } /* * Convert the filename in tcnp into a dos filename. We copy this * into the denode and directory entry for the destination * file/directory. */ error = uniqdosname(VTODE(tdvp), tcnp, toname); if (error) goto abortit; /* * Since from wasn't locked at various places above, * have to do a relookup here. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("msdosfs_rename: lost from startdir"); if (!newparent) VOP_UNLOCK(tdvp, 0, p); (void) relookup(fdvp, &fvp, fcnp); if (fvp == NULL) { /* * From name has disappeared. */ if (doingdirectory) panic("rename: lost dir entry"); vrele(ap->a_fvp); if (newparent) VOP_UNLOCK(tdvp, 0, p); vrele(tdvp); return 0; } xp = VTODE(fvp); zp = VTODE(fdvp); from_diroffset = zp->de_fndoffset; /* * Ensure that the directory entry still exists and has not * changed till now. If the source is a file the entry may * have been unlinked or renamed. In either case there is * no further work to be done. If the source is a directory * then it cannot have been rmdir'ed or renamed; this is * prohibited by the DE_RENAME flag. */ if (xp != ip) { if (doingdirectory) panic("rename: lost dir entry"); vrele(ap->a_fvp); VOP_UNLOCK(fvp, 0, p); if (newparent) VOP_UNLOCK(fdvp, 0, p); xp = NULL; } else { vrele(fvp); xp = NULL; /* * First write a new entry in the destination * directory and mark the entry in the source directory * as deleted. Then move the denode to the correct hash * chain for its new location in the filesystem. And, if * we moved a directory, then update its .. entry to point * to the new parent directory. 
*/ bcopy(ip->de_Name, oldname, 11); bcopy(toname, ip->de_Name, 11); /* update denode */ dp->de_fndoffset = to_diroffset; dp->de_fndcnt = to_count; error = createde(ip, dp, (struct denode **)0, tcnp); if (error) { bcopy(oldname, ip->de_Name, 11); if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } ip->de_refcnt++; zp->de_fndoffset = from_diroffset; error = removede(zp, ip); if (error) { /* XXX should really panic here, fs is corrupt */ if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } if (!doingdirectory) { error = pcbmap(dp, de_cluster(pmp, to_diroffset), 0, &ip->de_dirclust, 0); if (error) { /* XXX should really panic here, fs is corrupt */ if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } if (ip->de_dirclust == MSDOSFSROOT) ip->de_diroffset = to_diroffset; else ip->de_diroffset = to_diroffset & pmp->pm_crbomask; } reinsert(ip); if (newparent) VOP_UNLOCK(fdvp, 0, p); } /* * If we moved a directory to a new parent directory, then we must * fixup the ".." entry in the moved directory. */ if (doingdirectory && newparent) { cn = ip->de_StartCluster; if (cn == MSDOSFSROOT) { /* this should never happen */ panic("msdosfs_rename(): updating .. in root directory?"); } else bn = cntobn(pmp, cn); error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster, NOCRED, &bp); if (error) { /* XXX should really panic here, fs is corrupt */ brelse(bp); VOP_UNLOCK(fvp, 0, p); goto bad; } dotdotp = (struct direntry *)bp->b_data + 1; putushort(dotdotp->deStartCluster, dp->de_StartCluster); if (FAT32(pmp)) putushort(dotdotp->deHighClust, dp->de_StartCluster >> 16); error = bwrite(bp); if (error) { /* XXX should really panic here, fs is corrupt */ VOP_UNLOCK(fvp, 0, p); goto bad; } } VOP_UNLOCK(fvp, 0, p); bad: if (xp) vput(tvp); vput(tdvp); out: ip->de_flag &= ~DE_RENAME; vrele(fdvp); vrele(fvp); return (error); } static struct { struct direntry dot; struct direntry dotdot; } dosdirtemplate = { { ". ", " ", /* the . entry */ ATTR_DIRECTORY, /* file attribute */ 0, /* reserved */ 0, { 0, 0 }, { 0, 0 }, /* create time & date */ { 0, 0 }, /* access date */ { 0, 0 }, /* high bits of start cluster */ { 210, 4 }, { 210, 4 }, /* modify time & date */ { 0, 0 }, /* startcluster */ { 0, 0, 0, 0 } /* filesize */ }, { ".. ", " ", /* the .. entry */ ATTR_DIRECTORY, /* file attribute */ 0, /* reserved */ 0, { 0, 0 }, { 0, 0 }, /* create time & date */ { 0, 0 }, /* access date */ { 0, 0 }, /* high bits of start cluster */ { 210, 4 }, { 210, 4 }, /* modify time & date */ { 0, 0 }, /* startcluster */ { 0, 0, 0, 0 } /* filesize */ } }; static int msdosfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struvt vnode **a_vpp; struvt componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct denode *dep; struct denode *pdep = VTODE(ap->a_dvp); struct direntry *denp; struct msdosfsmount *pmp = pdep->de_pmp; struct buf *bp; u_long newcluster, pcl; int bn; int error; struct denode ndirent; struct timespec ts; /* * If this is the root directory and there is no space left we * can't do anything. This is because the root directory can not * change size. */ if (pdep->de_StartCluster == MSDOSFSROOT && pdep->de_fndoffset >= pdep->de_FileSize) { error = ENOSPC; goto bad2; } /* * Allocate a cluster to hold the about to be created directory. 
*/ error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL); if (error) goto bad2; bzero(&ndirent, sizeof(ndirent)); ndirent.de_pmp = pmp; ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; getnanotime(&ts); DETIMES(&ndirent, &ts, &ts, &ts); /* * Now fill the cluster with the "." and ".." entries. And write * the cluster to disk. This way it is there for the parent * directory to be pointing at if there were a crash. */ bn = cntobn(pmp, newcluster); /* always succeeds */ bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0); bzero(bp->b_data, pmp->pm_bpcluster); bcopy(&dosdirtemplate, bp->b_data, sizeof dosdirtemplate); denp = (struct direntry *)bp->b_data; putushort(denp[0].deStartCluster, newcluster); putushort(denp[0].deCDate, ndirent.de_CDate); putushort(denp[0].deCTime, ndirent.de_CTime); denp[0].deCHundredth = ndirent.de_CHun; putushort(denp[0].deADate, ndirent.de_ADate); putushort(denp[0].deMDate, ndirent.de_MDate); putushort(denp[0].deMTime, ndirent.de_MTime); pcl = pdep->de_StartCluster; if (FAT32(pmp) && pcl == pmp->pm_rootdirblk) pcl = 0; putushort(denp[1].deStartCluster, pcl); putushort(denp[1].deCDate, ndirent.de_CDate); putushort(denp[1].deCTime, ndirent.de_CTime); denp[1].deCHundredth = ndirent.de_CHun; putushort(denp[1].deADate, ndirent.de_ADate); putushort(denp[1].deMDate, ndirent.de_MDate); putushort(denp[1].deMTime, ndirent.de_MTime); if (FAT32(pmp)) { putushort(denp[0].deHighClust, newcluster >> 16); putushort(denp[1].deHighClust, pdep->de_StartCluster >> 16); } error = bwrite(bp); if (error) goto bad; /* * Now build up a directory entry pointing to the newly allocated * cluster. This will be written to an empty slot in the parent * directory. */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("msdosfs_mkdir: no name"); #endif error = uniqdosname(pdep, cnp, ndirent.de_Name); if (error) goto bad; ndirent.de_Attributes = ATTR_DIRECTORY; ndirent.de_LowerCase = 0; ndirent.de_StartCluster = newcluster; ndirent.de_FileSize = 0; ndirent.de_dev = pdep->de_dev; ndirent.de_devvp = pdep->de_devvp; error = createde(&ndirent, pdep, &dep, cnp); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); *ap->a_vpp = DETOV(dep); return (0); bad: clusterfree(pmp, newcluster, NULL); bad2: zfree(namei_zone, cnp->cn_pnbuf); return (error); } static int msdosfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register struct denode *ip, *dp; struct proc *p = cnp->cn_proc; int error; ip = VTODE(vp); dp = VTODE(dvp); /* * Verify the directory is empty (and valid). * (Rmdir ".." won't be valid since * ".." will contain a reference to * the current directory and thus be * non-empty.) */ error = 0; if (!dosdirempty(ip) || ip->de_flag & DE_RENAME) { error = ENOTEMPTY; goto out; } /* * Delete the entry from the directory. For dos filesystems this * gets rid of the directory entry on disk, the in memory copy * still exists but the de_refcnt is <= 0. This prevents it from * being found by deget(). When the vput() on dep is done we give * up access and eventually msdosfs_reclaim() will be called which * will remove it from the denode cache. */ error = removede(dp, ip); if (error) goto out; /* * This is where we decrement the link count in the parent * directory. Since dos filesystems don't do this we just purge * the name cache. 
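Both the ".." fixup in rename and the template filling in mkdir store a FAT32 start cluster as two 16-bit halves via putushort(deStartCluster) and putushort(deHighClust). A sketch of that split; the little-endian byte swapping done by putushort() itself is omitted:

#include <stdio.h>
#include <stdint.h>

/*
 * A FAT directory entry keeps the start cluster in two 16-bit
 * fields; the high half is only meaningful on FAT32.
 */
static void
split_cluster(uint32_t cluster, uint16_t *lowp, uint16_t *highp)
{
	*lowp = (uint16_t)(cluster & 0xffff);
	*highp = (uint16_t)(cluster >> 16);
}

int
main(void)
{
	uint16_t lo, hi;

	split_cluster(0x0003abcdUL, &lo, &hi);
	printf("low 0x%04x, high 0x%04x\n", (unsigned)lo, (unsigned)hi);
	return (0);
}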
*/ cache_purge(dvp); VOP_UNLOCK(dvp, 0, p); /* * Truncate the directory that is being deleted. */ error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred, p); cache_purge(vp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); out: return (error); } /* * DOS filesystems don't know what symlinks are. */ static int msdosfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { zfree(namei_zone, ap->a_cnp->cn_pnbuf); /* VOP_ABORTOP(ap->a_dvp, ap->a_cnp); ??? */ return (EOPNOTSUPP); } static int msdosfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; } */ *ap; { int error = 0; int diff; long n; int blsize; long on; u_long cn; u_long fileno; u_long dirsperblk; long bias = 0; daddr_t bn, lbn; struct buf *bp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct direntry *dentp; struct dirent dirbuf; struct uio *uio = ap->a_uio; u_long *cookies = NULL; int ncookies = 0; off_t offset, off; int chksum = -1; #ifdef MSDOSFS_DEBUG printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n", ap->a_vp, uio, ap->a_cred, ap->a_eofflag); #endif /* * msdosfs_readdir() won't operate properly on regular files since * it does i/o only with the the filesystem vnode, and hence can * retrieve the wrong block from the buffer cache for a plain file. * So, fail attempts to readdir() on a plain file. */ if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) return (ENOTDIR); /* * To be safe, initialize dirbuf */ bzero(dirbuf.d_name, sizeof(dirbuf.d_name)); /* * If the user buffer is smaller than the size of one dos directory * entry or the file offset is not a multiple of the size of a * directory entry, then we fail the read. */ off = offset = uio->uio_offset; if (uio->uio_resid < sizeof(struct direntry) || (offset & (sizeof(struct direntry) - 1))) return (EINVAL); if (ap->a_ncookies) { ncookies = uio->uio_resid / 16; MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); *ap->a_cookies = cookies; *ap->a_ncookies = ncookies; } dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry); /* * If they are reading from the root directory then, we simulate * the . and .. entries since these don't exist in the root * directory. We also set the offset bias to make up for having to * simulate these entries. By this I mean that at file offset 64 we * read the first entry in the root directory that lives on disk. */ if (dep->de_StartCluster == MSDOSFSROOT || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) { #if 0 printf("msdosfs_readdir(): going after . or .. 
in root dir, offset %d\n", offset); #endif bias = 2 * sizeof(struct direntry); if (offset < bias) { for (n = (int)offset / sizeof(struct direntry); n < 2; n++) { if (FAT32(pmp)) dirbuf.d_fileno = cntobn(pmp, pmp->pm_rootdirblk) * dirsperblk; else dirbuf.d_fileno = 1; dirbuf.d_type = DT_DIR; switch (n) { case 0: dirbuf.d_namlen = 1; strcpy(dirbuf.d_name, "."); break; case 1: dirbuf.d_namlen = 2; strcpy(dirbuf.d_name, ".."); break; } dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); if (uio->uio_resid < dirbuf.d_reclen) goto out; error = uiomove((caddr_t) &dirbuf, dirbuf.d_reclen, uio); if (error) goto out; offset += sizeof(struct direntry); off = offset; if (cookies) { *cookies++ = offset; if (--ncookies <= 0) goto out; } } } } off = offset; while (uio->uio_resid > 0) { lbn = de_cluster(pmp, offset - bias); on = (offset - bias) & pmp->pm_crbomask; n = min(pmp->pm_bpcluster - on, uio->uio_resid); diff = dep->de_FileSize - (offset - bias); if (diff <= 0) break; n = min(n, diff); error = pcbmap(dep, lbn, &bn, &cn, &blsize); if (error) break; error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } n = min(n, blsize - bp->b_resid); /* * Convert from dos directory entries to fs-independent * directory entries. */ for (dentp = (struct direntry *)(bp->b_data + on); (char *)dentp < bp->b_data + on + n; dentp++, offset += sizeof(struct direntry)) { #if 0 printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n", dentp, prev, crnt, dentp->deName[0], dentp->deAttributes); #endif /* * If this is an unused entry, we can stop. */ if (dentp->deName[0] == SLOT_EMPTY) { brelse(bp); goto out; } /* * Skip deleted entries. */ if (dentp->deName[0] == SLOT_DELETED) { chksum = -1; continue; } /* * Handle Win95 long directory entries */ if (dentp->deAttributes == ATTR_WIN95) { if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) continue; chksum = win2unixfn((struct winentry *)dentp, &dirbuf, chksum, pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2w); continue; } /* * Skip volume labels */ if (dentp->deAttributes & ATTR_VOLUME) { chksum = -1; continue; } /* * This computation of d_fileno must match * the computation of va_fileid in * msdosfs_getattr. */ if (dentp->deAttributes & ATTR_DIRECTORY) { fileno = getushort(dentp->deStartCluster); if (FAT32(pmp)) fileno |= getushort(dentp->deHighClust) << 16; /* if this is the root directory */ if (fileno == MSDOSFSROOT) if (FAT32(pmp)) fileno = cntobn(pmp, pmp->pm_rootdirblk) * dirsperblk; else fileno = 1; else fileno = cntobn(pmp, fileno) * dirsperblk; dirbuf.d_fileno = fileno; dirbuf.d_type = DT_DIR; } else { dirbuf.d_fileno = offset / sizeof(struct direntry); dirbuf.d_type = DT_REG; } if (chksum != winChksum(dentp->deName)) dirbuf.d_namlen = dos2unixfn(dentp->deName, (u_char *)dirbuf.d_name, dentp->deLowerCase | ((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ? 
(LCASE_BASE | LCASE_EXT) : 0), pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_d2u, pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_ul); else dirbuf.d_name[dirbuf.d_namlen] = 0; chksum = -1; dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); if (uio->uio_resid < dirbuf.d_reclen) { brelse(bp); goto out; } error = uiomove((caddr_t) &dirbuf, dirbuf.d_reclen, uio); if (error) { brelse(bp); goto out; } if (cookies) { *cookies++ = offset + sizeof(struct direntry); if (--ncookies <= 0) { brelse(bp); goto out; } } off = offset + sizeof(struct direntry); } brelse(bp); } out: /* Subtract unused cookies */ if (ap->a_ncookies) *ap->a_ncookies -= ncookies; uio->uio_offset = off; /* * Set the eofflag (NFS uses it) */ if (ap->a_eofflag) { if (dep->de_FileSize - (offset - bias) <= 0) *ap->a_eofflag = 1; else *ap->a_eofflag = 0; } return (error); } static int msdosfs_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (0); } /* * vp - address of vnode file the file * bn - which cluster we are interested in mapping to a filesystem block number. * vpp - returns the vnode for the block special file holding the filesystem * containing the file of interest * bnp - address of where to return the filesystem relative block number */ static int msdosfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); if (ap->a_vpp != NULL) *ap->a_vpp = dep->de_devvp; if (ap->a_bnp == NULL) return (0); if (ap->a_runp) { /* * Sequential clusters should be counted here. */ *ap->a_runp = 0; } if (ap->a_runb) { *ap->a_runb = 0; } return (pcbmap(dep, ap->a_bn, ap->a_bnp, 0, 0)); } static int msdosfs_strategy(ap) struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap; { struct buf *bp = ap->a_bp; struct denode *dep = VTODE(bp->b_vp); struct vnode *vp; int error = 0; if (bp->b_vp->v_type == VBLK || bp->b_vp->v_type == VCHR) panic("msdosfs_strategy: spec"); /* * If we don't already know the filesystem relative block number * then get it using pcbmap(). If pcbmap() returns the block * number as -1 then we've got a hole in the file. DOS filesystems * don't allow files with holes, so we shouldn't ever see this. */ if (bp->b_blkno == bp->b_lblkno) { error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); if (error) { bp->b_error = error; bp->b_flags |= B_ERROR; biodone(bp); return (error); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if (bp->b_blkno == -1) { biodone(bp); return (0); } /* * Read/write the block from/to the disk that contains the desired * file block. */ vp = dep->de_devvp; bp->b_dev = vp->v_rdev; VOP_STRATEGY(vp, bp); return (0); } static int msdosfs_print(ap) struct vop_print_args /* { struct vnode *vp; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); printf( "tag VT_MSDOSFS, startcluster %lu, dircluster %lu, diroffset %lu ", dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset); printf(" dev %d, %d", major(dep->de_dev), minor(dep->de_dev)); lockmgr_printinfo(&dep->de_lock); printf("\n"); return (0); } static int msdosfs_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp; switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = 1; return (0); case _PC_NAME_MAX: *ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ? 
WIN_MAXLEN : 12; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_NO_TRUNC: *ap->a_retval = 0; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * get page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int msdosfs_getpages(ap) struct vop_getpages_args *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage); } /* * put page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int msdosfs_putpages(ap) struct vop_putpages_args *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } /* Global vfs data structures for msdosfs */ vop_t **msdosfs_vnodeop_p; static struct vnodeopv_entry_desc msdosfs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) msdosfs_abortop }, { &vop_access_desc, (vop_t *) msdosfs_access }, { &vop_bmap_desc, (vop_t *) msdosfs_bmap }, { &vop_cachedlookup_desc, (vop_t *) msdosfs_lookup }, { &vop_close_desc, (vop_t *) msdosfs_close }, { &vop_create_desc, (vop_t *) msdosfs_create }, { &vop_fsync_desc, (vop_t *) msdosfs_fsync }, { &vop_getattr_desc, (vop_t *) msdosfs_getattr }, { &vop_inactive_desc, (vop_t *) msdosfs_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_link_desc, (vop_t *) msdosfs_link }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, { &vop_mkdir_desc, (vop_t *) msdosfs_mkdir }, { &vop_mknod_desc, (vop_t *) msdosfs_mknod }, { &vop_pathconf_desc, (vop_t *) msdosfs_pathconf }, { &vop_print_desc, (vop_t *) msdosfs_print }, { &vop_read_desc, (vop_t *) msdosfs_read }, { &vop_readdir_desc, (vop_t *) msdosfs_readdir }, { &vop_reclaim_desc, (vop_t *) msdosfs_reclaim }, { &vop_remove_desc, (vop_t *) msdosfs_remove }, { &vop_rename_desc, (vop_t *) msdosfs_rename }, { &vop_rmdir_desc, (vop_t *) msdosfs_rmdir }, { &vop_setattr_desc, (vop_t *) msdosfs_setattr }, { &vop_strategy_desc, (vop_t *) msdosfs_strategy }, { &vop_symlink_desc, (vop_t *) msdosfs_symlink }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { &vop_write_desc, (vop_t *) msdosfs_write }, { &vop_getpages_desc, (vop_t *) msdosfs_getpages }, { &vop_putpages_desc, (vop_t *) msdosfs_putpages }, { NULL, NULL } }; static struct vnodeopv_desc msdosfs_vnodeop_opv_desc = { &msdosfs_vnodeop_p, msdosfs_vnodeop_entries }; VNODEOP_SET(msdosfs_vnodeop_opv_desc); Index: head/sys/fs/ntfs/ntfs_compr.c =================================================================== --- head/sys/fs/ntfs/ntfs_compr.c (revision 49534) +++ head/sys/fs/ntfs/ntfs_compr.c (revision 49535) @@ -1,120 +1,118 @@ /* $NetBSD: ntfs_compr.c,v 1.2 1999/05/06 15:43:18 christos Exp $ */ /*- * Copyright (c) 1998, 1999 Semen Ustimenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: ntfs_compr.c,v 1.3 1999/04/20 21:06:43 semenu Exp $ + * $Id: ntfs_compr.c,v 1.4 1999/05/12 09:42:54 semenu Exp $ */ #include #include #include #include #include #include #include #include #include #include #ifdef __FreeBSD__ #include #endif - -#include #include #include #define GET_UINT16(addr) (*((u_int16_t *)(addr))) int ntfs_uncompblock( u_int8_t * buf, u_int8_t * cbuf) { u_int32_t ctag; int len, dshift, lmask; int blen, boff; int i, j; int pos, cpos; len = GET_UINT16(cbuf) & 0xFFF; dprintf(("ntfs_uncompblock: block length: %d + 3, 0x%x,0x%04x\n", len, len, GET_UINT16(cbuf))); if (!(GET_UINT16(cbuf) & 0x8000)) { if ((len + 1) != NTFS_COMPBLOCK_SIZE) { dprintf(("ntfs_uncompblock: len: %x instead of %d\n", len, 0xfff)); } memcpy(buf, cbuf + 2, len + 1); bzero(buf + len + 1, NTFS_COMPBLOCK_SIZE - 1 - len); return len + 3; } cpos = 2; pos = 0; while ((cpos < len + 3) && (pos < NTFS_COMPBLOCK_SIZE)) { ctag = cbuf[cpos++]; for (i = 0; (i < 8) && (pos < NTFS_COMPBLOCK_SIZE); i++) { if (ctag & 1) { for (j = pos - 1, lmask = 0xFFF, dshift = 12; j >= 0x10; j >>= 1) { dshift--; lmask >>= 1; } boff = -1 - (GET_UINT16(cbuf + cpos) >> dshift); blen = 3 + (GET_UINT16(cbuf + cpos) & lmask); for (j = 0; (j < blen) && (pos < NTFS_COMPBLOCK_SIZE); j++) { buf[pos] = buf[pos + boff]; pos++; } cpos += 2; } else { buf[pos++] = cbuf[cpos++]; } ctag >>= 1; } } return len + 3; } int ntfs_uncompunit( struct ntfsmount * ntmp, u_int8_t * uup, u_int8_t * cup) { int i; int off = 0; int new; for (i = 0; i * NTFS_COMPBLOCK_SIZE < ntfs_cntob(NTFS_COMPUNIT_CL); i++) { new = ntfs_uncompblock(uup + i * NTFS_COMPBLOCK_SIZE, cup + off); if (new == 0) return (EINVAL); off += new; } return (0); } Index: head/sys/fs/ntfs/ntfs_subr.c =================================================================== --- head/sys/fs/ntfs/ntfs_subr.c (revision 49534) +++ head/sys/fs/ntfs/ntfs_subr.c (revision 49535) @@ -1,1901 +1,1899 @@ /* $NetBSD: ntfs_subr.c,v 1.2 1999/05/06 15:43:19 christos Exp $ */ /*- * Copyright (c) 1998, 1999 Semen Ustimenko (semenu@FreeBSD.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
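ntfs_uncompblock() above walks tag bytes where each bit selects either a literal byte or a 16-bit back-reference whose distance/length split depends on how much output already exists. A sketch of just that back-reference decoding, lifted out of the loop; it is not a complete decoder:

#include <stdio.h>
#include <stdint.h>

/*
 * Interpret one 16-bit back-reference the way ntfs_uncompblock()
 * does: as the output position grows, more bits of the field are
 * spent on the distance and fewer on the length (the dshift/lmask
 * loop).  Lengths have an implicit bias of 3, distances of 1.
 */
static void
decode_backref(uint16_t field, int pos, int *distp, int *lenp)
{
	int dshift = 12, lmask = 0xfff, j;

	for (j = pos - 1; j >= 0x10; j >>= 1) {
		dshift--;
		lmask >>= 1;
	}
	*distp = (field >> dshift) + 1;		/* copy starts dist bytes back */
	*lenp = (field & lmask) + 3;		/* at least 3 bytes copied */
}

int
main(void)
{
	int dist, len;

	decode_backref(0x2005, 100, &dist, &len);
	printf("copy %d bytes from %d bytes back\n", len, dist);
	return (0);
}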
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: ntfs_subr.c,v 1.3 1999/04/20 21:06:43 semenu Exp $ + * $Id: ntfs_subr.c,v 1.4 1999/05/12 09:43:01 semenu Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) #include #endif - -#include /* #define NTFS_DEBUG 1 */ #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) MALLOC_DEFINE(M_NTFSNTVATTR, "NTFS vattr", "NTFS file attribute information"); MALLOC_DEFINE(M_NTFSRDATA, "NTFS res data", "NTFS resident data"); MALLOC_DEFINE(M_NTFSRUN, "NTFS vrun", "NTFS vrun storage"); MALLOC_DEFINE(M_NTFSDECOMP, "NTFS decomp", "NTFS decompression temporary"); #endif /* * */ int ntfs_ntvattrrele( struct ntvattr * vap) { dprintf(("ntfs_ntvattrrele: ino: %d, type: 0x%x\n", vap->va_ip->i_number, vap->va_type)); ntfs_ntrele(vap->va_ip); return (0); } /* * Search attribute specifed in ntnode (load ntnode if nessecary). * If not found but ATTR_A_ATTRLIST present, read it in and search throught. * VOP_VGET node needed, and lookup througth it's ntnode (load if nessesary). 
* * ntnode should be locked */ int ntfs_ntvattrget( struct ntfsmount * ntmp, struct ntnode * ip, u_int32_t type, char *name, cn_t vcn, struct ntvattr ** vapp) { int error; struct ntvattr *vap; struct ntvattr *lvap = NULL; struct attr_attrlist *aalp; struct attr_attrlist *nextaalp; caddr_t alpool; int len, namelen; *vapp = NULL; if (name) { dprintf(("ntfs_ntvattrget: " \ "ino: %d, type: 0x%x, name: %s, vcn: %d\n", \ ip->i_number, type, name, (u_int32_t) vcn)); namelen = strlen(name); } else { dprintf(("ntfs_ntvattrget: " \ "ino: %d, type: 0x%x, vcn: %d\n", \ ip->i_number, type, (u_int32_t) vcn)); name = ""; namelen = 0; } if((ip->i_flag & IN_LOADED) == 0) { dprintf(("ntfs_ntvattrget: node not loaded, ino: %d\n", ip->i_number)); error = ntfs_loadntnode(ntmp,ip); if(error) { printf("ntfs_ntvattrget: FAILED TO LOAD INO: %d\n", ip->i_number); return (error); } } for (vap = ip->i_valist.lh_first; vap; vap = vap->va_list.le_next) { ddprintf(("type: 0x%x, vcn: %d - %d\n", \ vap->va_type, (u_int32_t) vap->va_vcnstart, \ (u_int32_t) vap->va_vcnend)); if ((vap->va_type == type) && (vap->va_vcnstart <= vcn) && (vap->va_vcnend >= vcn) && (vap->va_namelen == namelen) && (!strncmp(name, vap->va_name, namelen))) { *vapp = vap; ntfs_ntref(vap->va_ip); return (0); } if (vap->va_type == NTFS_A_ATTRLIST) lvap = vap; } if (!lvap) { dprintf(("ntfs_ntvattrget: UNEXISTED ATTRIBUTE: " \ "ino: %d, type: 0x%x, name: %s, vcn: %d\n", \ ip->i_number, type, name, (u_int32_t) vcn)); return (ENOENT); } /* Scan $ATTRIBUTE_LIST for requested attribute */ len = lvap->va_datalen; MALLOC(alpool, caddr_t, len, M_TEMP, M_WAITOK); error = ntfs_readntvattr_plain(ntmp, ip, lvap, 0, len, alpool, &len); if (error) goto out; aalp = (struct attr_attrlist *) alpool; nextaalp = NULL; while (len > 0) { dprintf(("ntfs_ntvattrget: " \ "attrlist: ino: %d, attr: 0x%x, vcn: %d\n", \ aalp->al_inumber, aalp->al_type, \ (u_int32_t) aalp->al_vcnstart)); if (len > aalp->reclen) { nextaalp = NTFS_NEXTREC(aalp, struct attr_attrlist *); } else { nextaalp = NULL; } len -= aalp->reclen; #define AALPCMP(aalp,type,name,namelen) ( \ (aalp->al_type == type) && (aalp->al_namelen == namelen) && \ !uastrcmp(aalp->al_name,aalp->al_namelen,name,namelen) ) if (AALPCMP(aalp, type, name, namelen) && (!nextaalp || (nextaalp->al_vcnstart > vcn) || !AALPCMP(nextaalp, type, name, namelen))) { struct vnode *newvp; struct ntnode *newip; dprintf(("ntfs_ntvattrget: attrbute in ino: %d\n", aalp->al_inumber)); /* error = VFS_VGET(ntmp->ntm_mountp, aalp->al_inumber, &newvp); */ error = ntfs_vgetex(ntmp->ntm_mountp, aalp->al_inumber, NTFS_A_DATA, NULL, LK_EXCLUSIVE, VG_EXT, curproc, &newvp); if (error) { printf("ntfs_ntvattrget: CAN'T VGET INO: %d\n", aalp->al_inumber); goto out; } newip = VTONT(newvp); /* XXX have to lock ntnode */ if(~newip->i_flag & IN_LOADED) { dprintf(("ntfs_ntvattrget: node not loaded," \ " ino: %d\n", newip->i_number)); error = ntfs_loadntnode(ntmp,ip); if(error) { printf("ntfs_ntvattrget: CAN'T LOAD " \ "INO: %d\n", newip->i_number); vput(newvp); goto out; } } for (vap = newip->i_valist.lh_first; vap; vap = vap->va_list.le_next) { if ((vap->va_type == type) && (vap->va_vcnstart <= vcn) && (vap->va_vcnend >= vcn) && (vap->va_namelen == namelen) && (!strncmp(name, vap->va_name, namelen))) { *vapp = vap; ntfs_ntref(vap->va_ip); vput(newvp); error = 0; goto out; } if (vap->va_type == NTFS_A_ATTRLIST) lvap = vap; } printf("ntfs_ntvattrget: ATTRLIST ERROR.\n"); vput(newvp); break; } #undef AALPCMP aalp = nextaalp; } error = ENOENT; dprintf(("ntfs_ntvattrget: 
UNEXISTED ATTRIBUTE: " \ "ino: %d, type: 0x%x, name: %s, vcn: %d\n", \ ip->i_number, type, name, (u_int32_t) vcn)); out: FREE(alpool, M_TEMP); return (error); } /* * Read ntnode from disk, make ntvattr list. * * ntnode should be locked */ int ntfs_loadntnode( struct ntfsmount * ntmp, struct ntnode * ip) { struct filerec *mfrp; daddr_t bn; int error,off; struct attr *ap; struct ntvattr *nvap; dprintf(("ntfs_loadnode: loading ino: %d\n",ip->i_number)); MALLOC(mfrp, struct filerec *, ntfs_bntob(ntmp->ntm_bpmftrec), M_TEMP, M_WAITOK); if (ip->i_number < NTFS_SYSNODESNUM) { struct buf *bp; dprintf(("ntfs_loadnode: read system node\n")); bn = ntfs_cntobn(ntmp->ntm_mftcn) + ntmp->ntm_bpmftrec * ip->i_number; error = bread(ntmp->ntm_devvp, bn, ntfs_bntob(ntmp->ntm_bpmftrec), NOCRED, &bp); if (error) { printf("ntfs_loadnode: BREAD FAILED\n"); brelse(bp); goto out; } memcpy(mfrp, bp->b_data, ntfs_bntob(ntmp->ntm_bpmftrec)); bqrelse(bp); } else { struct vnode *vp; vp = ntmp->ntm_sysvn[NTFS_MFTINO]; error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, ip->i_number * ntfs_bntob(ntmp->ntm_bpmftrec), ntfs_bntob(ntmp->ntm_bpmftrec), mfrp); if (error) { printf("ntfs_loadnode: ntfs_readattr failed\n"); goto out; } } /* Check if magic and fixups are correct */ error = ntfs_procfixups(ntmp, NTFS_FILEMAGIC, (caddr_t)mfrp, ntfs_bntob(ntmp->ntm_bpmftrec)); if (error) { printf("ntfs_loadnode: BAD MFT RECORD %d\n", (u_int32_t) ip->i_number); goto out; } dprintf(("ntfs_loadnode: load attrs for ino: %d\n",ip->i_number)); off = mfrp->fr_attroff; ap = (struct attr *) ((caddr_t)mfrp + off); LIST_INIT(&ip->i_valist); while (ap->a_hdr.a_type != -1) { error = ntfs_attrtontvattr(ntmp, &nvap, ap); if (error) break; nvap->va_ip = ip; LIST_INSERT_HEAD(&ip->i_valist, nvap, va_list); off += ap->a_hdr.reclen; ap = (struct attr *) ((caddr_t)mfrp + off); } if (error) { printf("ntfs_loadnode: failed to load attr ino: %d\n", ip->i_number); goto out; } ip->i_mainrec = mfrp->fr_mainrec; ip->i_nlink = mfrp->fr_nlink; ip->i_frflag = mfrp->fr_flags; ip->i_flag |= IN_LOADED; out: FREE(mfrp, M_TEMP); return (error); } /* * Routine locks ntnode and increase usecount, just opposite of * ntfs_ntput. */ int ntfs_ntget( struct ntnode *ip) { dprintf(("ntfs_ntget: get ntnode %d: %p, usecount: %d\n", ip->i_number, ip, ip->i_usecount)); ip->i_usecount++; restart: if (ip->i_lock) { while (ip->i_lock) { ip->i_lock = -1; tsleep(&ip->i_lock, PVM, "ntnode", 0); } goto restart; } ip->i_lock = 1; return 0; } /* * Routine search ntnode in hash, if found: lock, inc usecount and return. * If not in hash allocate structure for ntnode, prefill it, lock, * inc count and return. 
* * ntnode returned locked */ static int ntfs_ntnode_hash_lock; int ntfs_ntlookup( struct ntfsmount * ntmp, ino_t ino, struct ntnode ** ipp) { struct ntnode *ip; dprintf(("ntfs_ntlookup: for ntnode %d\n", ino)); *ipp = NULL; restart: ip = ntfs_nthashlookup(ntmp->ntm_dev, ino); /* XXX */ if (ip) { ntfs_ntget(ip); *ipp = ip; dprintf(("ntfs_ntlookup: ntnode %d: %p, usecount: %d\n", ino, ip, ip->i_usecount)); return (0); } if (ntfs_ntnode_hash_lock) { while(ntfs_ntnode_hash_lock) { ntfs_ntnode_hash_lock = -1; tsleep(&ntfs_ntnode_hash_lock, PVM, "ntfsntgt", 0); } goto restart; } ntfs_ntnode_hash_lock = 1; MALLOC(ip, struct ntnode *, sizeof(struct ntnode), M_NTFSNTNODE, M_WAITOK); ddprintf(("ntfs_ntlookup: allocating ntnode: %d: %p\n", ino, ip)); bzero((caddr_t) ip, sizeof(struct ntnode)); /* Generic initialization */ ip->i_number = ino; ip->i_mp = ntmp; ip->i_dev = ntmp->ntm_dev; ip->i_uid = ntmp->ntm_uid; ip->i_gid = ntmp->ntm_gid; ip->i_mode = ntmp->ntm_mode; ip->i_usecount++; ip->i_lock = 1; LIST_INIT(&ip->i_fnlist); ntfs_nthashins(ip); if (ntfs_ntnode_hash_lock < 0) wakeup(&ntfs_ntnode_hash_lock); ntfs_ntnode_hash_lock = 0; *ipp = ip; dprintf(("ntfs_ntlookup: ntnode %d: %p, usecount: %d\n", ino, ip, ip->i_usecount)); return (0); } /* * Decrement usecount of ntnode and unlock it, if usecount reach zero, * deallocate ntnode. * * ntnode should be locked on entry, and unlocked on return. */ void ntfs_ntput( struct ntnode *ip) { struct ntvattr *vap; if (!ip->i_lock) printf("ntfs_ntput: NOT LOCKED"); dprintf(("ntfs_ntput: rele ntnode %d: %p, usecount: %d\n", ip->i_number, ip, ip->i_usecount)); ip->i_usecount--; if (ip->i_usecount < 0) { panic("ntfs_ntput: ino: %d usecount: %d \n", ip->i_number,ip->i_usecount); } else if (ip->i_usecount == 0) { dprintf(("ntfs_ntput: deallocating ntnode: %d\n", ip->i_number)); if (ip->i_fnlist.lh_first) panic("ntfs_ntput: ntnode has fnodes\n"); ntfs_nthashrem(ip); while (ip->i_valist.lh_first != NULL) { vap = ip->i_valist.lh_first; LIST_REMOVE(vap,va_list); ntfs_freentvattr(vap); } FREE(ip, M_NTFSNTNODE); } else { if (ip->i_lock < 0) wakeup(&ip->i_lock); ip->i_lock = 0; } } /* * Decrement usecount of ntnode. */ void ntfs_ntrele( struct ntnode * ip) { dprintf(("ntfs_ntrele: rele ntnode %d: %p, usecount: %d\n", ip->i_number, ip, ip->i_usecount)); ip->i_usecount--; if (ip->i_usecount < 0) panic("ntfs_ntrele: ino: %d usecount: %d \n", ip->i_number,ip->i_usecount); } /* * Deallocate all memory allocated for ntvattr by call to * ntfs_attrtontvattr and some other functions. */ void ntfs_freentvattr( struct ntvattr * vap) { if (vap->va_flag & NTFS_AF_INRUN) { if (vap->va_vruncn) FREE(vap->va_vruncn, M_NTFSRUN); if (vap->va_vruncl) FREE(vap->va_vruncl, M_NTFSRUN); } else { if (vap->va_datap) FREE(vap->va_datap, M_NTFSRDATA); } FREE(vap, M_NTFSNTVATTR); } /* * Convert disk image of attribute into ntvattr structure, * runs are expanded also. 
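ntfs_ntget() and ntfs_ntput() above implement a hand-rolled sleep lock on the ntnode: i_lock is 0 when free, 1 when held, and is set to -1 by a contender before it tsleep()s, so the holder knows it must wakeup() on release. A user-space sketch of the same protocol, with tsleep/wakeup modelled by a pthread condition variable; the struct and function names are illustrative.

#include <pthread.h>

struct flaglock {
	int		lock;		/* 0 = free, 1 = held, -1 = held w/ waiters */
	pthread_mutex_t	mtx;
	pthread_cond_t	cv;
};

void
flaglock_acquire(struct flaglock *fl)
{
	pthread_mutex_lock(&fl->mtx);
	while (fl->lock != 0) {
		fl->lock = -1;			/* note that someone is waiting */
		pthread_cond_wait(&fl->cv, &fl->mtx);	/* tsleep() */
	}
	fl->lock = 1;
	pthread_mutex_unlock(&fl->mtx);
}

void
flaglock_release(struct flaglock *fl)
{
	pthread_mutex_lock(&fl->mtx);
	if (fl->lock < 0)
		pthread_cond_broadcast(&fl->cv);	/* wakeup() */
	fl->lock = 0;
	pthread_mutex_unlock(&fl->mtx);
}

The same flag-and-sleep pattern serializes hash insertion through ntfs_ntnode_hash_lock in ntfs_ntlookup() above.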
*/ int ntfs_attrtontvattr( struct ntfsmount * ntmp, struct ntvattr ** rvapp, struct attr * rap) { int error, i; struct ntvattr *vap; error = 0; *rvapp = NULL; MALLOC(vap, struct ntvattr *, sizeof(struct ntvattr), M_NTFSNTVATTR, M_WAITOK); bzero(vap, sizeof(struct ntvattr)); vap->va_ip = NULL; vap->va_flag = rap->a_hdr.a_flag; vap->va_type = rap->a_hdr.a_type; vap->va_compression = rap->a_hdr.a_compression; vap->va_index = rap->a_hdr.a_index; ddprintf(("type: 0x%x, index: %d", vap->va_type, vap->va_index)); vap->va_namelen = rap->a_hdr.a_namelen; if (rap->a_hdr.a_namelen) { wchar *unp = (wchar *) ((caddr_t) rap + rap->a_hdr.a_nameoff); ddprintf((", name:[")); for (i = 0; i < vap->va_namelen; i++) { vap->va_name[i] = unp[i]; ddprintf(("%c", vap->va_name[i])); } ddprintf(("]")); } if (vap->va_flag & NTFS_AF_INRUN) { ddprintf((", nonres.")); vap->va_datalen = rap->a_nr.a_datalen; vap->va_allocated = rap->a_nr.a_allocated; vap->va_vcnstart = rap->a_nr.a_vcnstart; vap->va_vcnend = rap->a_nr.a_vcnend; vap->va_compressalg = rap->a_nr.a_compressalg; error = ntfs_runtovrun(&(vap->va_vruncn), &(vap->va_vruncl), &(vap->va_vruncnt), (caddr_t) rap + rap->a_nr.a_dataoff); } else { vap->va_compressalg = 0; ddprintf((", res.")); vap->va_datalen = rap->a_r.a_datalen; vap->va_allocated = rap->a_r.a_datalen; vap->va_vcnstart = 0; vap->va_vcnend = ntfs_btocn(vap->va_allocated); MALLOC(vap->va_datap, caddr_t, vap->va_datalen, M_NTFSRDATA, M_WAITOK); memcpy(vap->va_datap, (caddr_t) rap + rap->a_r.a_dataoff, rap->a_r.a_datalen); } ddprintf((", len: %d", vap->va_datalen)); if (error) FREE(vap, M_NTFSNTVATTR); else *rvapp = vap; ddprintf(("\n")); return (error); } /* * Expand run into more utilizable and more memory eating format. */ int ntfs_runtovrun( cn_t ** rcnp, cn_t ** rclp, u_long * rcntp, u_int8_t * run) { u_int32_t off; u_int32_t sz, i; cn_t *cn; cn_t *cl; u_long cnt; cn_t prev; cn_t tmp; off = 0; cnt = 0; i = 0; while (run[off]) { off += (run[off] & 0xF) + ((run[off] >> 4) & 0xF) + 1; cnt++; } MALLOC(cn, cn_t *, cnt * sizeof(cn_t), M_NTFSRUN, M_WAITOK); MALLOC(cl, cn_t *, cnt * sizeof(cn_t), M_NTFSRUN, M_WAITOK); off = 0; cnt = 0; prev = 0; while (run[off]) { sz = run[off++]; cl[cnt] = 0; for (i = 0; i < (sz & 0xF); i++) cl[cnt] += (u_int32_t) run[off++] << (i << 3); sz >>= 4; if (run[off + sz - 1] & 0x80) { tmp = ((u_int64_t) - 1) << (sz << 3); for (i = 0; i < sz; i++) tmp |= (u_int64_t) run[off++] << (i << 3); } else { tmp = 0; for (i = 0; i < sz; i++) tmp |= (u_int64_t) run[off++] << (i << 3); } if (tmp) prev = cn[cnt] = prev + tmp; else cn[cnt] = tmp; cnt++; } *rcnp = cn; *rclp = cl; *rcntp = cnt; return (0); } /* * Convert wchar to uppercase wchar, should be macros? */ wchar ntfs_toupper( struct ntfsmount * ntmp, wchar wc) { return (ntmp->ntm_upcase[wc & 0xFF]); } /* * Compare to unicode strings case insensible. */ int ntfs_uustricmp( struct ntfsmount * ntmp, wchar * str1, int str1len, wchar * str2, int str2len) { int i; int res; for (i = 0; i < str1len && i < str2len; i++) { res = (int) ntfs_toupper(ntmp, str1[i]) - (int) ntfs_toupper(ntmp, str2[i]); if (res) return res; } return (str1len - str2len); } /* * Compare unicode and ascii string case insens. 
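ntfs_runtovrun() above expands the on-disk run list: each run begins with a header byte whose low nibble gives the size of the cluster-count field and whose high nibble gives the size of a signed cluster-offset field, both little endian; the offset is a delta from the previous run's start, and a zero header terminates the list. A self-contained sketch of decoding a single run, with illustrative names and an explicit sign-extension step.

#include <stddef.h>
#include <stdint.h>

/*
 * Decode one data run starting at run[0].  Returns the number of bytes
 * consumed, or 0 on the terminating zero header.  'len' receives the run
 * length in clusters, 'delta' the signed change from the previous run's
 * starting cluster (an absent offset field, osz == 0, denotes a sparse
 * run).  Names are illustrative.
 */
size_t
decode_run(const uint8_t *run, uint64_t *len, int64_t *delta)
{
	size_t off = 0;
	uint8_t hdr = run[off++];
	int lsz = hdr & 0xF;		/* bytes in the length field */
	int osz = (hdr >> 4) & 0xF;	/* bytes in the offset field */
	uint64_t l = 0, d = 0;
	int i;

	if (hdr == 0)
		return (0);		/* end of run list */
	for (i = 0; i < lsz; i++)
		l |= (uint64_t)run[off++] << (i * 8);
	for (i = 0; i < osz; i++)
		d |= (uint64_t)run[off++] << (i * 8);
	/* sign-extend the osz-byte offset, as ntfs_runtovrun() does */
	if (osz > 0 && osz < 8 && (d & ((uint64_t)1 << (osz * 8 - 1))))
		d |= ~(uint64_t)0 << (osz * 8);
	*len = l;
	*delta = (int64_t)d;
	return (off);
}

The caller accumulates the deltas into absolute cluster numbers, exactly as the loop above does with its prev variable.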
*/ int ntfs_uastricmp( struct ntfsmount * ntmp, const wchar *str1, int str1len, const char *str2, int str2len) { int i; int res; for (i = 0; i < str1len && i < str2len; i++) { res = (int) ntfs_toupper(ntmp, str1[i]) - (int) ntfs_toupper(ntmp, (wchar) str2[i]); if (res) return res; } return (str1len - str2len); } /* * Compare unicode and ascii string case sens. */ int ntfs_uastrcmp( struct ntfsmount *ntmp, const wchar *str1, int str1len, const char *str2, int str2len) { int i; int res; for (i = 0; (i < str1len) && (i < str2len); i++) { res = ((int) str1[i]) - ((int) str2[i]); if (res) return res; } return (str1len - str2len); } /* * Search fnode in ntnode, if not found allocate and preinitialize. * * ntnode should be locked on entry. */ int ntfs_fget( struct ntfsmount *ntmp, struct ntnode *ip, int attrtype, char *attrname, struct fnode **fpp) { struct fnode *fp; dprintf(("ntfs_fget: ino: %d, attrtype: 0x%x, attrname: %s\n", ip->i_number,attrtype, attrname?attrname:"")); *fpp = NULL; for (fp = ip->i_fnlist.lh_first; fp != NULL; fp = fp->f_fnlist.le_next){ dprintf(("ntfs_fget: fnode: attrtype: %d, attrname: %s\n", fp->f_attrtype, fp->f_attrname?fp->f_attrname:"")); if ((attrtype == fp->f_attrtype) && ((!attrname && !fp->f_attrname) || (attrname && fp->f_attrname && !strcmp(attrname,fp->f_attrname)))){ dprintf(("ntfs_fget: found existed: %p\n",fp)); *fpp = fp; } } if (*fpp) return (0); MALLOC(fp, struct fnode *, sizeof(struct fnode), M_NTFSFNODE, M_WAITOK); bzero(fp, sizeof(struct fnode)); dprintf(("ntfs_fget: allocating fnode: %p\n",fp)); fp->f_devvp = ntmp->ntm_devvp; fp->f_dev = ntmp->ntm_dev; fp->f_mp = ntmp; fp->f_ip = ip; fp->f_attrname = attrname; if (fp->f_attrname) fp->f_flag |= FN_AATTRNAME; fp->f_attrtype = attrtype; ntfs_ntref(ip); LIST_INSERT_HEAD(&ip->i_fnlist, fp, f_fnlist); *fpp = fp; return (0); } /* * Deallocate fnode, remove it from ntnode's fnode list. * * ntnode should be locked. */ void ntfs_frele( struct fnode *fp) { struct ntnode *ip = FTONT(fp); dprintf(("ntfs_frele: fnode: %p for %d: %p\n", fp, ip->i_number, ip)); dprintf(("ntfs_frele: deallocating fnode\n")); LIST_REMOVE(fp,f_fnlist); if (fp->f_flag & FN_AATTRNAME) FREE(fp->f_attrname, M_TEMP); if (fp->f_dirblbuf) FREE(fp->f_dirblbuf, M_NTFSDIR); FREE(fp, M_NTFSFNODE); ntfs_ntrele(ip); } /* * Lookup attribute name in format: [[:$ATTR_TYPE]:$ATTR_NAME], * $ATTR_TYPE is searched in attrdefs read from $AttrDefs. * If $ATTR_TYPE nott specifed, ATTR_A_DATA assumed. */ int ntfs_ntlookupattr( struct ntfsmount * ntmp, const char * name, int namelen, int *attrtype, char **attrname) { const char *sys; size_t syslen, i; struct ntvattrdef *adp; if (namelen == 0) return (0); if (name[0] == '$') { sys = name; for (syslen = 0; syslen < namelen; syslen++) { if(sys[syslen] == ':') { name++; namelen--; break; } } name += syslen; namelen -= syslen; adp = ntmp->ntm_ad; for (i = 0; i < ntmp->ntm_adnum; i++){ if((syslen == adp->ad_namelen) && (!strncmp(sys,adp->ad_name,syslen))) { *attrtype = adp->ad_type; if(namelen) { MALLOC((*attrname), char *, namelen, M_TEMP, M_WAITOK); memcpy((*attrname), name, namelen); (*attrname)[namelen] = '\0'; } return (0); } adp++; } return (ENOENT); } if(namelen) { MALLOC((*attrname), char *, namelen, M_TEMP, M_WAITOK); memcpy((*attrname), name, namelen); (*attrname)[namelen] = '\0'; *attrtype = NTFS_A_DATA; } return (0); } /* * Lookup specifed node for filename, matching cnp, * return fnode filled. 
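ntfs_toupper() and the comparison helpers above fold case through the volume's $UpCase table, which is read in full at mount time. A sketch of the same table-driven, length-aware comparison between a UTF-16 on-disk name and an ASCII name; the 65536-entry table and the names here are assumptions for illustration, and unlike ntfs_toupper() above, which masks to the low byte, this sketch indexes the full 16-bit code point.

#include <stdint.h>

typedef uint16_t wchar16;

int
ua_stricmp(const wchar16 *upcase, const wchar16 *u, int ulen,
    const char *a, int alen)
{
	int i, res;

	for (i = 0; i < ulen && i < alen; i++) {
		res = (int)upcase[u[i]] -
		    (int)upcase[(wchar16)(unsigned char)a[i]];
		if (res != 0)
			return (res);
	}
	return (ulen - alen);	/* equal prefixes: shorter name sorts first */
}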
*/ int ntfs_ntlookupfile( struct ntfsmount * ntmp, struct vnode * vp, struct componentname * cnp, struct vnode ** vpp) { struct fnode *fp = VTOF(vp); struct ntnode *ip = FTONT(fp); struct ntvattr *vap; /* Root attribute */ cn_t cn; /* VCN in current attribute */ caddr_t rdbuf; /* Buffer to read directory's blocks */ u_int32_t blsize; u_int32_t rdsize; /* Length of data to read from current block */ struct attr_indexentry *iep; int error, res, anamelen, fnamelen; const char *fname,*aname; u_int32_t aoff; error = ntfs_ntget(ip); if (error) return (error); error = ntfs_ntvattrget(ntmp, ip, NTFS_A_INDXROOT, "$I30", 0, &vap); if (error || (vap->va_flag & NTFS_AF_INRUN)) return (ENOTDIR); blsize = vap->va_a_iroot->ir_size; rdsize = vap->va_datalen; /* * Divide file name into: foofilefoofilefoofile[:attrspec] * Store like this: fname:fnamelen [aname:anamelen] */ fname = cnp->cn_nameptr; aname = NULL; anamelen = 0; for (fnamelen = 0; fnamelen < cnp->cn_namelen; fnamelen++) if(fname[fnamelen] == ':') { aname = fname + fnamelen + 1; anamelen = cnp->cn_namelen - fnamelen - 1; dprintf(("ntfs_ntlookupfile: %s (%d), attr: %s (%d)\n", fname, fnamelen, aname, anamelen)); break; } dprintf(("ntfs_ntlookupfile: blksz: %d, rdsz: %d\n", blsize, rdsize)); MALLOC(rdbuf, caddr_t, blsize, M_TEMP, M_WAITOK); error = ntfs_readattr(ntmp, ip, NTFS_A_INDXROOT, "$I30", 0, rdsize, rdbuf); if (error) goto fail; aoff = sizeof(struct attr_indexroot); do { iep = (struct attr_indexentry *) (rdbuf + aoff); while (!(iep->ie_flag & NTFS_IEFLAG_LAST) && (rdsize > aoff)) { ddprintf(("scan: %d, %d\n", (u_int32_t) iep->ie_number, (u_int32_t) iep->ie_fnametype)); res = ntfs_uastricmp(ntmp, iep->ie_fname, iep->ie_fnamelen, fname, fnamelen); if (res == 0) { /* Matched something (case ins.) */ if (iep->ie_fnametype == 0 || !(ntmp->ntm_flag & NTFS_MFLAG_CASEINS)) res = ntfs_uastrcmp(ntmp, iep->ie_fname, iep->ie_fnamelen, fname, fnamelen); if (res == 0) { int attrtype = NTFS_A_DATA; char *attrname = NULL; struct fnode *nfp; struct vnode *nvp; if (aname) { error = ntfs_ntlookupattr(ntmp, aname, anamelen, &attrtype, &attrname); if (error) goto fail; } /* Check if we've found ourself */ if ((iep->ie_number == ip->i_number) && (attrtype == fp->f_attrtype) && ((!attrname && !fp->f_attrname) || (attrname && fp->f_attrname && !strcmp(attrname, fp->f_attrname)))) { VREF(vp); *vpp = vp; goto fail; } /* vget node, but don't load it */ error = ntfs_vgetex(ntmp->ntm_mountp, iep->ie_number, attrtype, attrname, LK_EXCLUSIVE, VG_DONTLOADIN | VG_DONTVALIDFN, curproc, &nvp); if(error) goto fail; nfp = VTOF(nvp); if (nfp->f_flag & FN_VALID) { *vpp = nvp; goto fail; } nfp->f_fflag = iep->ie_fflag; nfp->f_pnumber = iep->ie_fpnumber; nfp->f_times = iep->ie_ftimes; if((nfp->f_fflag & NTFS_FFLAG_DIR) && (nfp->f_attrtype == NTFS_A_DATA) && (nfp->f_attrname == NULL)) nfp->f_type = VDIR; else nfp->f_type = VREG; nvp->v_type = nfp->f_type; if ((nfp->f_attrtype == NTFS_A_DATA) && (nfp->f_attrname == NULL)) { /* Opening default attribute */ nfp->f_size = iep->ie_fsize; nfp->f_allocated = iep->ie_fallocated; nfp->f_flag |= FN_PRELOADED; } else { error = ntfs_filesize(ntmp, nfp, &nfp->f_size, &nfp->f_allocated); if (error) { vput(nvp); goto fail; } } nfp->f_flag &= ~FN_VALID; *vpp = nvp; goto fail; } } else if (res > 0) break; aoff += iep->reclen; iep = (struct attr_indexentry *) (rdbuf + aoff); } /* Dive if possible */ if (iep->ie_flag & NTFS_IEFLAG_SUBNODE) { dprintf(("ntfs_ntlookupfile: diving\n")); cn = *(cn_t *) (rdbuf + aoff + iep->reclen - sizeof(cn_t)); rdsize 
= blsize; error = ntfs_readattr(ntmp, ip, NTFS_A_INDX, "$I30", ntfs_cntob(cn), rdsize, rdbuf); if (error) goto fail; error = ntfs_procfixups(ntmp, NTFS_INDXMAGIC, rdbuf, rdsize); if (error) goto fail; aoff = (((struct attr_indexalloc *) rdbuf)->ia_hdrsize + 0x18); } else { dprintf(("ntfs_ntlookupfile: nowhere to dive :-(\n")); error = ENOENT; break; } } while (1); dprintf(("finish\n")); fail: ntfs_ntvattrrele(vap); ntfs_ntput(ip); FREE(rdbuf, M_TEMP); return (error); } /* * Check if name type is permitted to show. */ int ntfs_isnamepermitted( struct ntfsmount * ntmp, struct attr_indexentry * iep) { if (ntmp->ntm_flag & NTFS_MFLAG_ALLNAMES) return 1; switch (iep->ie_fnametype) { case 2: ddprintf(("ntfs_isnamepermitted: skiped DOS name\n")); return 0; case 0: case 1: case 3: return 1; default: printf("ntfs_isnamepermitted: " \ "WARNING! Unknown file name type: %d\n", iep->ie_fnametype); break; } return 0; } /* * Read ntfs dir like stream of attr_indexentry, not like btree of them. * This is done by scaning $BITMAP:$I30 for busy clusters and reading them. * Ofcouse $INDEX_ROOT:$I30 is read before. Last read values are stored in * fnode, so we can skip toward record number num almost immediatly. * Anyway this is rather slow routine. The problem is that we don't know * how many records are there in $INDEX_ALLOCATION:$I30 block. */ int ntfs_ntreaddir( struct ntfsmount * ntmp, struct fnode * fp, u_int32_t num, struct attr_indexentry ** riepp) { struct ntnode *ip = FTONT(fp); struct ntvattr *vap = NULL; /* IndexRoot attribute */ struct ntvattr *bmvap = NULL; /* BitMap attribute */ struct ntvattr *iavap = NULL; /* IndexAllocation attribute */ caddr_t rdbuf; /* Buffer to read directory's blocks */ u_char *bmp = NULL; /* Bitmap */ u_int32_t blsize; /* Index allocation size (2048) */ u_int32_t rdsize; /* Length of data to read */ u_int32_t attrnum; /* Current attribute type */ u_int32_t cpbl = 1; /* Clusters per directory block */ u_int32_t blnum; struct attr_indexentry *iep; int error = ENOENT; u_int32_t aoff, cnum; dprintf(("ntfs_ntreaddir: read ino: %d, num: %d\n", ip->i_number, num)); error = ntfs_ntget(ip); if (error) return (error); error = ntfs_ntvattrget(ntmp, ip, NTFS_A_INDXROOT, "$I30", 0, &vap); if (error) return (ENOTDIR); if (fp->f_dirblbuf == NULL) { fp->f_dirblsz = vap->va_a_iroot->ir_size; MALLOC(fp->f_dirblbuf, caddr_t, max(vap->va_datalen,fp->f_dirblsz), M_NTFSDIR, M_WAITOK); } blsize = fp->f_dirblsz; rdbuf = fp->f_dirblbuf; dprintf(("ntfs_ntreaddir: rdbuf: 0x%p, blsize: %d\n", rdbuf, blsize)); if (vap->va_a_iroot->ir_flag & NTFS_IRFLAG_INDXALLOC) { error = ntfs_ntvattrget(ntmp, ip, NTFS_A_INDXBITMAP, "$I30", 0, &bmvap); if (error) { error = ENOTDIR; goto fail; } MALLOC(bmp, u_char *, bmvap->va_datalen, M_TEMP, M_WAITOK); error = ntfs_readattr(ntmp, ip, NTFS_A_INDXBITMAP, "$I30", 0, bmvap->va_datalen, bmp); if (error) goto fail; error = ntfs_ntvattrget(ntmp, ip, NTFS_A_INDX, "$I30", 0, &iavap); if (error) { error = ENOTDIR; goto fail; } cpbl = ntfs_btocn(blsize + ntfs_cntob(1) - 1); dprintf(("ntfs_ntreaddir: indexalloc: %d, cpbl: %d\n", iavap->va_datalen, cpbl)); } else { dprintf(("ntfs_ntreadidir: w/o BitMap and IndexAllocation\n")); iavap = bmvap = NULL; bmp = NULL; } /* Try use previous values */ if ((fp->f_lastdnum < num) && (fp->f_lastdnum != 0)) { attrnum = fp->f_lastdattr; aoff = fp->f_lastdoff; blnum = fp->f_lastdblnum; cnum = fp->f_lastdnum; } else { attrnum = NTFS_A_INDXROOT; aoff = sizeof(struct attr_indexroot); blnum = 0; cnum = 0; } do { dprintf(("ntfs_ntreaddir: scan: 
0x%x, %d, %d, %d, %d\n", attrnum, (u_int32_t) blnum, cnum, num, aoff)); rdsize = (attrnum == NTFS_A_INDXROOT) ? vap->va_datalen : blsize; error = ntfs_readattr(ntmp, ip, attrnum, "$I30", ntfs_cntob(blnum * cpbl), rdsize, rdbuf); if (error) goto fail; if (attrnum == NTFS_A_INDX) { error = ntfs_procfixups(ntmp, NTFS_INDXMAGIC, rdbuf, rdsize); if (error) goto fail; } if (aoff == 0) aoff = (attrnum == NTFS_A_INDX) ? (0x18 + ((struct attr_indexalloc *) rdbuf)->ia_hdrsize) : sizeof(struct attr_indexroot); iep = (struct attr_indexentry *) (rdbuf + aoff); while (!(iep->ie_flag & NTFS_IEFLAG_LAST) && (rdsize > aoff)) { if (ntfs_isnamepermitted(ntmp, iep)) { if (cnum >= num) { fp->f_lastdnum = cnum; fp->f_lastdoff = aoff; fp->f_lastdblnum = blnum; fp->f_lastdattr = attrnum; *riepp = iep; error = 0; goto fail; } cnum++; } aoff += iep->reclen; iep = (struct attr_indexentry *) (rdbuf + aoff); } if (iavap) { if (attrnum == NTFS_A_INDXROOT) blnum = 0; else blnum++; while (ntfs_cntob(blnum * cpbl) < iavap->va_datalen) { if (bmp[blnum >> 3] & (1 << (blnum & 3))) break; blnum++; } attrnum = NTFS_A_INDX; aoff = 0; if (ntfs_cntob(blnum * cpbl) >= iavap->va_datalen) break; dprintf(("ntfs_ntreaddir: blnum: %d\n", (u_int32_t) blnum)); } } while (iavap); *riepp = NULL; fp->f_lastdnum = 0; fail: if (vap) ntfs_ntvattrrele(vap); if (bmvap) ntfs_ntvattrrele(bmvap); if (iavap) ntfs_ntvattrrele(iavap); if (bmp) FREE(bmp, M_TEMP); ntfs_ntput(ip); return (error); } /* * Convert NTFS times that are in 100 ns units and begins from * 1601 Jan 1 into unix times. */ struct timespec ntfs_nttimetounix( u_int64_t nt) { struct timespec t; /* WindowNT times are in 100 ns and from 1601 Jan 1 */ t.tv_nsec = (nt % (1000 * 1000 * 10)) * 100; t.tv_sec = nt / (1000 * 1000 * 10) - 369LL * 365LL * 24LL * 60LL * 60LL - 89LL * 1LL * 24LL * 60LL * 60LL; return (t); } /* * Get file times from NTFS_A_NAME attribute. */ int ntfs_times( struct ntfsmount * ntmp, struct ntnode * ip, ntfs_times_t * tm) { struct ntvattr *vap; int error; dprintf(("ntfs_times: ino: %d...\n", ip->i_number)); error = ntfs_ntget(ip); if (error) return (error); error = ntfs_ntvattrget(ntmp, ip, NTFS_A_NAME, NULL, 0, &vap); if (error) { ntfs_ntput(ip); return (error); } *tm = vap->va_a_name->n_times; ntfs_ntvattrrele(vap); ntfs_ntput(ip); return (0); } /* * Get file sizes from corresponding attribute. * * ntnode under fnode should be locked. */ int ntfs_filesize( struct ntfsmount * ntmp, struct fnode * fp, u_int64_t * size, u_int64_t * bytes) { struct ntvattr *vap; struct ntnode *ip = FTONT(fp); u_int64_t sz, bn; int error; dprintf(("ntfs_filesize: ino: %d\n", ip->i_number)); error = ntfs_ntvattrget(ntmp, ip, fp->f_attrtype, fp->f_attrname, 0, &vap); if (error) return (error); bn = vap->va_allocated; sz = vap->va_datalen; dprintf(("ntfs_filesize: %d bytes (%d bytes allocated)\n", (u_int32_t) sz, (u_int32_t) bn)); if (size) *size = sz; if (bytes) *bytes = bn; ntfs_ntvattrrele(vap); return (0); } /* * This is one of write routine. * * ntnode should be locked. 
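ntfs_nttimetounix() above converts Windows NT timestamps, which count 100-nanosecond ticks since 1601-01-01, into Unix time by subtracting the 369-year gap: 369 years of 365 days plus 89 leap days, i.e. 11644473600 seconds. The same conversion as a standalone sketch, with the offset folded into one named constant; the constant and function names are illustrative.

#include <stdint.h>
#include <time.h>

/* seconds between 1601-01-01 and 1970-01-01: (369 * 365 + 89) * 86400 */
#define	NT_EPOCH_OFFSET	11644473600ULL

struct timespec
nt_time_to_unix(uint64_t nt)	/* nt counts 100-ns ticks since 1601 */
{
	struct timespec ts;

	ts.tv_nsec = (long)((nt % 10000000ULL) * 100);	/* 10^7 ticks per second */
	ts.tv_sec = (time_t)(nt / 10000000ULL - NT_EPOCH_OFFSET);
	return (ts);
}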
*/ int ntfs_writeattr_plain( struct ntfsmount * ntmp, struct ntnode * ip, u_int32_t attrnum, char *attrname, off_t roff, size_t rsize, void *rdata, size_t * initp) { size_t init; int error = 0; off_t off = roff, left = rsize, towrite; caddr_t data = rdata; struct ntvattr *vap; *initp = 0; while (left) { error = ntfs_ntvattrget(ntmp, ip, attrnum, attrname, ntfs_btocn(off), &vap); if (error) return (error); towrite = min(left, ntfs_cntob(vap->va_vcnend + 1) - off); ddprintf(("ntfs_writeattr_plain: o: %d, s: %d (%d - %d)\n", (u_int32_t) off, (u_int32_t) towrite, (u_int32_t) vap->va_vcnstart, (u_int32_t) vap->va_vcnend)); error = ntfs_writentvattr_plain(ntmp, ip, vap, off - ntfs_cntob(vap->va_vcnstart), towrite, data, &init); if (error) { printf("ntfs_writeattr_plain: " \ "ntfs_writentvattr_plain failed: o: %d, s: %d\n", (u_int32_t) off, (u_int32_t) towrite); printf("ntfs_writeattr_plain: attrib: %d - %d\n", (u_int32_t) vap->va_vcnstart, (u_int32_t) vap->va_vcnend); ntfs_ntvattrrele(vap); break; } ntfs_ntvattrrele(vap); left -= towrite; off += towrite; data = data + towrite; *initp += init; } return (error); } /* * This is one of write routine. * * ntnode should be locked. */ int ntfs_writentvattr_plain( struct ntfsmount * ntmp, struct ntnode * ip, struct ntvattr * vap, off_t roff, size_t rsize, void *rdata, size_t * initp) { int error = 0; int off; *initp = 0; if (vap->va_flag & NTFS_AF_INRUN) { int cnt; cn_t ccn, ccl, cn, left, cl; caddr_t data = rdata; struct buf *bp; size_t tocopy; ddprintf(("ntfs_writentvattr_plain: data in run: %d chains\n", vap->va_vruncnt)); off = roff; left = rsize; ccl = 0; ccn = 0; cnt = 0; while (left && (cnt < vap->va_vruncnt)) { ccn = vap->va_vruncn[cnt]; ccl = vap->va_vruncl[cnt]; ddprintf(("ntfs_writentvattr_plain: " \ "left %d, cn: 0x%x, cl: %d, off: %d\n", \ (u_int32_t) left, (u_int32_t) ccn, \ (u_int32_t) ccl, (u_int32_t) off)); if (ntfs_cntob(ccl) < off) { off -= ntfs_cntob(ccl); cnt++; continue; } if (ccn || ip->i_number == NTFS_BOOTINO) { /* XXX */ ccl -= ntfs_btocn(off); cn = ccn + ntfs_btocn(off); off = ntfs_btocnoff(off); while (left && ccl) { tocopy = min(left, min(ntfs_cntob(ccl) - off, MAXBSIZE - off)); cl = ntfs_btocl(tocopy + off); ddprintf(("ntfs_writentvattr_plain: " \ "write: cn: 0x%x cl: %d, " \ "off: %d len: %d, left: %d\n", (u_int32_t) cn, (u_int32_t) cl, (u_int32_t) off, (u_int32_t) tocopy, (u_int32_t) left)); if ((off == 0) && (tocopy == ntfs_cntob(cl))) { bp = getblk(ntmp->ntm_devvp, ntfs_cntobn(cn), ntfs_cntob(cl), 0, 0); clrbuf(bp); } else { error = bread(ntmp->ntm_devvp, ntfs_cntobn(cn), ntfs_cntob(cl), NOCRED, &bp); if (error) { brelse(bp); return (error); } } memcpy(bp->b_data + off, data, tocopy); bawrite(bp); data = data + tocopy; *initp += tocopy; off = 0; left -= tocopy; cn += cl; ccl -= cl; } } cnt++; } if (left) { printf("ntfs_writentvattr_plain: POSSIBLE RUN ERROR\n"); error = EINVAL; } } else { printf("ntfs_writevattr_plain: CAN'T WRITE RES. ATTRIBUTE\n"); error = ENOTTY; } return (error); } /* * This is one of read routines. * * ntnode should be locked. 
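ntfs_writentvattr_plain() above goes through the buffer cache: a write that covers a whole cluster-sized buffer uses getblk()/clrbuf() and skips the read, while a partial write bread()s the existing block first and overwrites only the affected bytes before bawrite(). The same read-modify-write decision in a user-space sketch; pread/pwrite stand in for the buffer-cache calls, and the function name and caller-supplied scratch block are assumptions.

#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <stdint.h>

/*
 * Write 'len' bytes at byte offset 'off' of fd in 'bsize'-byte blocks,
 * reading back only the blocks the write covers partially.  'blk' is a
 * caller-supplied scratch buffer of bsize bytes (assumed).
 */
int
blockwrite(int fd, size_t bsize, uint64_t off,
    const uint8_t *data, size_t len, uint8_t *blk)
{
	while (len > 0) {
		uint64_t bno = off / bsize;
		size_t boff = (size_t)(off % bsize);
		size_t chunk = bsize - boff;

		if (chunk > len)
			chunk = len;
		if (boff != 0 || chunk != bsize) {
			/* partial block: read-modify-write, like bread() */
			if (pread(fd, blk, bsize, (off_t)(bno * bsize)) !=
			    (ssize_t)bsize)
				return (-1);
		}
		memcpy(blk + boff, data, chunk);
		if (pwrite(fd, blk, bsize, (off_t)(bno * bsize)) !=
		    (ssize_t)bsize)
			return (-1);	/* like bawrite() */
		data += chunk;
		off += chunk;
		len -= chunk;
	}
	return (0);
}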
*/ int ntfs_readntvattr_plain( struct ntfsmount * ntmp, struct ntnode * ip, struct ntvattr * vap, off_t roff, size_t rsize, void *rdata, size_t * initp) { int error = 0; int off; *initp = 0; if (vap->va_flag & NTFS_AF_INRUN) { int cnt; cn_t ccn, ccl, cn, left, cl; caddr_t data = rdata; struct buf *bp; size_t tocopy; ddprintf(("ntfs_readntvattr_plain: data in run: %d chains\n", vap->va_vruncnt)); off = roff; left = rsize; ccl = 0; ccn = 0; cnt = 0; while (left && (cnt < vap->va_vruncnt)) { ccn = vap->va_vruncn[cnt]; ccl = vap->va_vruncl[cnt]; ddprintf(("ntfs_readntvattr_plain: " \ "left %d, cn: 0x%x, cl: %d, off: %d\n", \ (u_int32_t) left, (u_int32_t) ccn, \ (u_int32_t) ccl, (u_int32_t) off)); if (ntfs_cntob(ccl) < off) { off -= ntfs_cntob(ccl); cnt++; continue; } if (ccn || ip->i_number == NTFS_BOOTINO) { ccl -= ntfs_btocn(off); cn = ccn + ntfs_btocn(off); off = ntfs_btocnoff(off); while (left && ccl) { tocopy = min(left, min(ntfs_cntob(ccl) - off, MAXBSIZE - off)); cl = ntfs_btocl(tocopy + off); ddprintf(("ntfs_readntvattr_plain: " \ "read: cn: 0x%x cl: %d, " \ "off: %d len: %d, left: %d\n", (u_int32_t) cn, (u_int32_t) cl, (u_int32_t) off, (u_int32_t) tocopy, (u_int32_t) left)); error = bread(ntmp->ntm_devvp, ntfs_cntobn(cn), ntfs_cntob(cl), NOCRED, &bp); if (error) { brelse(bp); return (error); } memcpy(data, bp->b_data + off, tocopy); brelse(bp); data = data + tocopy; *initp += tocopy; off = 0; left -= tocopy; cn += cl; ccl -= cl; } } else { tocopy = min(left, ntfs_cntob(ccl) - off); ddprintf(("ntfs_readntvattr_plain: " "sparce: ccn: 0x%x ccl: %d, off: %d, " \ " len: %d, left: %d\n", (u_int32_t) ccn, (u_int32_t) ccl, (u_int32_t) off, (u_int32_t) tocopy, (u_int32_t) left)); left -= tocopy; off = 0; bzero(data, tocopy); data = data + tocopy; } cnt++; } if (left) { printf("ntfs_readntvattr_plain: POSSIBLE RUN ERROR\n"); error = E2BIG; } } else { ddprintf(("ntfs_readnvattr_plain: data is in mft record\n")); memcpy(rdata, vap->va_datap + roff, rsize); *initp += rsize; } return (error); } /* * This is one of read routines. * * ntnode should be locked. */ int ntfs_readattr_plain( struct ntfsmount * ntmp, struct ntnode * ip, u_int32_t attrnum, char *attrname, off_t roff, size_t rsize, void *rdata, size_t * initp) { size_t init; int error = 0; off_t off = roff, left = rsize, toread; caddr_t data = rdata; struct ntvattr *vap; *initp = 0; while (left) { error = ntfs_ntvattrget(ntmp, ip, attrnum, attrname, ntfs_btocn(off), &vap); if (error) return (error); toread = min(left, ntfs_cntob(vap->va_vcnend + 1) - off); ddprintf(("ntfs_readattr_plain: o: %d, s: %d (%d - %d)\n", (u_int32_t) off, (u_int32_t) toread, (u_int32_t) vap->va_vcnstart, (u_int32_t) vap->va_vcnend)); error = ntfs_readntvattr_plain(ntmp, ip, vap, off - ntfs_cntob(vap->va_vcnstart), toread, data, &init); if (error) { printf("ntfs_readattr_plain: " \ "ntfs_readntvattr_plain failed: o: %d, s: %d\n", (u_int32_t) off, (u_int32_t) toread); printf("ntfs_readattr_plain: attrib: %d - %d\n", (u_int32_t) vap->va_vcnstart, (u_int32_t) vap->va_vcnend); ntfs_ntvattrrele(vap); break; } ntfs_ntvattrrele(vap); left -= toread; off += toread; data = data + toread; *initp += init; } return (error); } /* * This is one of read routines. * * ntnode should be locked. 
*/ int ntfs_readattr( struct ntfsmount * ntmp, struct ntnode * ip, u_int32_t attrnum, char *attrname, off_t roff, size_t rsize, void *rdata) { int error = 0; struct ntvattr *vap; size_t init; ddprintf(("ntfs_readattr: reading %d: 0x%x, from %d size %d bytes\n", ip->i_number, attrnum, (u_int32_t) roff, (u_int32_t) rsize)); error = ntfs_ntvattrget(ntmp, ip, attrnum, attrname, 0, &vap); if (error) return (error); if ((roff > vap->va_datalen) || (roff + rsize > vap->va_datalen)) { ddprintf(("ntfs_readattr: offset too big\n")); ntfs_ntvattrrele(vap); return (E2BIG); } if (vap->va_compression && vap->va_compressalg) { u_int8_t *cup; u_int8_t *uup; off_t off = roff, left = rsize, tocopy; caddr_t data = rdata; cn_t cn; ddprintf(("ntfs_ntreadattr: compression: %d\n", vap->va_compressalg)); MALLOC(cup, u_int8_t *, ntfs_cntob(NTFS_COMPUNIT_CL), M_NTFSDECOMP, M_WAITOK); MALLOC(uup, u_int8_t *, ntfs_cntob(NTFS_COMPUNIT_CL), M_NTFSDECOMP, M_WAITOK); cn = (ntfs_btocn(roff)) & (~(NTFS_COMPUNIT_CL - 1)); off = roff - ntfs_cntob(cn); while (left) { error = ntfs_readattr_plain(ntmp, ip, attrnum, attrname, ntfs_cntob(cn), ntfs_cntob(NTFS_COMPUNIT_CL), cup, &init); if (error) break; tocopy = min(left, ntfs_cntob(NTFS_COMPUNIT_CL) - off); if (init == ntfs_cntob(NTFS_COMPUNIT_CL)) { memcpy(data, cup + off, tocopy); } else if (init == 0) { bzero(data, tocopy); } else { error = ntfs_uncompunit(ntmp, uup, cup); if (error) break; memcpy(data, uup + off, tocopy); } left -= tocopy; data = data + tocopy; off += tocopy - ntfs_cntob(NTFS_COMPUNIT_CL); cn += NTFS_COMPUNIT_CL; } FREE(uup, M_NTFSDECOMP); FREE(cup, M_NTFSDECOMP); } else error = ntfs_readattr_plain(ntmp, ip, attrnum, attrname, roff, rsize, rdata, &init); ntfs_ntvattrrele(vap); return (error); } #if UNUSED_CODE int ntfs_parserun( cn_t * cn, cn_t * cl, u_int8_t * run, u_long len, u_long *off) { u_int8_t sz; int i; if (NULL == run) { printf("ntfs_parsetun: run == NULL\n"); return (EINVAL); } sz = run[(*off)++]; if (0 == sz) { printf("ntfs_parserun: trying to go out of run\n"); return (E2BIG); } *cl = 0; if ((sz & 0xF) > 8 || (*off) + (sz & 0xF) > len) { printf("ntfs_parserun: " \ "bad run: length too big: sz: 0x%02x (%ld < %ld + sz)\n", sz, len, *off); return (EINVAL); } for (i = 0; i < (sz & 0xF); i++) *cl += (u_int32_t) run[(*off)++] << (i << 3); sz >>= 4; if ((sz & 0xF) > 8 || (*off) + (sz & 0xF) > len) { printf("ntfs_parserun: " \ "bad run: length too big: sz: 0x%02x (%ld < %ld + sz)\n", sz, len, *off); return (EINVAL); } for (i = 0; i < (sz & 0xF); i++) *cn += (u_int32_t) run[(*off)++] << (i << 3); return (0); } #endif /* * Process fixup routine on given buffer. 
*/ int ntfs_procfixups( struct ntfsmount * ntmp, u_int32_t magic, caddr_t buf, size_t len) { struct fixuphdr *fhp = (struct fixuphdr *) buf; int i; u_int16_t fixup; u_int16_t *fxp; u_int16_t *cfxp; if (fhp->fh_magic != magic) { printf("ntfs_procfixups: magic doesn't match: %08x != %08x\n", fhp->fh_magic, magic); return (EINVAL); } if ((fhp->fh_fnum - 1) * ntmp->ntm_bps != len) { printf("ntfs_procfixups: " \ "bad fixups number: %d for %d bytes block\n", fhp->fh_fnum, len); return (EINVAL); } if (fhp->fh_foff >= ntmp->ntm_spc * ntmp->ntm_mftrecsz * ntmp->ntm_bps) { printf("ntfs_procfixups: invalid offset: %x", fhp->fh_foff); return (EINVAL); } fxp = (u_int16_t *) (buf + fhp->fh_foff); cfxp = (u_int16_t *) (buf + ntmp->ntm_bps - 2); fixup = *fxp++; for (i = 1; i < fhp->fh_fnum; i++, fxp++) { if (*cfxp != fixup) { printf("ntfs_procfixups: fixup %d doesn't match\n", i); return (EINVAL); } *cfxp = *fxp; ((caddr_t) cfxp) += ntmp->ntm_bps; } return (0); } #if UNUSED_CODE int ntfs_runtocn( cn_t * cn, struct ntfsmount * ntmp, u_int8_t * run, u_long len, cn_t vcn) { cn_t ccn = 0; cn_t ccl = 0; u_long off = 0; int error = 0; #if NTFS_DEBUG int i; printf("ntfs_runtocn: run: 0x%p, %ld bytes, vcn:%ld\n", run, len, (u_long) vcn); printf("ntfs_runtocn: run: "); for (i = 0; i < len; i++) printf("0x%02x ", run[i]); printf("\n"); #endif if (NULL == run) { printf("ntfs_runtocn: run == NULL\n"); return (EINVAL); } do { if (run[off] == 0) { printf("ntfs_runtocn: vcn too big\n"); return (E2BIG); } vcn -= ccl; error = ntfs_parserun(&ccn, &ccl, run, len, &off); if (error) { printf("ntfs_runtocn: ntfs_parserun failed\n"); return (error); } } while (ccl <= vcn); *cn = ccn + vcn; return (0); } #endif Index: head/sys/fs/ntfs/ntfs_vfsops.c =================================================================== --- head/sys/fs/ntfs/ntfs_vfsops.c (revision 49534) +++ head/sys/fs/ntfs/ntfs_vfsops.c (revision 49535) @@ -1,996 +1,994 @@ /* $NetBSD: ntfs_vfsops.c,v 1.2 1999/05/06 15:43:20 christos Exp $ */ /*- * Copyright (c) 1998, 1999 Semen Ustimenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
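ntfs_procfixups() above handles NTFS update sequences ("fixups"): on disk, the last two bytes of every sector of a multi-sector record are replaced by a check word, and the original bytes are stashed in an array just after the record header, so a torn write is detected before the bytes are put back. A minimal sketch of the same check; the header field offsets (magic at 0, fixup-array offset at 4, entry count at 6) are assumed to mirror the fixuphdr layout used above, a little-endian host is assumed, and the names are illustrative.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Verify and undo the fixups of one multi-sector record of 'len' bytes
 * made of 'bps'-byte sectors.  Returns 0 on success, -1 on a bad magic,
 * size mismatch, or torn write.
 */
int
apply_fixups(uint8_t *buf, size_t len, uint32_t magic, unsigned bps)
{
	uint32_t got_magic;
	uint16_t foff, fnum, usn, atend, saved;
	unsigned i;

	memcpy(&got_magic, buf + 0, 4);
	memcpy(&foff, buf + 4, 2);
	memcpy(&fnum, buf + 6, 2);
	if (got_magic != magic || (size_t)(fnum - 1) * bps != len)
		return (-1);
	memcpy(&usn, buf + foff, 2);	/* the update sequence number */
	for (i = 1; i < fnum; i++) {
		memcpy(&atend, buf + i * bps - 2, 2);
		if (atend != usn)
			return (-1);	/* torn multi-sector write */
		memcpy(&saved, buf + foff + 2 * i, 2);
		memcpy(buf + i * bps - 2, &saved, 2);	/* restore real bytes */
	}
	return (0);
}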
* - * $Id: ntfs_vfsops.c,v 1.6 1999/05/12 09:43:04 semenu Exp $ + * $Id: ntfs_vfsops.c,v 1.7 1999/05/31 11:28:30 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include /*#define NTFS_DEBUG 1*/ #include #include #include #include #include #include #include #if defined(__FreeBSD__) MALLOC_DEFINE(M_NTFSMNT, "NTFS mount", "NTFS mount structure"); MALLOC_DEFINE(M_NTFSNTNODE,"NTFS ntnode", "NTFS ntnode information"); MALLOC_DEFINE(M_NTFSFNODE,"NTFS fnode", "NTFS fnode information"); MALLOC_DEFINE(M_NTFSDIR,"NTFS dir", "NTFS dir buffer"); #endif #if defined(__FreeBSD__) static int ntfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); #else static int ntfs_mount __P((struct mount *, const char *, void *, struct nameidata *, struct proc *)); #endif static int ntfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int ntfs_root __P((struct mount *, struct vnode **)); static int ntfs_start __P((struct mount *, int, struct proc *)); static int ntfs_statfs __P((struct mount *, struct statfs *, struct proc *)); static int ntfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int ntfs_unmount __P((struct mount *, int, struct proc *)); static int ntfs_vget __P((struct mount *mp, ino_t ino, struct vnode **vpp)); static int ntfs_mountfs __P((register struct vnode *, struct mount *, struct ntfs_args *, struct proc *)); static int ntfs_vptofh __P((struct vnode *, struct fid *)); #if defined(__FreeBSD__) static int ntfs_init __P((struct vfsconf *)); static int ntfs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); #elif defined(__NetBSD__) static void ntfs_init __P((void)); static int ntfs_fhtovp __P((struct mount *, struct fid *, struct vnode **)); static int ntfs_checkexp __P((struct mount *, struct mbuf *, int *, struct ucred **)); static int ntfs_mountroot __P((void)); static int ntfs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); #else static int ntfs_init __P((void)); static int ntfs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, struct vnode **, int *, struct ucred **)); #endif #ifdef __NetBSD__ /*ARGSUSED*/ static int ntfs_checkexp(mp, nam, exflagsp, credanonp) register struct mount *mp; struct mbuf *nam; int *exflagsp; struct ucred **credanonp; { return (EINVAL); } /*ARGSUSED*/ static int ntfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) int *name; u_int namelen; void *oldp; size_t *oldlenp; void *newp; size_t newlen; struct proc *p; { return (EINVAL); } static int ntfs_mountroot() { return (EINVAL); } #endif #if defined(__FreeBSD__) static int ntfs_init ( struct vfsconf *vcp ) #elif defined(__NetBSD__) static void ntfs_init () #else static int ntfs_init () #endif { ntfs_nthashinit(); #if !defined(__NetBSD__) return 0; #endif } static int ntfs_mount ( struct mount *mp, #if defined(__FreeBSD__) char *path, caddr_t data, #else const char *path, void *data, #endif struct nameidata *ndp, struct proc *p ) { u_int size; int err = 0; struct vnode *devvp; struct ntfs_args args; /* * Use NULL path to flag a root mount */ if( path == NULL) { /* *** * Mounting root file system *** */ /* Get vnode for root device*/ if( bdevvp( rootdev, &rootvp)) panic("ffs_mountroot: can't setup bdevvp for root"); /* * FS specific handling */ mp->mnt_flag |= MNT_RDONLY; /* XXX globally applicable?*/ /* * Attempt mount */ if( ( err 
= ntfs_mountfs(rootvp, mp, &args, p)) != 0) { /* fs specific cleanup (if any)*/ goto error_1; } goto dostatfs; /* success*/ } /* *** * Mounting non-root file system or updating a file system *** */ /* copy in user arguments*/ err = copyin(data, (caddr_t)&args, sizeof (struct ntfs_args)); if (err) goto error_1; /* can't get arguments*/ /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { printf("ntfs_mount(): MNT_UPDATE not supported\n"); err = EINVAL; goto error_1; #if 0 ump = VFSTOUFS(mp); fs = ump->um_fs; err = 0; if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (vfs_busy(mp)) { err = EBUSY; goto error_1; } err = ffs_flushfiles(mp, flags, p); vfs_unbusy(mp); } if (!err && (mp->mnt_flag & MNT_RELOAD)) err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); if (err) { goto error_1; } if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) { if (!fs->fs_clean) { if (mp->mnt_flag & MNT_FORCE) { printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt); } else { printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n", fs->fs_fsmnt); err = EPERM; goto error_1; } } fs->fs_ronly = 0; } if (fs->fs_ronly == 0) { fs->fs_clean = 0; ffs_sbupdate(ump, MNT_WAIT); } /* if not updating name...*/ if (args.fspec == 0) { /* * Process export requests. Jumping to "success" * will return the vfs_export() error code. */ err = vfs_export(mp, &ump->um_export, &args.export); goto success; } #endif } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); err = namei(ndp); if (err) { /* can't get devvp!*/ goto error_1; } devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { err = ENOTBLK; goto error_2; } if (bdevsw(devvp->v_rdev) == NULL) { err = ENXIO; goto error_2; } if (mp->mnt_flag & MNT_UPDATE) { #if 0 /* ******************** * UPDATE ******************** */ if (devvp != ntmp->um_devvp) err = EINVAL; /* needs translation */ else vrele(devvp); /* * Update device name only on success */ if( !err) { /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); } #endif } else { /* ******************** * NEW MOUNT ******************** */ /* * Since this is a new mount, we want the names for * the device and the mount point copied in. If an * error occurs, the mountpoint is discarded by the * upper level code. 
*/ /* Save "last mounted on" info for mount point (NULL pad)*/ copyinstr( path, /* mount point*/ mp->mnt_stat.f_mntonname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size); /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, /* device name*/ mp->mnt_stat.f_mntfromname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); err = ntfs_mountfs(devvp, mp, &args, p); } if (err) { goto error_2; } dostatfs: /* * Initialize FS stat information in mount struct; uses both * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname * * This code is common to root and non-root mounts */ (void)VFS_STATFS(mp, &mp->mnt_stat, p); goto success; error_2: /* error with devvp held*/ /* release devvp before failing*/ vrele(devvp); error_1: /* no state to back out*/ success: return( err); } /* * Common code for mount and mountroot */ int ntfs_mountfs(devvp, mp, argsp, p) register struct vnode *devvp; struct mount *mp; struct ntfs_args *argsp; struct proc *p; { struct buf *bp; struct ntfsmount *ntmp; dev_t dev = devvp->v_rdev; int error, ronly, ncount, i; struct vnode *vp; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); ncount = vcount(devvp); #if defined(__FreeBSD__) if (devvp->v_object) ncount -= 1; #endif if (ncount > 1 && devvp != rootvp) return (EBUSY); #if defined(__FreeBSD__) vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); VOP_UNLOCK(devvp, 0, p); #else error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); #endif if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); bp = NULL; error = bread(devvp, BBLOCK, BBSIZE, NOCRED, &bp); if (error) goto out; ntmp = malloc( sizeof *ntmp, M_NTFSMNT, M_WAITOK ); bzero( ntmp, sizeof *ntmp ); bcopy( bp->b_data, &ntmp->ntm_bootfile, sizeof(struct bootfile) ); brelse( bp ); bp = NULL; if (strncmp(ntmp->ntm_bootfile.bf_sysid, NTFS_BBID, NTFS_BBIDLEN)) { error = EINVAL; printf("ntfs_mountfs: invalid boot block\n"); goto out; } { int8_t cpr = ntmp->ntm_mftrecsz; if( cpr > 0 ) ntmp->ntm_bpmftrec = ntmp->ntm_spc * cpr; else ntmp->ntm_bpmftrec = (1 << (-cpr)) / ntmp->ntm_bps; } dprintf(("ntfs_mountfs(): bps: %d, spc: %d, media: %x, mftrecsz: %d (%d sects)\n", ntmp->ntm_bps,ntmp->ntm_spc,ntmp->ntm_bootfile.bf_media, ntmp->ntm_mftrecsz,ntmp->ntm_bpmftrec)); dprintf(("ntfs_mountfs(): mftcn: 0x%x|0x%x\n", (u_int32_t)ntmp->ntm_mftcn,(u_int32_t)ntmp->ntm_mftmirrcn)); ntmp->ntm_mountp = mp; ntmp->ntm_dev = dev; ntmp->ntm_devvp = devvp; ntmp->ntm_uid = argsp->uid; ntmp->ntm_gid = argsp->gid; ntmp->ntm_mode = argsp->mode; ntmp->ntm_flag = argsp->flag; mp->mnt_data = (qaddr_t)ntmp; dprintf(("ntfs_mountfs(): case-%s,%s uid: %d, gid: %d, mode: %o\n", (ntmp->ntm_flag & NTFS_MFLAG_CASEINS)?"insens.":"sens.", (ntmp->ntm_flag & NTFS_MFLAG_ALLNAMES)?" allnames,":"", ntmp->ntm_uid, ntmp->ntm_gid, ntmp->ntm_mode)); /* * We read in some system nodes to do not allow * reclaim them and to have everytime access to them. 
*/ { int pi[3] = { NTFS_MFTINO, NTFS_ROOTINO, NTFS_BITMAPINO }; for (i=0; i<3; i++) { error = VFS_VGET(mp, pi[i], &(ntmp->ntm_sysvn[pi[i]])); if(error) goto out1; ntmp->ntm_sysvn[pi[i]]->v_flag |= VSYSTEM; VREF(ntmp->ntm_sysvn[pi[i]]); vput(ntmp->ntm_sysvn[pi[i]]); } } /* * Read in WHOLE lowcase -> upcase translation * file. */ MALLOC(ntmp->ntm_upcase, wchar *, 65536 * sizeof(wchar), M_NTFSMNT, M_WAITOK); error = VFS_VGET(mp, NTFS_UPCASEINO, &vp); if(error) goto out1; error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, 0, 65536*sizeof(wchar), ntmp->ntm_upcase); vput(vp); if(error) goto out1; /* * Scan $BitMap and count free clusters */ error = ntfs_calccfree(ntmp, &ntmp->ntm_cfree); if(error) goto out1; /* * Read and translate to internal format attribute * definition file. */ { int num,j; struct attrdef ad; /* Open $AttrDef */ error = VFS_VGET(mp, NTFS_ATTRDEFINO, &vp ); if(error) goto out1; /* Count valid entries */ for(num=0;;num++) { error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, num * sizeof(ad), sizeof(ad), &ad); if (error) goto out1; if (ad.ad_name[0] == 0) break; } /* Alloc memory for attribute definitions */ MALLOC(ntmp->ntm_ad, struct ntvattrdef *, num * sizeof(struct ntvattrdef), M_NTFSMNT, M_WAITOK); ntmp->ntm_adnum = num; /* Read them and translate */ for(i=0;intm_ad[i].ad_name[j] = ad.ad_name[j]; } while(ad.ad_name[j++]); ntmp->ntm_ad[i].ad_namelen = j - 1; ntmp->ntm_ad[i].ad_type = ad.ad_type; } vput(vp); } mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); #if defined(__FreeBSD__) mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; #else mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_NTFS); #endif mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; #if defined(__FreeBSD__) devvp->v_specmountpoint = mp; #else devvp->v_specflags |= SI_MOUNTEDON; #endif return (0); out1: for(i=0;intm_sysvn[i]) vrele(ntmp->ntm_sysvn[i]); if (vflush(mp,NULLVP,0)) printf("ntfs_mountfs: vflush failed\n"); out: #if defined(__FreeBSD__) devvp->v_specmountpoint = NULL; #else devvp->v_specflags &= ~SI_MOUNTEDON; #endif if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); return (error); } static int ntfs_start ( struct mount *mp, int flags, struct proc *p ) { return (0); } static int ntfs_unmount( struct mount *mp, int mntflags, struct proc *p) { register struct ntfsmount *ntmp; int error, ronly = 0, flags, i; dprintf(("ntfs_unmount: unmounting...\n")); ntmp = VFSTONTFS(mp); flags = 0; if(mntflags & MNT_FORCE) flags |= FORCECLOSE; dprintf(("ntfs_unmount: vflushing...\n")); error = vflush(mp,NULLVP,flags | SKIPSYSTEM); if (error) { printf("ntfs_unmount: vflush failed: %d\n",error); return (error); } /* Check if only system vnodes are rest */ for(i=0;intm_sysvn[i]) && (ntmp->ntm_sysvn[i]->v_usecount > 1)) return (EBUSY); /* Derefernce all system vnodes */ for(i=0;intm_sysvn[i]) vrele(ntmp->ntm_sysvn[i]); /* vflush system vnodes */ error = vflush(mp,NULLVP,flags); if (error) printf("ntfs_unmount: vflush failed(sysnodes): %d\n",error); #if defined(__FreeBSD__) ntmp->ntm_devvp->v_specmountpoint = NULL; #else ntmp->ntm_devvp->v_specflags &= ~SI_MOUNTEDON; #endif vinvalbuf(ntmp->ntm_devvp, V_SAVE, NOCRED, p, 0, 0); error = VOP_CLOSE(ntmp->ntm_devvp, ronly ? 
FREAD : FREAD|FWRITE, NOCRED, p); vrele(ntmp->ntm_devvp); dprintf(("ntfs_umount: freeing memory...\n")); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; FREE(ntmp->ntm_ad, M_NTFSMNT); FREE(ntmp->ntm_upcase, M_NTFSMNT); FREE(ntmp, M_NTFSMNT); return (error); } static int ntfs_root( struct mount *mp, struct vnode **vpp ) { struct vnode *nvp; int error = 0; dprintf(("ntfs_root(): sysvn: %p\n", VFSTONTFS(mp)->ntm_sysvn[NTFS_ROOTINO])); error = VFS_VGET(mp, (ino_t)NTFS_ROOTINO, &nvp); if(error) { printf("ntfs_root: VFS_VGET failed: %d\n",error); return (error); } *vpp = nvp; return (0); } static int ntfs_quotactl ( struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p) { printf("\nntfs_quotactl():\n"); return EOPNOTSUPP; } int ntfs_calccfree( struct ntfsmount *ntmp, cn_t *cfreep) { struct vnode *vp; u_int8_t *tmp; int j, error; long cfree = 0; size_t bmsize, i; vp = ntmp->ntm_sysvn[NTFS_BITMAPINO]; bmsize = VTOF(vp)->f_size; MALLOC(tmp, u_int8_t *, bmsize, M_TEMP, M_WAITOK); error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, 0, bmsize, tmp); if(error) { FREE(tmp, M_TEMP); return (error); } for(i=0;intm_sysvn[NTFS_MFTINO])->f_size; mftallocated = VTOF(ntmp->ntm_sysvn[NTFS_MFTINO])->f_allocated; #if defined(__FreeBSD__) sbp->f_type = mp->mnt_vfc->vfc_typenum; #elif defined(__NetBSD__) sbp->f_type = 0; #else sbp->f_type = MOUNT_NTFS; #endif sbp->f_bsize = ntmp->ntm_bps; sbp->f_iosize = ntmp->ntm_bps * ntmp->ntm_spc; sbp->f_blocks = ntmp->ntm_bootfile.bf_spv; sbp->f_bfree = sbp->f_bavail = ntfs_cntobn(ntmp->ntm_cfree); sbp->f_ffree = sbp->f_bfree / ntmp->ntm_bpmftrec; sbp->f_files = mftallocated / ntfs_bntob(ntmp->ntm_bpmftrec) + sbp->f_ffree; if (sbp != &mp->mnt_stat) { bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } sbp->f_flags = mp->mnt_flag; return (0); } static int ntfs_sync ( struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) { /*dprintf(("ntfs_sync():\n"));*/ return (0); } /*ARGSUSED*/ static int ntfs_fhtovp( #if defined(__FreeBSD__) struct mount *mp, struct fid *fhp, struct sockaddr *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp) #elif defined(__NetBSD__) struct mount *mp, struct fid *fhp, struct vnode **vpp) #else struct mount *mp, struct fid *fhp, struct mbuf *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp) #endif { printf("\ntfs_fhtovp():\n"); return 0; } static int ntfs_vptofh( struct vnode *vp, struct fid *fhp) { printf("ntfs_vptofh():\n"); return EOPNOTSUPP; } int ntfs_vgetex( struct mount *mp, ino_t ino, u_int32_t attrtype, char *attrname, u_long lkflags, u_long flags, struct proc *p, struct vnode **vpp) { int error; register struct ntfsmount *ntmp; struct ntnode *ip; struct fnode *fp; struct vnode *vp; dprintf(("ntfs_vgetex: ino: %d, attr: 0x%x:%s, lkf: 0x%x, f: 0x%x\n", ino, attrtype, attrname?attrname:"", lkflags, flags )); ntmp = VFSTONTFS(mp); *vpp = NULL; /* Get ntnode */ error = ntfs_ntlookup(ntmp, ino, &ip); if (error) { printf("ntfs_vget: ntfs_ntget failed\n"); return (error); } /* It may be not initialized fully, so force load it */ if (!(flags & VG_DONTLOADIN) && !(ip->i_flag & IN_LOADED)) { error = ntfs_loadntnode(ntmp, ip); if(error) { printf("ntfs_vget: CAN'T LOAD ATTRIBUTES FOR INO: %d\n", ip->i_number); ntfs_ntput(ip); return (error); } } error = ntfs_fget(ntmp, ip, attrtype, attrname, &fp); if (error) { printf("ntfs_vget: ntfs_fget failed\n"); ntfs_ntput(ip); return 
(error); } if (!(flags & VG_DONTVALIDFN) && !(fp->f_flag & FN_VALID)) { if ((ip->i_frflag & NTFS_FRFLAG_DIR) && (fp->f_attrtype == 0x80 && fp->f_attrname == NULL)) { fp->f_type = VDIR; } else if(flags & VG_EXT) { fp->f_type = VNON; fp->f_size =fp->f_allocated = 0; } else { fp->f_type = VREG; error = ntfs_filesize(ntmp, fp, &fp->f_size, &fp->f_allocated); if (error) { ntfs_ntput(ip); return (error); } } fp->f_flag |= FN_VALID; } if (FTOV(fp)) { VGET(FTOV(fp), lkflags, p); *vpp = FTOV(fp); ntfs_ntput(ip); return (0); } error = getnewvnode(VT_NTFS, ntmp->ntm_mountp, ntfs_vnodeop_p, &vp); if(error) { ntfs_frele(fp); ntfs_ntput(ip); return (error); } dprintf(("ntfs_vget: vnode: %p for ntnode: %d\n", vp,ino)); lockinit(&fp->f_lock, PINOD, "fnode", 0, 0); fp->f_vp = vp; vp->v_data = fp; vp->v_type = fp->f_type; if (ino == NTFS_ROOTINO) vp->v_flag |= VROOT; ntfs_ntput(ip); if (lkflags & LK_TYPE_MASK) { error = VN_LOCK(vp, lkflags, p); if (error) { vput(vp); return (error); } } VREF(fp->f_devvp); *vpp = vp; return (0); } static int ntfs_vget( struct mount *mp, ino_t ino, struct vnode **vpp) { return ntfs_vgetex(mp, ino, NTFS_A_DATA, NULL, LK_EXCLUSIVE, 0, curproc, vpp); } #if defined(__FreeBSD__) static struct vfsops ntfs_vfsops = { ntfs_mount, ntfs_start, ntfs_unmount, ntfs_root, ntfs_quotactl, ntfs_statfs, ntfs_sync, ntfs_vget, ntfs_fhtovp, ntfs_vptofh, ntfs_init, NULL }; VFS_SET(ntfs_vfsops, ntfs, 0); #elif defined(__NetBSD__) extern struct vnodeopv_desc ntfs_vnodeop_opv_desc; struct vnodeopv_desc *ntfs_vnodeopv_descs[] = { &ntfs_vnodeop_opv_desc, NULL, }; struct vfsops ntfs_vfsops = { MOUNT_NTFS, ntfs_mount, ntfs_start, ntfs_unmount, ntfs_root, ntfs_quotactl, ntfs_statfs, ntfs_sync, ntfs_vget, ntfs_fhtovp, ntfs_vptofh, ntfs_init, ntfs_sysctl, ntfs_mountroot, ntfs_checkexp, ntfs_vnodeopv_descs, }; #else static struct vfsops ntfs_vfsops = { ntfs_mount, ntfs_start, ntfs_unmount, ntfs_root, ntfs_quotactl, ntfs_statfs, ntfs_sync, ntfs_vget, ntfs_fhtovp, ntfs_vptofh, ntfs_init, }; VFS_SET(ntfs_vfsops, ntfs, MOUNT_NTFS, 0); #endif Index: head/sys/fs/ntfs/ntfs_vnops.c =================================================================== --- head/sys/fs/ntfs/ntfs_vnops.c (revision 49534) +++ head/sys/fs/ntfs/ntfs_vnops.c (revision 49535) @@ -1,1030 +1,1029 @@ /* $NetBSD: ntfs_vnops.c,v 1.2 1999/05/06 15:43:20 christos Exp $ */ /* * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * John Heidemann of the UCLA Ficus project. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: ntfs_vnops.c,v 1.4 1999/05/11 19:54:52 phk Exp $ + * $Id: ntfs_vnops.c,v 1.5 1999/05/12 09:43:06 semenu Exp $ * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) #include #endif #include #include /*#define NTFS_DEBUG 1*/ #include #include #include #include -#include static int ntfs_bypass __P((struct vop_generic_args *ap)); static int ntfs_read __P((struct vop_read_args *)); static int ntfs_write __P((struct vop_write_args *ap)); static int ntfs_getattr __P((struct vop_getattr_args *ap)); static int ntfs_inactive __P((struct vop_inactive_args *ap)); static int ntfs_print __P((struct vop_print_args *ap)); static int ntfs_reclaim __P((struct vop_reclaim_args *ap)); static int ntfs_strategy __P((struct vop_strategy_args *ap)); #if defined(__NetBSD__) static int ntfs_islocked __P((struct vop_islocked_args *ap)); static int ntfs_unlock __P((struct vop_unlock_args *ap)); static int ntfs_lock __P((struct vop_lock_args *ap)); #endif static int ntfs_access __P((struct vop_access_args *ap)); static int ntfs_open __P((struct vop_open_args *ap)); static int ntfs_close __P((struct vop_close_args *ap)); static int ntfs_readdir __P((struct vop_readdir_args *ap)); static int ntfs_lookup __P((struct vop_lookup_args *ap)); static int ntfs_bmap __P((struct vop_bmap_args *ap)); #if defined(__FreeBSD__) static int ntfs_getpages __P((struct vop_getpages_args *ap)); static int ntfs_putpages __P((struct vop_putpages_args *)); #endif static int ntfs_fsync __P((struct vop_fsync_args *ap)); int ntfs_prtactive = 1; /* 1 => print out reclaim of active vnodes */ #if defined(__FreeBSD__) int ntfs_getpages(ap) struct vop_getpages_args *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage); } int ntfs_putpages(ap) struct vop_putpages_args *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } #endif /* * This is a noop, simply returning what one has been given. 
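 *
 * Put differently, for every logical block a_bn the answers handed back
 * below are:
 *
 *	*a_vpp  = a_vp		the "device" is this ntfs vnode itself
 *	*a_bnp  = a_bn		identity mapping, no translation
 *	*a_runp = 0		never promise contiguous blocks, so the
 *				clustering code stays out of the way
 *	*a_runb = 0		(where that field exists on this platform)
 *
 * so generic callers get a well-defined answer while the real work is
 * still done through ntfs_readattr() in the read and strategy paths.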
*/ int ntfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { dprintf(("ntfs_bmap: vn: %p, blk: %d\n", ap->a_vp,(u_int32_t)ap->a_bn)); if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; #if !defined(__NetBSD__) if (ap->a_runb != NULL) *ap->a_runb = 0; #endif return (0); } static int ntfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); struct uio *uio = ap->a_uio; struct ntfsmount *ntmp = ip->i_mp; u_int8_t *data; u_int64_t toread; int error; dprintf(("ntfs_read: ino: %d, off: %d resid: %d, segflg: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid,uio->uio_segflg)); toread = fp->f_size; dprintf(("ntfs_read: filesize: %d",(u_int32_t)toread)); toread = min( uio->uio_resid, toread - uio->uio_offset ); dprintf((", toread: %d\n",(u_int32_t)toread)); MALLOC(data, u_int8_t *, toread, M_TEMP,M_WAITOK); error = ntfs_readattr(ntmp, ip, fp->f_attrtype, fp->f_attrname, uio->uio_offset, toread, data); if(error) { printf("ntfs_read: ntfs_readattr failed: %d\n",error); FREE(data, M_TEMP); return (error); } error = uiomove(data, (int) toread, uio); if(error) { printf("ntfs_read: uiomove failed: %d\n",error); FREE(data, M_TEMP); return (error); } FREE(data, M_TEMP); return (0); } static int ntfs_bypass(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { int error = ENOTTY; dprintf(("ntfs_bypass: %s\n", ap->a_desc->vdesc_name)); return (error); } static int ntfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); register struct vattr *vap = ap->a_vap; dprintf(("ntfs_getattr: %d, flags: %d\n",ip->i_number,ip->i_flag)); vap->va_fsid = dev2udev(fp->f_dev); vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode; vap->va_nlink = ip->i_nlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; vap->va_rdev = 0; /* XXX UNODEV ? */ vap->va_size = fp->f_size; vap->va_bytes = fp->f_allocated; vap->va_atime = ntfs_nttimetounix(fp->f_times.t_access); vap->va_mtime = ntfs_nttimetounix(fp->f_times.t_write); vap->va_ctime = ntfs_nttimetounix(fp->f_times.t_create); vap->va_flags = ip->i_flag; vap->va_gen = 0; vap->va_blocksize = ip->i_mp->ntm_spc * ip->i_mp->ntm_bps; vap->va_type = fp->f_type; vap->va_filerev = 0; return (0); } /* * Last reference to an ntnode. If necessary, write or delete it. */ int ntfs_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct ntnode *ip = VTONT(vp); int error; dprintf(("ntfs_inactive: vnode: %p, ntnode: %d\n", vp, ip->i_number)); if (ntfs_prtactive && vp->v_usecount != 0) vprint("ntfs_inactive: pushing active", vp); error = 0; VOP__UNLOCK(vp,0,ap->a_p); /* * If we are done with the ntnode, reclaim it * so that it can be reused immediately. */ if (vp->v_usecount == 0 && ip->i_mode == 0) #if defined(__FreeBSD__) vrecycle(vp, (struct simplelock *)0, ap->a_p); #else /* defined(__NetBSD__) */ vgone(vp); #endif return (error); } /* * Reclaim an inode so that it can be used for other purposes. 
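 *
 * The body below runs a fixed teardown sequence; condensed here for
 * orientation:
 *
 *	ntfs_ntget(ip);			grab the underlying ntnode
 *	cache_purge(vp);		drop name cache entries for vp
 *	vrele(fp->f_devvp);		release the device vnode, if set
 *	ntfs_frele(fp);			free the fnode hung off v_data
 *	vp->v_data = NULL;		detach the vnode
 *	ntfs_ntput(ip);			drop (and possibly free) the ntnode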
*/ int ntfs_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); int error; dprintf(("ntfs_reclaim: vnode: %p, ntnode: %d\n", vp, ip->i_number)); error = ntfs_ntget(ip); if (error) return (error); #if defined(__FreeBSD__) VOP__UNLOCK(vp,0,ap->a_p); #endif /* Purge old data structures associated with the inode. */ cache_purge(vp); if (fp->f_devvp) { vrele(fp->f_devvp); fp->f_devvp = NULL; } ntfs_frele(fp); vp->v_data = NULL; ntfs_ntput(ip); return (0); } static int ntfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { /* printf("[ntfs_print]");*/ return (0); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. */ int ntfs_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { register struct buf *bp = ap->a_bp; register struct vnode *vp = bp->b_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); struct ntfsmount *ntmp = ip->i_mp; int error; dprintf(("ntfs_strategy: offset: %d, blkno: %d, lblkno: %d\n", (u_int32_t)bp->b_offset,(u_int32_t)bp->b_blkno, (u_int32_t)bp->b_lblkno)); dprintf(("strategy: bcount: %d flags: 0x%x\n", (u_int32_t)bp->b_bcount,bp->b_flags)); if (bp->b_flags & B_READ) { u_int32_t toread; if (ntfs_cntob(bp->b_blkno) >= fp->f_size) { clrbuf(bp); error = 0; } else { toread = min(bp->b_bcount, fp->f_size-ntfs_cntob(bp->b_blkno)); dprintf(("ntfs_strategy: toread: %d, fsize: %d\n", toread,(u_int32_t)fp->f_size)); error = ntfs_readattr(ntmp, ip, fp->f_attrtype, fp->f_attrname, ntfs_cntob(bp->b_blkno), toread, bp->b_data); if (error) { printf("ntfs_strategy: ntfs_readattr failed\n"); bp->b_error = error; bp->b_flags |= B_ERROR; } bzero(bp->b_data + toread, bp->b_bcount - toread); } } else { size_t tmp; u_int32_t towrite; if (ntfs_cntob(bp->b_blkno) + bp->b_bcount >= fp->f_size) { printf("ntfs_strategy: CAN'T EXTEND FILE\n"); bp->b_error = error = EFBIG; bp->b_flags |= B_ERROR; } else { towrite = min(bp->b_bcount, fp->f_size-ntfs_cntob(bp->b_blkno)); dprintf(("ntfs_strategy: towrite: %d, fsize: %d\n", towrite,(u_int32_t)fp->f_size)); error = ntfs_writeattr_plain(ntmp, ip, fp->f_attrtype, fp->f_attrname, ntfs_cntob(bp->b_blkno),towrite, bp->b_data, &tmp); if (error) { printf("ntfs_strategy: ntfs_writeattr fail\n"); bp->b_error = error; bp->b_flags |= B_ERROR; } } } biodone(bp); return (error); } static int ntfs_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); struct uio *uio = ap->a_uio; struct ntfsmount *ntmp = ip->i_mp; u_int8_t *data; u_int64_t towrite; off_t off; size_t written; int error; dprintf(("ntfs_write: ino: %d, off: %d resid: %d, segflg: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid,uio->uio_segflg)); towrite = fp->f_size; dprintf(("ntfs_write: filesize: %d",(u_int32_t)towrite)); if (uio->uio_resid + uio->uio_offset > towrite) { printf("ntfs_write: CAN'T WRITE BEYOND OF FILE\n"); return (EFBIG); } towrite = min(uio->uio_resid, towrite - uio->uio_offset); off = uio->uio_offset; dprintf((", towrite: %d\n",(u_int32_t)towrite)); MALLOC(data, u_int8_t *, towrite, M_TEMP,M_WAITOK); error = uiomove(data, (int) towrite, uio); if(error) { FREE(data, M_TEMP); return (error); } error = ntfs_writeattr_plain(ntmp, ip, 
fp->f_attrtype, fp->f_attrname, off, towrite, data, &written); if(error) { printf("ntfs_write: ntfs_writeattr failed: %d\n",error); FREE(data, M_TEMP); return (error); } FREE(data, M_TEMP); return (0); } #if defined(__NetBSD__) /* * Check for a locked ntnode. */ int ntfs_islocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { register struct ntnode *ip = VTONT(ap->a_vp); dprintf(("ntfs_islocked %d\n",ip->i_number)); if (ip->i_flag & IN_LOCKED) return (1); return (0); } /* * Unlock an ntnode. If WANT bit is on, wakeup. */ int ntfs_lockcount = 90; int ntfs_unlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap; { register struct ntnode *ip = VTONT(ap->a_vp); #ifdef DIAGNOSTIC struct proc *p = curproc; #endif dprintf(("ntfs_unlock %d\n",ip->i_number)); #ifdef DIAGNOSTIC if ((ip->i_flag & IN_LOCKED) == 0) { vprint("ntfs_unlock: unlocked ntnode", ap->a_vp); panic("ntfs_unlock NOT LOCKED"); } if (p && p->p_pid != ip->i_lockholder && p->p_pid > -1 && ip->i_lockholder > -1 && ntfs_lockcount++ < 100) panic("unlocker (%d) != lock holder (%d)", p->p_pid, ip->i_lockholder); #endif if (--ip->i_lockcount > 0) { if ((ip->i_flag & IN_RECURSE) == 0) panic("ntfs_unlock: recursive lock prematurely released, pid=%d\n", ip->i_lockholder); return (0); } ip->i_lockholder = 0; ip->i_flag &= ~(IN_LOCKED|IN_RECURSE); if (ip->i_flag & IN_WANTED) { ip->i_flag &= ~IN_WANTED; wakeup((caddr_t)ip); } return (0); } /* * Lock an ntnode. If its already locked, set the WANT bit and sleep. */ int ntfs_lock(ap) struct vop_lock_args /* { struct vnode *a_vp; } */ *ap; { struct proc *p = curproc; register struct vnode *vp = ap->a_vp; register struct ntnode *ip = VTONT(vp); dprintf(("ntfs_lock %d (%d locks)\n",ip->i_number,ip->i_lockcount)); start: while (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; (void) tsleep((caddr_t)vp, PINOD, "ntflk1", 0); } if (vp->v_tag == VT_NON) return (ENOENT); ip = VTONT(vp); if (ip->i_flag & IN_LOCKED) { if (p->p_pid == ip->i_lockholder) { if( (ip->i_flag & IN_RECURSE) == 0) panic("ntfs_lock: recursive lock not expected, pid: %d\n", ip->i_lockholder); } else { ip->i_flag |= IN_WANTED; #ifdef DIAGNOSTIC if (p) ip->i_lockwaiter = p->p_pid; else ip->i_lockwaiter = -1; #endif (void) tsleep((caddr_t)ip, PINOD, "ntflk2", 0); goto start; } } #ifdef DIAGNOSTIC ip->i_lockwaiter = 0; if (((ip->i_flag & IN_RECURSE) == 0) && (ip->i_lockholder != 0)) panic("lockholder (%d) != 0", ip->i_lockholder); if (p && p->p_pid == 0) printf("locking by process 0\n"); #endif if ((ip->i_flag & IN_RECURSE) == 0) ip->i_lockcount = 1; else ++ip->i_lockcount; if (p) ip->i_lockholder = p->p_pid; else ip->i_lockholder = -1; ip->i_flag |= IN_LOCKED; return (0); } #endif int ntfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct ntnode *ip = VTONT(vp); struct ucred *cred = ap->a_cred; mode_t mask, mode = ap->a_mode; register gid_t *gp; int i; #ifdef QUOTA int error; #endif dprintf(("ntfs_access: %d\n",ip->i_number)); /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ if (mode & VWRITE) { switch ((int)vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #ifdef QUOTA if (error = getinoquota(ip)) return (error); #endif break; } } /* If immutable bit set, nobody gets to write it. 
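 * (The corresponding test is left commented out below.)  The check that
 * does run is the classic owner/group/other walk: collect the S_I{R,W,X}*
 * bits matching the requested access and require all of them in i_mode.
 * The owner case in isolation, with a hypothetical "req" standing for the
 * requested VREAD/VWRITE/VEXEC bits, boils down to:
 *
 *	mode_t mask = 0;
 *	if (req & VREAD)  mask |= S_IRUSR;
 *	if (req & VWRITE) mask |= S_IWUSR;
 *	if (req & VEXEC)  mask |= S_IXUSR;
 *	return ((ip->i_mode & mask) == mask ? 0 : EACCES);
 *
 * and the group and other cases repeat the pattern with the GRP/OTH bits.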
*/ /* if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE)) return (EPERM); */ /* Otherwise, user id 0 always gets access. */ if (cred->cr_uid == 0) return (0); mask = 0; /* Otherwise, check the owner. */ if (cred->cr_uid == ip->i_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (ip->i_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* * Open called. * * Nothing to do. */ /* ARGSUSED */ static int ntfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { #if NTFS_DEBUG register struct vnode *vp = ap->a_vp; register struct ntnode *ip = VTONT(vp); printf("ntfs_open: %d\n",ip->i_number); #endif /* * Files marked append-only must be opened for appending. */ return (0); } /* * Close called. * * Update the times on the inode. */ /* ARGSUSED */ static int ntfs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { #if NTFS_DEBUG register struct vnode *vp = ap->a_vp; register struct ntnode *ip = VTONT(vp); printf("ntfs_close: %d\n",ip->i_number); #endif return (0); } int ntfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_ncookies; u_int **cookies; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); struct uio *uio = ap->a_uio; struct ntfsmount *ntmp = ip->i_mp; int i, error = 0; u_int32_t faked = 0, num; int ncookies = 0; struct dirent cde; off_t off; dprintf(("ntfs_readdir %d off: %d resid: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid)); off = uio->uio_offset; /* Simulate . in every dir except ROOT */ if( ip->i_number != NTFS_ROOTINO ) { struct dirent dot = { NTFS_ROOTINO, sizeof(struct dirent), DT_DIR, 1, "." }; if( uio->uio_offset < sizeof(struct dirent) ) { dot.d_fileno = ip->i_number; error = uiomove((char *)&dot,sizeof(struct dirent),uio); if(error) return (error); ncookies ++; } } /* Simulate .. in every dir including ROOT */ if( uio->uio_offset < 2 * sizeof(struct dirent) ) { struct dirent dotdot = { NTFS_ROOTINO, sizeof(struct dirent), DT_DIR, 2, ".." }; error = uiomove((char *)&dotdot,sizeof(struct dirent),uio); if(error) return (error); ncookies ++; } faked = (ip->i_number == NTFS_ROOTINO) ? 
1 : 2; num = uio->uio_offset / sizeof(struct dirent) - faked; while( uio->uio_resid >= sizeof(struct dirent) ) { struct attr_indexentry *iep; error = ntfs_ntreaddir(ntmp, fp, num, &iep); if(error) return (error); if( NULL == iep ) break; while( !(iep->ie_flag & NTFS_IEFLAG_LAST) && (uio->uio_resid >= sizeof(struct dirent)) ) { if( ntfs_isnamepermitted(ntmp,iep) ) { dprintf(("ntfs_readdir: elem: %d, fname:[",num)); for(i=0;iie_fnamelen;i++) { cde.d_name[i] = (char)iep->ie_fname[i]; dprintf(("%c", cde.d_name[i])); } dprintf(("] type: %d, flag: %d, ",iep->ie_fnametype, iep->ie_flag)); cde.d_name[i] = '\0'; cde.d_namlen = iep->ie_fnamelen; cde.d_fileno = iep->ie_number; cde.d_type = (iep->ie_fflag & NTFS_FFLAG_DIR) ? DT_DIR : DT_REG; cde.d_reclen = sizeof(struct dirent); dprintf(("%s\n", (cde.d_type == DT_DIR) ? "dir":"reg")); error = uiomove((char *)&cde, sizeof(struct dirent), uio); if(error) return (error); ncookies++; num++; } iep = NTFS_NEXTREC(iep,struct attr_indexentry *); } } dprintf(("ntfs_readdir: %d entries (%d bytes) read\n", ncookies,(u_int)(uio->uio_offset - off))); dprintf(("ntfs_readdir: off: %d resid: %d\n", (u_int32_t)uio->uio_offset,uio->uio_resid)); if (!error && ap->a_ncookies != NULL) { struct dirent* dpStart; struct dirent* dp; #if defined(__FreeBSD__) u_long *cookies; u_long *cookiep; #else /* defined(__NetBSD__) */ off_t *cookies; off_t *cookiep; #endif printf("ntfs_readdir: %d cookies\n",ncookies); if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) panic("ntfs_readdir: unexpected uio from NFS server"); dpStart = (struct dirent *) ((caddr_t)uio->uio_iov->iov_base - (uio->uio_offset - off)); #if defined(__FreeBSD__) MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); #else /* defined(__NetBSD__) */ MALLOC(cookies, off_t *, ncookies * sizeof(off_t), M_TEMP, M_WAITOK); #endif for (dp = dpStart, cookiep = cookies, i=0; i < ncookies; dp = (struct dirent *)((caddr_t) dp + dp->d_reclen), i++) { off += dp->d_reclen; *cookiep++ = (u_int) off; } *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } /* if (ap->a_eofflag) *ap->a_eofflag = VTONT(ap->a_vp)->i_size <= uio->uio_offset; */ return (error); } int ntfs_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct ntnode *dip = VTONT(dvp); struct ntfsmount *ntmp = dip->i_mp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int error; int lockparent = cnp->cn_flags & LOCKPARENT; #if NTFS_DEBUG int wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); #endif dprintf(("ntfs_lookup: %s (%ld bytes) in %d, lp: %d, wp: %d \n", cnp->cn_nameptr, cnp->cn_namelen, dip->i_number,lockparent, wantparent)); error = VOP_ACCESS(dvp, VEXEC, cred, cnp->cn_proc); if(error) return (error); if( (cnp->cn_namelen == 1) && !strncmp(cnp->cn_nameptr,".",1) ) { dprintf(("ntfs_lookup: faking . directory in %d\n", dip->i_number)); VREF(dvp); *ap->a_vpp = dvp; return (0); } else if( (cnp->cn_namelen == 2) && !strncmp(cnp->cn_nameptr,"..",2) && (cnp->cn_flags & ISDOTDOT) ) { struct ntvattr *vap; dprintf(("ntfs_lookup: faking .. 
directory in %d\n", dip->i_number)); error = ntfs_ntvattrget(ntmp, dip, NTFS_A_NAME, NULL, 0, &vap); if(error) return (error); VOP__UNLOCK(dvp,0,cnp->cn_proc); dprintf(("ntfs_lookup: parentdir: %d\n", vap->va_a_name->n_pnumber)); error = VFS_VGET(ntmp->ntm_mountp, vap->va_a_name->n_pnumber,ap->a_vpp); ntfs_ntvattrrele(vap); if(error) { VOP__LOCK(dvp, 0, cnp->cn_proc); return(error); } if( lockparent && (cnp->cn_flags & ISLASTCN) && (error = VOP__LOCK(dvp, 0, cnp->cn_proc)) ) { vput( *(ap->a_vpp) ); return (error); } return (error); } else { error = ntfs_ntlookupfile(ntmp, dvp, cnp, ap->a_vpp); if(error) return (error); dprintf(("ntfs_lookup: found ino: %d\n", VTONT(*ap->a_vpp)->i_number)); if(!lockparent || !(cnp->cn_flags & ISLASTCN)) VOP__UNLOCK(dvp, 0, cnp->cn_proc); if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, *ap->a_vpp, cnp); } return (error); } /* * Flush the blocks of a file to disk. * * This function is worthless for vnodes that represent directories. Maybe we * could just do a sync if they try an fsync on a directory file. */ static int ntfs_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { return (0); } /* * Global vfs data structures */ vop_t **ntfs_vnodeop_p; #if defined(__FreeBSD__) static #endif struct vnodeopv_entry_desc ntfs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *)ntfs_bypass }, { &vop_getattr_desc, (vop_t *)ntfs_getattr }, { &vop_inactive_desc, (vop_t *)ntfs_inactive }, { &vop_reclaim_desc, (vop_t *)ntfs_reclaim }, { &vop_print_desc, (vop_t *)ntfs_print }, #if defined(__FreeBSD__) { &vop_islocked_desc, (vop_t *)vop_stdislocked }, { &vop_unlock_desc, (vop_t *)vop_stdunlock }, { &vop_lock_desc, (vop_t *)vop_stdlock }, { &vop_cachedlookup_desc, (vop_t *)ntfs_lookup }, { &vop_lookup_desc, (vop_t *)vfs_cache_lookup }, #else { &vop_islocked_desc, (vop_t *)ntfs_islocked }, { &vop_unlock_desc, (vop_t *)ntfs_unlock }, { &vop_lock_desc, (vop_t *)ntfs_lock }, { &vop_lookup_desc, (vop_t *)ntfs_lookup }, #endif { &vop_access_desc, (vop_t *)ntfs_access }, { &vop_close_desc, (vop_t *)ntfs_close }, { &vop_open_desc, (vop_t *)ntfs_open }, { &vop_readdir_desc, (vop_t *)ntfs_readdir }, { &vop_fsync_desc, (vop_t *)ntfs_fsync }, { &vop_bmap_desc, (vop_t *)ntfs_bmap }, #if defined(__FreeBSD__) { &vop_getpages_desc, (vop_t *) ntfs_getpages }, { &vop_putpages_desc, (vop_t *) ntfs_putpages }, #endif { &vop_strategy_desc, (vop_t *)ntfs_strategy }, #if defined(__FreeBSD__) { &vop_bwrite_desc, (vop_t *)vop_stdbwrite }, #else /* defined(__NetBSD__) */ { &vop_bwrite_desc, (vop_t *)vn_bwrite }, #endif { &vop_read_desc, (vop_t *)ntfs_read }, { &vop_write_desc, (vop_t *)ntfs_write }, { NULL, NULL } }; #if defined(__FreeBSD__) static #endif struct vnodeopv_desc ntfs_vnodeop_opv_desc = { &ntfs_vnodeop_p, ntfs_vnodeop_entries }; #if defined(__FreeBSD__) VNODEOP_SET(ntfs_vnodeop_opv_desc); #endif Index: head/sys/fs/specfs/spec_vnops.c =================================================================== --- head/sys/fs/specfs/spec_vnops.c (revision 49534) +++ head/sys/fs/specfs/spec_vnops.c (revision 49535) @@ -1,963 +1,961 @@ /* * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 - * $Id: spec_vnops.c,v 1.89 1999/06/26 02:46:21 mckusick Exp $ + * $Id: spec_vnops.c,v 1.90 1999/07/20 09:47:45 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include static int spec_advlock __P((struct vop_advlock_args *)); static int spec_badop __P((void)); static int spec_bmap __P((struct vop_bmap_args *)); static int spec_close __P((struct vop_close_args *)); static int spec_freeblks __P((struct vop_freeblks_args *)); static int spec_fsync __P((struct vop_fsync_args *)); static int spec_getattr __P((struct vop_getattr_args *)); static int spec_getpages __P((struct vop_getpages_args *)); static int spec_inactive __P((struct vop_inactive_args *)); static int spec_ioctl __P((struct vop_ioctl_args *)); static int spec_lookup __P((struct vop_lookup_args *)); static int spec_open __P((struct vop_open_args *)); static int spec_poll __P((struct vop_poll_args *)); static int spec_print __P((struct vop_print_args *)); static int spec_read __P((struct vop_read_args *)); static int spec_strategy __P((struct vop_strategy_args *)); static int spec_write __P((struct vop_write_args *)); vop_t **spec_vnodeop_p; static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_access_desc, (vop_t *) vop_ebadf }, { &vop_advlock_desc, (vop_t *) spec_advlock }, { &vop_bmap_desc, (vop_t *) spec_bmap }, { &vop_close_desc, (vop_t *) spec_close }, { &vop_create_desc, (vop_t *) spec_badop }, { &vop_freeblks_desc, (vop_t *) spec_freeblks }, { &vop_fsync_desc, (vop_t *) spec_fsync }, { &vop_getattr_desc, (vop_t *) spec_getattr }, { &vop_getpages_desc, (vop_t *) spec_getpages }, { &vop_inactive_desc, (vop_t *) spec_inactive }, { &vop_ioctl_desc, (vop_t *) spec_ioctl }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) spec_badop }, { &vop_lookup_desc, (vop_t *) spec_lookup }, { &vop_mkdir_desc, (vop_t *) spec_badop }, { &vop_mknod_desc, (vop_t *) spec_badop }, { &vop_open_desc, (vop_t *) spec_open 
}, { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, { &vop_poll_desc, (vop_t *) spec_poll }, { &vop_print_desc, (vop_t *) spec_print }, { &vop_read_desc, (vop_t *) spec_read }, { &vop_readdir_desc, (vop_t *) spec_badop }, { &vop_readlink_desc, (vop_t *) spec_badop }, { &vop_reallocblks_desc, (vop_t *) spec_badop }, { &vop_reclaim_desc, (vop_t *) vop_null }, { &vop_remove_desc, (vop_t *) spec_badop }, { &vop_rename_desc, (vop_t *) spec_badop }, { &vop_rmdir_desc, (vop_t *) spec_badop }, { &vop_setattr_desc, (vop_t *) vop_ebadf }, { &vop_strategy_desc, (vop_t *) spec_strategy }, { &vop_symlink_desc, (vop_t *) spec_badop }, { &vop_write_desc, (vop_t *) spec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; VNODEOP_SET(spec_vnodeop_opv_desc); int spec_vnoperate(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { return (VOCALL(spec_vnodeop_p, ap->a_desc->vdesc_offset, ap)); } static void spec_getpages_iodone __P((struct buf *bp)); /* * Trivial lookup routine that always fails. */ static int spec_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * Open a special file. */ /* ARGSUSED */ static int spec_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct proc *p = ap->a_p; struct vnode *bvp, *vp = ap->a_vp; dev_t bdev, dev = vp->v_rdev; int error; struct cdevsw *dsw; /* * Don't allow open if fs is mounted -nodev. */ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) return (ENXIO); switch (vp->v_type) { case VCHR: dsw = devsw(dev); if ( (dsw == NULL) || (dsw->d_open == NULL)) return ENXIO; if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { /* * When running in very secure mode, do not allow * opens for writing of any disk character devices. */ if (securelevel >= 2 && dsw->d_bmaj != -1 && (dsw->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * When running in secure mode, do not allow opens * for writing of /dev/mem, /dev/kmem, or character * devices whose corresponding block devices are * currently mounted. */ if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && vfinddev(bdev, VBLK, &bvp) && bvp->v_usecount > 0 && (error = vfs_mountedon(bvp))) return (error); if (iskmemdev(dev)) return (EPERM); } } if ((dsw->d_flags & D_TYPEMASK) == D_TTY) vp->v_flag |= VISTTY; VOP_UNLOCK(vp, 0, p); error = (*dsw->d_open)(dev, ap->a_mode, S_IFCHR, p); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); /* NOT REACHED */ case VBLK: dsw = bdevsw(dev); if ( (dsw == NULL) || (dsw->d_open == NULL)) return ENXIO; /* * When running in very secure mode, do not allow * opens for writing of any disk block devices. */ if (securelevel >= 2 && ap->a_cred != FSCRED && (ap->a_mode & FWRITE) && (dsw->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * Do not allow opens of block devices that are * currently mounted. 
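 *
 * Opening the raw block device while a filesystem is mounted on it would
 * create a second, uncoordinated path to the same blocks, so
 * vfs_mountedon() is consulted first and its error is returned as-is;
 * only a device that is not mounted anywhere reaches the driver's d_open
 * entry point.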
	 */
		error = vfs_mountedon(vp);
		if (error)
			return (error);
		return ((*dsw->d_open)(dev, ap->a_mode, S_IFBLK, p));
		/* NOT REACHED */
	default:
		break;
	}
	return (0);
}

/*
 * Vnode op for read
 */
/* ARGSUSED */
static int
spec_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn, nextbn;
	long bsize, bscale;
	struct partinfo dpart;
	int n, on;
	d_ioctl_t *ioctl;
	int error = 0;
	dev_t dev;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp, 0, p);
		error = (*devsw(vp->v_rdev)->d_read)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);

	case VBLK:
		if (uio->uio_offset < 0)
			return (EINVAL);
		dev = vp->v_rdev;

		/*
		 * Calculate block size for block device. The block size must
		 * be larger than the physical minimum.
		 */
		bsize = vp->v_specinfo->si_bsize_best;
		if ((ioctl = bdevsw(dev)->d_ioctl) != NULL &&
		    (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
		    dpart.part->p_fstype == FS_BSDFFS &&
		    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
			bsize = dpart.part->p_frag * dpart.part->p_fsize;
		bscale = btodb(bsize);
		do {
			bn = btodb(uio->uio_offset) & ~(bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			if (vp->v_lastr + bscale == bn) {
				nextbn = bn + bscale;
				error = breadn(vp, bn, (int)bsize, &nextbn,
					(int *)&bsize, 1, NOCRED, &bp);
			} else
				error = bread(vp, bn, (int)bsize, NOCRED, &bp);
			vp->v_lastr = bn;
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			brelse(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}

/*
 * Vnode op for write
 */
/* ARGSUSED */
static int
spec_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn;
	int bsize, blkmask;
	struct partinfo dpart;
	register int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp, 0, p);
		error = (*devsw(vp->v_rdev)->d_write)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);

	case VBLK:
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);

		/*
		 * Calculate block size for block device. The block size must
		 * be larger than the physical minimum.
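		 *
		 * A worked example of the arithmetic in the loop below,
		 * assuming DEV_BSIZE is 512 and a partition block size
		 * (bsize) of 8192: for a write starting at byte offset 10000,
		 *
		 *	blkmask = btodb(8192) - 1         = 15
		 *	bn      = btodb(10000) & ~15      = 16   (byte 8192)
		 *	on      = 10000 % 8192            = 1808
		 *	n       = min(8192 - 1808, resid) = at most 6384
		 *
		 * so the transfer proceeds in block-aligned pieces: a piece
		 * covering a whole block goes through getblk(), a partial
		 * piece is read in with bread() first and then modified.
		 */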
*/ bsize = vp->v_specinfo->si_bsize_best; if ((*bdevsw(vp->v_rdev)->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) { if (dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; } blkmask = btodb(bsize) - 1; do { bn = btodb(uio->uio_offset) & ~blkmask; on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (n == bsize) bp = getblk(vp, bn, bsize, 0, 0); else error = bread(vp, bn, bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } n = min(n, bsize - bp->b_resid); error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) bawrite(bp); else bdwrite(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ } /* * Device ioctl operation. */ /* ARGSUSED */ static int spec_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { dev_t dev = ap->a_vp->v_rdev; switch (ap->a_vp->v_type) { case VCHR: return ((*devsw(dev)->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); case VBLK: return ((*bdevsw(dev)->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); default: panic("spec_ioctl"); /* NOTREACHED */ } } /* ARGSUSED */ static int spec_poll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register dev_t dev; switch (ap->a_vp->v_type) { case VCHR: dev = ap->a_vp->v_rdev; return (*devsw(dev)->d_poll)(dev, ap->a_events, ap->a_p); default: return (vop_defaultop((struct vop_generic_args *)ap)); } } /* * Synch buffers associated with a block device */ /* ARGSUSED */ static int spec_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct buf *nbp; int s; if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. 
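 *
 * Condensed, the loop below is:
 *
 *	s = splbio();
 *	for each bp on vp->v_dirtyblkhd {
 *		if it cannot be locked without sleeping, skip it;
 *		otherwise write it out (vfs_bio_awrite() if clustering is
 *		allowed, else bremfree() + bawrite());
 *		drop spl and restart from the head, the list has changed;
 *	}
 *
 * and for MNT_WAIT callers it then sleeps on vp->v_numoutput until the
 * writes have drained.
 */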
*/ loop: s = splbio(); for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("spec_fsync: not dirty"); if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { BUF_UNLOCK(bp); vfs_bio_awrite(bp); splx(s); } else { bremfree(bp); splx(s); bawrite(bp); } goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0); } #ifdef DIAGNOSTIC if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { vprint("spec_fsync: dirty", vp); splx(s); goto loop; } #endif } splx(s); return (0); } static int spec_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } /* * Just call the device strategy routine */ static int spec_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { struct buf *bp; bp = ap->a_bp; if (((bp->b_flags & B_READ) == 0) && (LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) (*bioops.io_start)(bp); (*bdevsw(bp->b_dev)->d_strategy)(bp); return (0); } static int spec_freeblks(ap) struct vop_freeblks_args /* { struct vnode *a_vp; daddr_t a_addr; daddr_t a_length; } */ *ap; { struct cdevsw *bsw; struct buf *bp; bsw = bdevsw(ap->a_vp->v_rdev); if ((bsw->d_flags & D_CANFREE) == 0) return (0); bp = geteblk(ap->a_length); bp->b_flags |= B_FREEBUF; bp->b_dev = ap->a_vp->v_rdev; bp->b_blkno = ap->a_addr; bp->b_offset = dbtob(ap->a_addr); bp->b_bcount = ap->a_length; (*bsw->d_strategy)(bp); return (0); } /* * This is a noop, simply returning what one has been given. */ static int spec_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Device close routine */ /* ARGSUSED */ static int spec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; dev_t dev = vp->v_rdev; d_close_t *devclose; int mode, error; switch (vp->v_type) { case VCHR: /* * Hack: a tty device that is a controlling terminal * has a reference from the session structure. * We cannot easily tell that a character device is * a controlling terminal, unless it is the closing * process' controlling terminal. In that case, * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ if (vcount(vp) == 2 && ap->a_p && (vp->v_flag & VXLOCK) == 0 && vp == ap->a_p->p_session->s_ttyvp) { vrele(vp); ap->a_p->p_session->s_ttyvp = NULL; } /* * If the vnode is locked, then we are in the midst * of forcably closing the device, otherwise we only * close on last reference. */ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) return (0); devclose = devsw(dev)->d_close; mode = S_IFCHR; break; case VBLK: /* * On last close of a block device (that isn't mounted) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. 
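 *
 * In outline, the VBLK case below does:
 *
 *	vn_lock(vp, ...); vinvalbuf(vp, V_SAVE, ...); VOP_UNLOCK(vp, ...);
 *	if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
 *		return (0);		still referenced elsewhere
 *	(*bdevsw(dev)->d_close)(dev, ap->a_fflag, S_IFBLK, ap->a_p);
 */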
*/ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); VOP_UNLOCK(vp, 0, ap->a_p); if (error) return (error); /* * We do not want to really close the device if it * is still in use unless we are trying to close it * forcibly. Since every use (buffer, vnode, swap, cmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0) return (0); devclose = bdevsw(dev)->d_close; mode = S_IFBLK; break; default: panic("spec_close: not special"); } return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); } /* * Print out the contents of a special device vnode. */ static int spec_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), minor(ap->a_vp->v_rdev)); return (0); } /* * Special device advisory byte-level locks. */ /* ARGSUSED */ static int spec_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); } /* * Special device bad operation */ static int spec_badop() { panic("spec_badop called"); /* NOTREACHED */ } static void spec_getpages_iodone(bp) struct buf *bp; { bp->b_flags |= B_DONE; wakeup(bp); } static int spec_getpages(ap) struct vop_getpages_args *ap; { vm_offset_t kva; int error; int i, pcount, size, s; daddr_t blkno; struct buf *bp; vm_page_t m; vm_ooffset_t offset; int toff, nextoff, nread; struct vnode *vp = ap->a_vp; int blksiz; int gotreqpage; error = 0; pcount = round_page(ap->a_count) / PAGE_SIZE; /* * Calculate the offset of the transfer and do sanity check. * FreeBSD currently only supports an 8 TB range due to b_blkno * being in DEV_BSIZE ( usually 512 ) byte chunks on call to * VOP_STRATEGY. XXX */ offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset; #define DADDR_T_BIT (sizeof(daddr_t)*8) #define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1) if (offset < 0 || offset > OFFSET_MAX) { /* XXX still no %q in kernel. */ printf("spec_getpages: preposterous offset 0x%x%08x\n", (u_int)((u_quad_t)offset >> 32), (u_int)(offset & 0xffffffff)); return (VM_PAGER_ERROR); } blkno = btodb(offset); /* * Round up physical size for real devices. We cannot round using * v_mount's block size data because v_mount has nothing to do with * the device. i.e. it's usually '/dev'. We need the physical block * size for the device itself. * * We can't use v_specmountpoint because it only exists when the * block device is mounted. However, we can use v_specinfo. */ if (vp->v_type == VBLK) blksiz = vp->v_specinfo->si_bsize_phys; else blksiz = DEV_BSIZE; size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); bp = getpbuf(NULL); kva = (vm_offset_t)bp->b_data; /* * Map the pages to be read into the kva. */ pmap_qenter(kva, ap->a_m, pcount); /* Build a minimal buffer header. */ bp->b_flags = B_READ | B_CALL; bp->b_iodone = spec_getpages_iodone; /* B_PHYS is not set, but it is nice to fill this in. */ bp->b_rcred = bp->b_wcred = curproc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_blkno = blkno; bp->b_lblkno = blkno; pbgetvp(ap->a_vp, bp); bp->b_bcount = size; bp->b_bufsize = size; bp->b_resid = 0; cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; /* Do the input. 
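 *
 * The completion handshake that follows is the classic one: the buffer
 * was set up with B_CALL and b_iodone = spec_getpages_iodone, which just
 * sets B_DONE and wakeup()s the buffer address.  Testing B_DONE and
 * calling tsleep() therefore have to happen at splbio, otherwise the
 * disk interrupt could complete the buffer between the flag test and
 * the sleep and the wakeup would be lost.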
*/ VOP_STRATEGY(bp->b_vp, bp); s = splbio(); /* We definitely need to be at splbio here. */ while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PVM, "spread", 0); splx(s); if ((bp->b_flags & B_ERROR) != 0) { if (bp->b_error) error = bp->b_error; else error = EIO; } nread = size - bp->b_resid; if (nread < ap->a_count) { bzero((caddr_t)kva + nread, ap->a_count - nread); } pmap_qremove(kva, pcount); gotreqpage = 0; for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= nread) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else if (toff < nread) { /* * Since this is a VM request, we have to supply the * unaligned offset to allow vm_page_set_validclean() * to zero sub-DEV_BSIZE'd portions of the page. */ vm_page_set_validclean(m, 0, nread - toff); } else { m->valid = 0; m->dirty = 0; } if (i != ap->a_reqpage) { /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error || (m->valid == VM_PAGE_BITS_ALL)) { if (m->valid) { if (m->flags & PG_WANTED) { vm_page_activate(m); } else { vm_page_deactivate(m); } vm_page_wakeup(m); } else { vm_page_free(m); } } else { vm_page_free(m); } } else if (m->valid) { gotreqpage = 1; /* * Since this is a VM request, we need to make the * entire page presentable by zeroing invalid sections. */ if (m->valid != VM_PAGE_BITS_ALL) vm_page_zero_invalid(m, FALSE); } } if (!gotreqpage) { m = ap->a_m[ap->a_reqpage]; #ifndef MAX_PERF printf( "spec_getpages: I/O read failure: (error code=%d) bp %p vp %p\n", error, bp, bp->b_vp); printf( " size: %d, resid: %ld, a_count: %d, valid: 0x%x\n", size, bp->b_resid, ap->a_count, m->valid); printf( " nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n", nread, ap->a_reqpage, (u_long)m->pindex, pcount); #endif /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp, NULL); return VM_PAGER_ERROR; } /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp, NULL); return VM_PAGER_OK; } /* ARGSUSED */ static int spec_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vattr *vap = ap->a_vap; struct partinfo dpart; bzero(vap, sizeof (*vap)); if (vp->v_type == VBLK) { if (vp->v_specinfo) vap->va_blocksize = vp->v_specmountpoint->mnt_stat.f_iosize; else vap->va_blocksize = BLKDEV_IOSIZE; } else if (vp->v_type == VCHR) { vap->va_blocksize = MAXBSIZE; } if ((*bdevsw(vp->v_rdev)->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, ap->a_p) == 0) { vap->va_bytes = dbtob(dpart.disklab->d_partitions [minor(vp->v_rdev)].p_size); vap->va_size = vap->va_bytes; } return (0); } Index: head/sys/gnu/ext2fs/ext2_bmap.c =================================================================== --- head/sys/gnu/ext2fs/ext2_bmap.c (revision 49534) +++ head/sys/gnu/ext2fs/ext2_bmap.c (revision 49535) @@ -1,355 +1,354 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 - * $Id: ufs_bmap.c,v 1.27 1999/05/07 10:11:36 phk Exp $ + * $Id: ufs_bmap.c,v 1.28 1999/05/08 06:40:25 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include -#include /* * Bmap converts a the logical block number of a file to its physical block * number on the disk. The conversion is done by using the logical block * number to index into the array of block pointers described by the dinode. */ int ufs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; ufs_daddr_t a_bn; struct vnode **a_vpp; ufs_daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { /* * Check for underlying vnode requests and ensure that logical * to physical mapping is requested. */ if (ap->a_vpp != NULL) *ap->a_vpp = VTOI(ap->a_vp)->i_devvp; if (ap->a_bnp == NULL) return (0); return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL, ap->a_runp, ap->a_runb)); } /* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ufs_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). 
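 *
 * A worked example, assuming the usual constants for an 8K-block
 * filesystem with 32-bit block pointers (NDADDR = 12, NIADDR = 3,
 * MNINDIR(ump) = 2048 pointers per indirect block):
 *
 *	file block 0 .. 11			direct, found in i_db[]
 *	file block 12 .. 12+2048-1		one level (single indirect)
 *	file block 12+2048 .. 12+2048+2048^2-1	two levels (double indirect)
 *	anything larger, up to three levels	triple indirect
 *
 * ufs_getlbns() below builds exactly this path, one (negative) indirect
 * block number plus in-block offset per level.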
*/ int ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) struct vnode *vp; ufs_daddr_t bn; ufs_daddr_t *bnp; struct indir *ap; int *nump; int *runp; int *runb; { register struct inode *ip; struct buf *bp; struct ufsmount *ump; struct mount *mp; struct vnode *devvp; struct indir a[NIADDR+1], *xap; ufs_daddr_t daddr; long metalbn; int error, maxrun = 0, num; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); #ifdef DIAGNOSTIC if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL)) panic("ufs_bmaparray: invalid arguments"); #endif if (runp) { *runp = 0; } if (runb) { *runb = 0; } maxrun = 0; if (runp || runb || (vp->v_maxio == 0)) { struct vnode *devvp; int blksize; blksize = mp->mnt_stat.f_iosize; /* * XXX * If MAXPHYS is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. */ devvp = ip->i_devvp; if (devvp != NULL && devvp->v_tag != VT_MFS && devvp->v_type == VBLK) { if (bdevsw(devvp->v_rdev)->d_maxio > MAXPHYS) { maxrun = MAXPHYS; vp->v_maxio = MAXPHYS; } else { maxrun = bdevsw(devvp->v_rdev)->d_maxio; vp->v_maxio = bdevsw(devvp->v_rdev)->d_maxio; } maxrun = maxrun / blksize; maxrun -= 1; } if (maxrun <= 0) { vp->v_maxio = DFLTPHYS; maxrun = DFLTPHYS / blksize; maxrun -= 1; } } xap = ap == NULL ? a : ap; if (!nump) nump = # error = ufs_getlbns(vp, bn, xap, nump); if (error) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); if (*bnp == 0) *bnp = -1; else if (runp) { daddr_t bnb = bn; for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); ++bn, ++*runp); bn = bnb; if (runb && (bn > 0)) { for (--bn; (bn >= 0) && (*runb < maxrun) && is_sequential(ump, ip->i_db[bn], ip->i_db[bn+1]); --bn, ++*runb); } } return (0); } /* Get disk address out of indirect block array */ daddr = ip->i_ib[xap->in_off]; devvp = VFSTOUFS(vp->v_mount)->um_devvp; for (bp = NULL, ++xap; --num; ++xap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = xap->in_lbn; if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. */ if (bp) bqrelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); if ((bp->b_flags & B_CACHE) == 0) { #ifdef DIAGNOSTIC if (!daddr) panic("ufs_bmaparray: indirect block not in cache"); #endif bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; bp->b_flags &= ~(B_INVAL|B_ERROR); vfs_busy_pages(bp, 0); VOP_STRATEGY(bp->b_vp, bp); curproc->p_stats->p_ru.ru_inblock++; /* XXX */ error = biowait(bp); if (error) { brelse(bp); return (error); } } daddr = ((ufs_daddr_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) { for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((ufs_daddr_t *)bp->b_data)[bn - 1], ((ufs_daddr_t *)bp->b_data)[bn]); ++bn, ++*runp); bn = xap->in_off; if (runb && bn) { for(--bn; bn > 0 && *runb < maxrun && is_sequential(ump, ((daddr_t *)bp->b_data)[bn], ((daddr_t *)bp->b_data)[bn+1]); --bn, ++*runb); } } } if (bp) bqrelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; return (0); } /* * Create an array of logical block number/offset pairs which represent the * path of indirect blocks required to access a data block. 
The first "pair" * contains the logical block number of the appropriate single, double or * triple indirect block and the offset into the inode indirect block array. * Note, the logical block number of the inode single/double/triple indirect * block appears twice in the array, once with the offset into the i_ib and * once with the offset into the page itself. */ int ufs_getlbns(vp, bn, ap, nump) struct vnode *vp; ufs_daddr_t bn; struct indir *ap; int *nump; { long blockcnt, metalbn, realbn; struct ufsmount *ump; int i, numlevels, off; int64_t qblockcnt; ump = VFSTOUFS(vp->v_mount); if (nump) *nump = 0; numlevels = 0; realbn = bn; if ((long)bn < 0) bn = -(long)bn; /* The first NDADDR blocks are direct blocks. */ if (bn < NDADDR) return (0); /* * Determine the number of levels of indirection. After this loop * is done, blockcnt indicates the number of data blocks possible * at the previous level of indirection, and NIADDR - i is the number * of levels of indirection needed to locate the requested block. */ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); /* * Use int64_t's here to avoid overflow for triple indirect * blocks when longs have 32 bits and the block size is more * than 4K. */ qblockcnt = (int64_t)blockcnt * MNINDIR(ump); if (bn < qblockcnt) break; blockcnt = qblockcnt; } /* Calculate the address of the first meta-block. */ if (realbn >= 0) metalbn = -(realbn - bn + NIADDR - i); else metalbn = -(-realbn - bn + NIADDR - i); /* * At each iteration, off is the offset into the bap array which is * an array of disk addresses at the current level of indirection. * The logical block number and the offset in that block are stored * into the argument array. */ ap->in_lbn = metalbn; ap->in_off = off = NIADDR - i; ap->in_exists = 0; ap++; for (++numlevels; i <= NIADDR; i++) { /* If searching for a meta-data block, quit when found. */ if (metalbn == realbn) break; off = (bn / blockcnt) % MNINDIR(ump); ++numlevels; ap->in_lbn = metalbn; ap->in_off = off; ap->in_exists = 0; ++ap; metalbn -= -1 + off * blockcnt; blockcnt /= MNINDIR(ump); } if (nump) *nump = numlevels; return (0); } Index: head/sys/gnu/ext2fs/ext2_vfsops.c =================================================================== --- head/sys/gnu/ext2fs/ext2_vfsops.c (revision 49534) +++ head/sys/gnu/ext2fs/ext2_vfsops.c (revision 49535) @@ -1,1190 +1,1188 @@ /* * modified for EXT2FS support in Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /* * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include -#include - #include #include #include #include #include #include #include #include static int ext2_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int ext2_flushfiles __P((struct mount *mp, int flags, struct proc *p)); static int ext2_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int ext2_mountfs __P((struct vnode *, struct mount *, struct proc *)); static int ext2_reload __P((struct mount *mountp, struct ucred *cred, struct proc *p)); static int ext2_sbupdate __P((struct ufsmount *, int)); static int ext2_statfs __P((struct mount *, struct statfs *, struct proc *)); static int ext2_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int ext2_unmount __P((struct mount *, int, struct proc *)); static int ext2_vget __P((struct mount *, ino_t, struct vnode **)); static int ext2_vptofh __P((struct vnode *, struct fid *)); static MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); static struct vfsops ext2fs_vfsops = { ext2_mount, ufs_start, /* empty function */ ext2_unmount, ufs_root, /* root inode via vget */ ufs_quotactl, /* does operations associated with quotas */ ext2_statfs, ext2_sync, ext2_vget, ext2_fhtovp, ext2_vptofh, ext2_init, }; VFS_SET(ext2fs_vfsops, ext2fs, 0); #define bsd_malloc malloc #define bsd_free free static int ext2fs_inode_hash_lock; static int compute_sb_data __P((struct vnode * devvp, struct ext2_super_block * es, struct ext2_sb_info * fs)); #ifdef notyet static int ext2_mountroot __P((void)); /* * Called by main() when ext2fs is going to be mounted as root. * * Name is updated by mount(8) after booting. 
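 *
 * Condensed call sequence of the (still "notyet") function below:
 *
 *	bdevvp(rootdev, &rootvp);
 *	mp = bsd_malloc(sizeof(struct mount), M_MOUNT, M_WAITOK);
 *	mp->mnt_op = &ext2fs_vfsops;  mp->mnt_flag = MNT_RDONLY;
 *	ext2_mountfs(rootvp, mp, p);
 *	CIRCLEQ_INSERT_HEAD(&mountlist, mp, mnt_list);
 *	ext2_statfs(mp, &mp->mnt_stat, p);
 *	inittodr(fs->s_es->s_wtime);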
*/ #define ROOTNAME "root_device" static int ext2_mountroot() { register struct ext2_sb_info *fs; register struct mount *mp; struct proc *p = curproc; struct ufsmount *ump; u_int size; int error; if ((error = bdevvp(rootdev, &rootvp))) { printf("ext2_mountroot: can't find rootvp"); return (error); } mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &ext2fs_vfsops; mp->mnt_flag = MNT_RDONLY; if (bdevsw(rootdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw(rootdev)->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; if (error = ext2_mountfs(rootvp, mp, p)) { bsd_free(mp, M_MOUNT); return (error); } if (error = vfs_lock(mp)) { (void)ext2_unmount(mp, 0, p); bsd_free(mp, M_MOUNT); return (error); } CIRCLEQ_INSERT_HEAD(&mountlist, mp, mnt_list); mp->mnt_flag |= MNT_ROOTFS; mp->mnt_vnodecovered = NULLVP; ump = VFSTOUFS(mp); fs = ump->um_e2fs; bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); fs->fs_fsmnt[0] = '/'; bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN); (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)ext2_statfs(mp, &mp->mnt_stat, p); vfs_unlock(mp); inittodr(fs->s_es->s_wtime); /* this helps to set the time */ return (0); } #endif /* * VFS Operations. * * mount system call */ static int ext2_mount(mp, path, data, ndp, p) register struct mount *mp; char *path; caddr_t data; /* this is actually a (struct ufs_args *) */ struct nameidata *ndp; struct proc *p; { struct vnode *devvp; struct ufs_args args; struct ufsmount *ump = 0; register struct ext2_sb_info *fs; u_int size; int error, flags; mode_t accessmode; if ((error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) != 0) return (error); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. * Disallow clearing MNT_NOCLUSTERR and MNT_NOCLUSTERW flags, * if block device requests. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_e2fs; error = 0; if (bdevsw(ump->um_dev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw(ump->um_dev)->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (vfs_busy(mp, LK_NOWAIT, 0, p)) return (EBUSY); error = ext2_flushfiles(mp, flags, p); vfs_unbusy(mp, p); if (!error && fs->s_wasvalid) { fs->s_es->s_state |= EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); } fs->s_rd_only = 1; } if (!error && (mp->mnt_flag & MNT_RELOAD)) error = ext2_reload(mp, ndp->ni_cnd.cn_cred, p); if (error) return (error); if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (p->p_ucred->cr_uid != 0) { devvp = ump->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p)) != 0) { VOP_UNLOCK(devvp, 0, p); return (error); } VOP_UNLOCK(devvp, 0, p); } if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || (fs->s_es->s_state & EXT2_ERROR_FS)) { if (mp->mnt_flag & MNT_FORCE) { printf( "WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { printf( "WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n", fs->fs_fsmnt); return (EPERM); } } fs->s_es->s_state &= ~EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); fs->s_rd_only = 0; } if (args.fspec == 0) { /* * Process export requests. */ return (vfs_export(mp, &ump->um_export, &args.export)); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp)) != 0) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return (ENOTBLK); } if (bdevsw(devvp->v_rdev) == NULL) { vrele(devvp); return (ENXIO); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (p->p_ucred->cr_uid != 0) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { if (bdevsw(devvp->v_rdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw(devvp->v_rdev)->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; error = ext2_mountfs(devvp, mp, p); } else { if (devvp != ump->um_devvp) error = EINVAL; /* needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return (error); } ump = VFSTOUFS(mp); fs = ump->um_e2fs; (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)ext2_statfs(mp, &mp->mnt_stat, p); return (0); } /* * checks that the data in the descriptor blocks make sense * this is taken from ext2/super.c */ static int ext2_check_descriptors (struct ext2_sb_info * sb) { int i; int desc_block = 0; unsigned long block = sb->s_es->s_first_data_block; struct ext2_group_desc * gdp = NULL; /* ext2_debug ("Checking group descriptors"); */ for (i = 0; i < sb->s_groups_count; i++) { /* examine next descriptor block */ if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) gdp = (struct ext2_group_desc *) sb->s_group_desc[desc_block++]->b_data; if (gdp->bg_block_bitmap < block || gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Block bitmap for group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_block_bitmap); return 0; } if (gdp->bg_inode_bitmap < block || gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Inode bitmap for group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_inode_bitmap); return 0; } if (gdp->bg_inode_table < block || gdp->bg_inode_table + sb->s_itb_per_group >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Inode table for group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_inode_table); return 0; } block += EXT2_BLOCKS_PER_GROUP(sb); gdp++; } return 1; } /* * this computes the fields of the ext2_sb_info structure from the * data in the ext2_super_block structure read in */ static int compute_sb_data(devvp, es, fs) struct vnode * devvp; struct ext2_super_block * es; struct ext2_sb_info * fs; { int db_count, error; int i, j; int logic_sb_block = 1; /* XXX for now */ #if 1 #define V(v) #else #define V(v) printf(#v"= %d\n", 
fs->v); #endif fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; V(s_blocksize) fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; V(s_bshift) fs->s_fsbtodb = es->s_log_block_size + 1; V(s_fsbtodb) fs->s_qbmask = fs->s_blocksize - 1; V(s_bmask) fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); V(s_blocksize_bits) fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; V(s_frag_size) if (fs->s_frag_size) fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; V(s_frags_per_block) fs->s_blocks_per_group = es->s_blocks_per_group; V(s_blocks_per_group) fs->s_frags_per_group = es->s_frags_per_group; V(s_frags_per_group) fs->s_inodes_per_group = es->s_inodes_per_group; V(s_inodes_per_group) fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; V(s_inodes_per_block) fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; V(s_itb_per_group) fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); V(s_desc_per_block) /* s_resuid / s_resgid ? */ fs->s_groups_count = (es->s_blocks_count - es->s_first_data_block + EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs); V(s_groups_count) db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / EXT2_DESC_PER_BLOCK(fs); fs->s_db_per_group = db_count; V(s_db_per_group) fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), M_UFSMNT, M_WAITOK); /* adjust logic_sb_block */ if(fs->s_blocksize > SBSIZE) /* Godmar thinks: if the blocksize is greater than 1024, then the superblock is logically part of block zero. */ logic_sb_block = 0; for (i = 0; i < db_count; i++) { error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); if(error) { for (j = 0; j < i; j++) brelse(fs->s_group_desc[j]); bsd_free(fs->s_group_desc, M_UFSMNT); printf("EXT2-fs: unable to read group descriptors (%d)\n", error); return EIO; } /* Set the B_LOCKED flag on the buffer, then brelse() it */ LCK_BUF(fs->s_group_desc[i]) } if(!ext2_check_descriptors(fs)) { for (j = 0; j < db_count; j++) ULCK_BUF(fs->s_group_desc[j]) bsd_free(fs->s_group_desc, M_UFSMNT); printf("EXT2-fs: (ext2_check_descriptors failure) " "unable to read group descriptors\n"); return EIO; } for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { fs->s_inode_bitmap_number[i] = 0; fs->s_inode_bitmap[i] = NULL; fs->s_block_bitmap_number[i] = 0; fs->s_block_bitmap[i] = NULL; } fs->s_loaded_inode_bitmaps = 0; fs->s_loaded_block_bitmaps = 0; return 0; } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ static int ext2_reload(mountp, cred, p) register struct mount *mountp; struct ucred *cred; struct proc *p; { register struct vnode *vp, *nvp, *devvp; struct inode *ip; struct buf *bp; struct ext2_super_block * es; struct ext2_sb_info *fs; int error; if ((mountp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mountp)->um_devvp; if (vinvalbuf(devvp, 0, cred, p, 0, 0)) panic("ext2_reload: dirty1"); /* * Step 2: re-read superblock from disk. 
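
A quick cross-check, not part of the patch: the arithmetic compute_sb_data() performs above is easy to verify in isolation. The sketch below is a minimal user-space rendition of the same derivations, using invented superblock values (4K blocks, 2 GB of space, 32768 blocks and 8192 inodes per group) rather than the kernel structures; it only illustrates the shift and the rounded-up divisions, and does not mirror the function exactly.

#include <stdio.h>

#define EXT2_MIN_BLOCK_SIZE 1024	/* as in the ext2 headers */

int
main(void)
{
    /* Hypothetical superblock fields, chosen for illustration only. */
    unsigned long s_log_block_size = 2;		/* 1024 << 2 = 4096-byte blocks */
    unsigned long s_blocks_count = 524288;	/* 2 GB at 4K per block */
    unsigned long s_first_data_block = 0;	/* 0 whenever blocksize > 1024 */
    unsigned long s_blocks_per_group = 32768;
    unsigned long s_inodes_per_group = 8192;
    unsigned long inode_size = 128;		/* EXT2_INODE_SIZE */
    unsigned long desc_size = 32;		/* sizeof(struct ext2_group_desc) */

    unsigned long blocksize = EXT2_MIN_BLOCK_SIZE << s_log_block_size;
    unsigned long inodes_per_block = blocksize / inode_size;
    unsigned long itb_per_group = s_inodes_per_group / inodes_per_block;
    unsigned long desc_per_block = blocksize / desc_size;

    /* Rounded-up divisions, as in compute_sb_data(). */
    unsigned long groups_count = (s_blocks_count - s_first_data_block +
        s_blocks_per_group - 1) / s_blocks_per_group;
    unsigned long db_count = (groups_count + desc_per_block - 1) /
        desc_per_block;

    printf("blocksize %lu, %lu groups, %lu descriptor block(s), "
        "%lu inode-table blocks per group\n",
        blocksize, groups_count, db_count, itb_per_group);
    return (0);
}

With these numbers the program reports a 4096-byte block size, 16 groups, 1 descriptor block and 256 inode-table blocks per group, which is the shape ext2_statfs() later relies on.
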
* constants have been adjusted for ext2 */ if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) return (error); es = (struct ext2_super_block *)bp->b_data; if (es->s_magic != EXT2_SUPER_MAGIC) { if(es->s_magic == EXT2_PRE_02B_MAGIC) printf("This filesystem bears the magic number of a pre " "0.2b version of ext2. This is not supported by " "Lites.\n"); else printf("Wrong magic number: %x (expected %x for ext2 fs\n", es->s_magic, EXT2_SUPER_MAGIC); brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOUFS(mountp)->um_e2fs; bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); if((error = compute_sb_data(devvp, es, fs)) != 0) { brelse(bp); return error; } #ifdef UNKLAR if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; #endif brelse(bp); loop: simple_lock(&mntvnode_slock); for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { if (vp->v_mount != mountp) { simple_unlock(&mntvnode_slock); goto loop; } nvp = vp->v_mntvnodes.le_next; /* * Step 4: invalidate all inactive vnodes. */ if (vrecycle(vp, &mntvnode_slock, p)) goto loop; /* * Step 5: invalidate all cached file data. */ simple_lock(&vp->v_interlock); simple_unlock(&mntvnode_slock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { goto loop; } if (vinvalbuf(vp, 0, cred, p, 0, 0)) panic("ext2_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->s_blocksize, NOCRED, &bp); if (error) { vput(vp); return (error); } ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), &ip->i_din); brelse(bp); vput(vp); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); return (0); } /* * Common code for mount and mountroot */ static int ext2_mountfs(devvp, mp, p) register struct vnode *devvp; struct mount *mp; struct proc *p; { register struct ufsmount *ump; struct buf *bp; register struct ext2_sb_info *fs; struct ext2_super_block * es; dev_t dev = devvp->v_rdev; struct partinfo dpart; int havepart = 0; int error, i, size; int ronly; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp)) != 0) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) return (error); #ifdef READONLY /* turn on this to force it to be read-only */ mp->mnt_flag |= MNT_RDONLY; #endif ronly = (mp->mnt_flag & MNT_RDONLY) != 0; if ((error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p)) != 0) return (error); if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) size = DEV_BSIZE; else { havepart = 1; size = dpart.disklab->d_secsize; } bp = NULL; ump = NULL; if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) goto out; es = (struct ext2_super_block *)bp->b_data; if (es->s_magic != EXT2_SUPER_MAGIC) { if(es->s_magic == EXT2_PRE_02B_MAGIC) printf("This filesystem bears the magic number of a pre " "0.2b version of ext2. 
This is not supported by " "Lites.\n"); else printf("Wrong magic number: %x (expected %x for EXT2FS)\n", es->s_magic, EXT2_SUPER_MAGIC); error = EINVAL; /* XXX needs translation */ goto out; } if ((es->s_state & EXT2_VALID_FS) == 0 || (es->s_state & EXT2_ERROR_FS)) { if (ronly || (mp->mnt_flag & MNT_FORCE)) { printf( "WARNING: Filesystem was not properly dismounted\n"); } else { printf( "WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); error = EPERM; goto out; } } ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK); bzero((caddr_t)ump, sizeof *ump); ump->um_malloctype = M_EXT2NODE; ump->um_blkatoff = ext2_blkatoff; ump->um_truncate = ext2_truncate; ump->um_update = ext2_update; ump->um_valloc = ext2_valloc; ump->um_vfree = ext2_vfree; /* I don't know whether this is the right strategy. Note that we dynamically allocate both a ext2_sb_info and a ext2_super_block while Linux keeps the super block in a locked buffer */ ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), M_UFSMNT, M_WAITOK); ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), M_UFSMNT, M_WAITOK); bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) goto out; /* * We don't free the group descriptors allocated by compute_sb_data() * until ext2_unmount(). This is OK since the mount will succeed. */ brelse(bp); bp = NULL; fs = ump->um_e2fs; fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ /* if the fs is not mounted read-only, make sure the super block is always written back on a sync() */ fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; if (ronly == 0) { fs->s_dirt = 1; /* mark it modified */ fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ } mp->mnt_data = (qaddr_t)ump; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; /* setting those two parameters allows us to use ufs_bmap w/o changse ! */ ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; devvp->v_specmountpoint = mp; if (ronly == 0) ext2_sbupdate(ump, MNT_WAIT); return (0); out: if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? 
FREAD : FREAD|FWRITE, NOCRED, p); if (ump) { bsd_free(ump->um_e2fs->s_es, M_UFSMNT); bsd_free(ump->um_e2fs, M_UFSMNT); bsd_free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } /* * unmount system call */ static int ext2_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct ufsmount *ump; register struct ext2_sb_info *fs; int error, flags, ronly, i; flags = 0; if (mntflags & MNT_FORCE) { if (mp->mnt_flag & MNT_ROOTFS) return (EINVAL); flags |= FORCECLOSE; } if ((error = ext2_flushfiles(mp, flags, p)) != 0) return (error); ump = VFSTOUFS(mp); fs = ump->um_e2fs; ronly = fs->s_rd_only; if (ronly == 0) { if (fs->s_wasvalid) fs->s_es->s_state |= EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); } /* release buffers containing group descriptors */ for(i = 0; i < fs->s_db_per_group; i++) ULCK_BUF(fs->s_group_desc[i]) bsd_free(fs->s_group_desc, M_UFSMNT); /* release cached inode/block bitmaps */ for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) if (fs->s_inode_bitmap[i]) ULCK_BUF(fs->s_inode_bitmap[i]) for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) if (fs->s_block_bitmap[i]) ULCK_BUF(fs->s_block_bitmap[i]) ump->um_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); vrele(ump->um_devvp); bsd_free(fs->s_es, M_UFSMNT); bsd_free(fs, M_UFSMNT); bsd_free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Flush out all the files in a filesystem. */ static int ext2_flushfiles(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { register struct ufsmount *ump; int error; #if QUOTA int i; #endif ump = VFSTOUFS(mp); #if QUOTA if (mp->mnt_flag & MNT_QUOTA) { if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0) return (error); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP) continue; quotaoff(p, mp, i); } /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } #endif error = vflush(mp, NULLVP, flags); return (error); } /* * Get file system statistics. * taken from ext2/super.c ext2_statfs */ static int ext2_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { unsigned long overhead; unsigned long overhead_per_group; register struct ufsmount *ump; register struct ext2_sb_info *fs; register struct ext2_super_block *es; ump = VFSTOUFS(mp); fs = ump->um_e2fs; es = fs->s_es; if (es->s_magic != EXT2_SUPER_MAGIC) panic("ext2_statfs - magic number spoiled"); /* * Compute the overhead (FS structures) */ overhead_per_group = 1 /* super block */ + fs->s_db_per_group + 1 /* block bitmap */ + 1 /* inode bitmap */ + fs->s_itb_per_group; overhead = es->s_first_data_block + fs->s_groups_count * overhead_per_group; sbp->f_bsize = EXT2_FRAG_SIZE(fs); sbp->f_iosize = EXT2_BLOCK_SIZE(fs); sbp->f_blocks = es->s_blocks_count - overhead; sbp->f_bfree = es->s_free_blocks_count; sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; sbp->f_files = es->s_inodes_count; sbp->f_ffree = es->s_free_inodes_count; if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. 
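
A side note, not part of the diff: the overhead that ext2_statfs() above subtracts from s_blocks_count is just the per-group metadata (superblock copy, group descriptors, two bitmaps, inode table) multiplied by the number of groups. A small worked example, reusing the hypothetical 16-group layout from the earlier sketch (one descriptor block and 256 inode-table blocks per group; the numbers are invented, not taken from a real filesystem):

#include <stdio.h>

int
main(void)
{
    unsigned long first_data_block = 0;
    unsigned long groups_count = 16;
    unsigned long db_per_group = 1;	/* group-descriptor blocks */
    unsigned long itb_per_group = 256;	/* inode-table blocks */

    /* superblock + descriptors + block bitmap + inode bitmap + inode table */
    unsigned long overhead_per_group = 1 + db_per_group + 1 + 1 +
        itb_per_group;
    unsigned long overhead = first_data_block +
        groups_count * overhead_per_group;

    printf("%lu metadata blocks per group, %lu blocks of overhead in all\n",
        overhead_per_group, overhead);	/* 260 and 4160 here */
    return (0);
}

f_blocks is then reported as s_blocks_count minus that overhead, while f_bavail further subtracts the reserved blocks (s_r_blocks_count) from f_bfree.
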
* * Note: we are always called with the filesystem marked `MPBUSY'. */ static int ext2_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *nvp, *vp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct ext2_sb_info *fs; int error, allerror = 0; fs = ump->um_e2fs; if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ printf("fs = %s\n", fs->fs_fsmnt); panic("ext2_sync: rofs mod"); } /* * Write back each (modified) inode. */ simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; ip = VTOI(vp); if (vp->v_type == VNON || ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { simple_unlock(&vp->v_interlock); continue; } simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; } if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); /* * Force stale file system control information to be flushed. */ if (waitfor != MNT_LAZY) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) allerror = error; VOP_UNLOCK(ump->um_devvp, 0, p); } #if QUOTA qsync(mp); #endif /* * Write back modified superblock. */ if (fs->s_dirt != 0) { fs->s_dirt = 0; fs->s_es->s_wtime = time_second; if ((error = ext2_sbupdate(ump, waitfor)) != 0) allerror = error; } return (allerror); } /* * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it * in from disk. If it is in core, wait for the lock bit to clear, then * return the inode locked. Detection and handling of mount points must be * done by the calling routine. */ static int ext2_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { register struct ext2_sb_info *fs; register struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; dev_t dev; int i, error; int used_blocks; ump = VFSTOUFS(mp); dev = ump->um_dev; restart: if ((*vpp = ufs_ihashget(dev, ino)) != NULL) return (0); /* * Lock out the creation of new entries in the FFS hash table in * case getnewvnode() or MALLOC() blocks, otherwise a duplicate * may occur! */ if (ext2fs_inode_hash_lock) { while (ext2fs_inode_hash_lock) { ext2fs_inode_hash_lock = -1; tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); } goto restart; } ext2fs_inode_hash_lock = 1; /* * If this MALLOC() is performed after the getnewvnode() * it might block, leaving a vnode with a NULL v_data to be * found by ext2_sync() if a sync happens to fire right then, * which will cause a panic because ext2_sync() blindly * dereferences vp->v_data (as well it should). */ MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); /* Allocate a new vnode/inode. 
*/ if ((error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) != 0) { if (ext2fs_inode_hash_lock < 0) wakeup(&ext2fs_inode_hash_lock); ext2fs_inode_hash_lock = 0; *vpp = NULL; FREE(ip, M_EXT2NODE); return (error); } bzero((caddr_t)ip, sizeof(struct inode)); lockinit(&ip->i_lock, PINOD, "ext2in", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_e2fs = fs = ump->um_e2fs; ip->i_dev = dev; ip->i_number = ino; #if QUOTA for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; #endif /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ ufs_ihashins(ip); if (ext2fs_inode_hash_lock < 0) wakeup(&ext2fs_inode_hash_lock); ext2fs_inode_hash_lock = 0; /* Read in the disk contents for the inode, copy into the inode. */ #if 0 printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); #endif if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->s_blocksize, NOCRED, &bp)) != 0) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ vput(vp); brelse(bp); *vpp = NULL; return (error); } /* convert ext2 inode to dinode */ ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * ino_to_fsbo(fs, ino)), &ip->i_din); ip->i_block_group = ino_to_cg(fs, ino); ip->i_next_alloc_block = 0; ip->i_next_alloc_goal = 0; ip->i_prealloc_count = 0; ip->i_prealloc_block = 0; /* now we want to make sure that block pointers for unused blocks are zeroed out - ext2_balloc depends on this although for regular files and directories only */ if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) ip->i_db[i] = 0; } /* ext2_print_inode(ip); */ brelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ if ((error = ufs_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization now that aliasing has been resolved. */ ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { ip->i_gen = random() / 2 + 1; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - call ext2_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. 
exflagsp and credanonp */ static int ext2_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { register struct ufid *ufhp; struct ext2_sb_info *fs; ufhp = (struct ufid *)fhp; fs = VFSTOUFS(mp)->um_e2fs; if (ufhp->ufid_ino < ROOTINO || ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group) return (ESTALE); return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); } /* * Vnode pointer to File handle */ /* ARGSUSED */ static int ext2_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct inode *ip; register struct ufid *ufhp; ip = VTOI(vp); ufhp = (struct ufid *)fhp; ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; return (0); } /* * Write a superblock and associated information back to disk. */ static int ext2_sbupdate(mp, waitfor) struct ufsmount *mp; int waitfor; { register struct ext2_sb_info *fs = mp->um_e2fs; register struct ext2_super_block *es = fs->s_es; register struct buf *bp; int error = 0; /* printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); */ bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0); bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); if (waitfor == MNT_WAIT) error = bwrite(bp); else bawrite(bp); /* * The buffers for group descriptors, inode bitmaps and block bitmaps * are not busy at this point and are (hopefully) written by the * usual sync mechanism. No need to write them here */ return (error); } Index: head/sys/gnu/fs/ext2fs/ext2_bmap.c =================================================================== --- head/sys/gnu/fs/ext2fs/ext2_bmap.c (revision 49534) +++ head/sys/gnu/fs/ext2fs/ext2_bmap.c (revision 49535) @@ -1,355 +1,354 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
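
For orientation only, not part of this change: ext2_vptofh() and ext2_fhtovp() above round-trip an NFS file handle through a struct ufid, which carries little more than the inode number and its generation count; the staleness test rejects inode numbers outside the range the superblock allows before handing off to ufs_check_export(). A minimal user-space analogue of that range check, with invented limits and an invented helper name (check_handle):

#include <stdio.h>

#define ROOTINO 2	/* lowest ordinary inode number */

struct handle {		/* simplified stand-in for struct ufid */
    unsigned long ino;
    unsigned long gen;
};

/* Returns 0 if the handle could still be valid, -1 if it is clearly stale. */
static int
check_handle(const struct handle *h, unsigned long groups_count,
    unsigned long inodes_per_group)
{
    if (h->ino < ROOTINO || h->ino >= groups_count * inodes_per_group)
        return (-1);
    return (0);
}

int
main(void)
{
    struct handle h = { 12345, 7 };	/* invented values */

    printf("handle is %s\n",
        check_handle(&h, 16, 8192) == 0 ? "plausible" : "stale");
    return (0);
}

The range check only filters out handles that could never have been issued; catching reuse of an inode number after deletion is the job of the generation number carried in the handle.
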
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 - * $Id: ufs_bmap.c,v 1.27 1999/05/07 10:11:36 phk Exp $ + * $Id: ufs_bmap.c,v 1.28 1999/05/08 06:40:25 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include -#include /* * Bmap converts a the logical block number of a file to its physical block * number on the disk. The conversion is done by using the logical block * number to index into the array of block pointers described by the dinode. */ int ufs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; ufs_daddr_t a_bn; struct vnode **a_vpp; ufs_daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { /* * Check for underlying vnode requests and ensure that logical * to physical mapping is requested. */ if (ap->a_vpp != NULL) *ap->a_vpp = VTOI(ap->a_vp)->i_devvp; if (ap->a_bnp == NULL) return (0); return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL, ap->a_runp, ap->a_runb)); } /* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ufs_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). */ int ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) struct vnode *vp; ufs_daddr_t bn; ufs_daddr_t *bnp; struct indir *ap; int *nump; int *runp; int *runb; { register struct inode *ip; struct buf *bp; struct ufsmount *ump; struct mount *mp; struct vnode *devvp; struct indir a[NIADDR+1], *xap; ufs_daddr_t daddr; long metalbn; int error, maxrun = 0, num; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); #ifdef DIAGNOSTIC if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL)) panic("ufs_bmaparray: invalid arguments"); #endif if (runp) { *runp = 0; } if (runb) { *runb = 0; } maxrun = 0; if (runp || runb || (vp->v_maxio == 0)) { struct vnode *devvp; int blksize; blksize = mp->mnt_stat.f_iosize; /* * XXX * If MAXPHYS is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. */ devvp = ip->i_devvp; if (devvp != NULL && devvp->v_tag != VT_MFS && devvp->v_type == VBLK) { if (bdevsw(devvp->v_rdev)->d_maxio > MAXPHYS) { maxrun = MAXPHYS; vp->v_maxio = MAXPHYS; } else { maxrun = bdevsw(devvp->v_rdev)->d_maxio; vp->v_maxio = bdevsw(devvp->v_rdev)->d_maxio; } maxrun = maxrun / blksize; maxrun -= 1; } if (maxrun <= 0) { vp->v_maxio = DFLTPHYS; maxrun = DFLTPHYS / blksize; maxrun -= 1; } } xap = ap == NULL ? 
a : ap; if (!nump) nump = # error = ufs_getlbns(vp, bn, xap, nump); if (error) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); if (*bnp == 0) *bnp = -1; else if (runp) { daddr_t bnb = bn; for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); ++bn, ++*runp); bn = bnb; if (runb && (bn > 0)) { for (--bn; (bn >= 0) && (*runb < maxrun) && is_sequential(ump, ip->i_db[bn], ip->i_db[bn+1]); --bn, ++*runb); } } return (0); } /* Get disk address out of indirect block array */ daddr = ip->i_ib[xap->in_off]; devvp = VFSTOUFS(vp->v_mount)->um_devvp; for (bp = NULL, ++xap; --num; ++xap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = xap->in_lbn; if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. */ if (bp) bqrelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); if ((bp->b_flags & B_CACHE) == 0) { #ifdef DIAGNOSTIC if (!daddr) panic("ufs_bmaparray: indirect block not in cache"); #endif bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; bp->b_flags &= ~(B_INVAL|B_ERROR); vfs_busy_pages(bp, 0); VOP_STRATEGY(bp->b_vp, bp); curproc->p_stats->p_ru.ru_inblock++; /* XXX */ error = biowait(bp); if (error) { brelse(bp); return (error); } } daddr = ((ufs_daddr_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) { for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((ufs_daddr_t *)bp->b_data)[bn - 1], ((ufs_daddr_t *)bp->b_data)[bn]); ++bn, ++*runp); bn = xap->in_off; if (runb && bn) { for(--bn; bn > 0 && *runb < maxrun && is_sequential(ump, ((daddr_t *)bp->b_data)[bn], ((daddr_t *)bp->b_data)[bn+1]); --bn, ++*runb); } } } if (bp) bqrelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; return (0); } /* * Create an array of logical block number/offset pairs which represent the * path of indirect blocks required to access a data block. The first "pair" * contains the logical block number of the appropriate single, double or * triple indirect block and the offset into the inode indirect block array. * Note, the logical block number of the inode single/double/triple indirect * block appears twice in the array, once with the offset into the i_ib and * once with the offset into the page itself. */ int ufs_getlbns(vp, bn, ap, nump) struct vnode *vp; ufs_daddr_t bn; struct indir *ap; int *nump; { long blockcnt, metalbn, realbn; struct ufsmount *ump; int i, numlevels, off; int64_t qblockcnt; ump = VFSTOUFS(vp->v_mount); if (nump) *nump = 0; numlevels = 0; realbn = bn; if ((long)bn < 0) bn = -(long)bn; /* The first NDADDR blocks are direct blocks. */ if (bn < NDADDR) return (0); /* * Determine the number of levels of indirection. After this loop * is done, blockcnt indicates the number of data blocks possible * at the previous level of indirection, and NIADDR - i is the number * of levels of indirection needed to locate the requested block. */ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); /* * Use int64_t's here to avoid overflow for triple indirect * blocks when longs have 32 bits and the block size is more * than 4K. 
*/ qblockcnt = (int64_t)blockcnt * MNINDIR(ump); if (bn < qblockcnt) break; blockcnt = qblockcnt; } /* Calculate the address of the first meta-block. */ if (realbn >= 0) metalbn = -(realbn - bn + NIADDR - i); else metalbn = -(-realbn - bn + NIADDR - i); /* * At each iteration, off is the offset into the bap array which is * an array of disk addresses at the current level of indirection. * The logical block number and the offset in that block are stored * into the argument array. */ ap->in_lbn = metalbn; ap->in_off = off = NIADDR - i; ap->in_exists = 0; ap++; for (++numlevels; i <= NIADDR; i++) { /* If searching for a meta-data block, quit when found. */ if (metalbn == realbn) break; off = (bn / blockcnt) % MNINDIR(ump); ++numlevels; ap->in_lbn = metalbn; ap->in_off = off; ap->in_exists = 0; ++ap; metalbn -= -1 + off * blockcnt; blockcnt /= MNINDIR(ump); } if (nump) *nump = numlevels; return (0); } Index: head/sys/gnu/fs/ext2fs/ext2_vfsops.c =================================================================== --- head/sys/gnu/fs/ext2fs/ext2_vfsops.c (revision 49534) +++ head/sys/gnu/fs/ext2fs/ext2_vfsops.c (revision 49535) @@ -1,1190 +1,1188 @@ /* * modified for EXT2FS support in Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /* * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
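
An aside on the code just above, not part of the patch: the loop at the top of ufs_getlbns() decides how many levels of indirection a file-relative block number needs, and the negative metalbn values it then produces follow the addressing convention described before ufs_bmaparray(). The sketch below reproduces just the level-counting part in user space, with invented geometry (12 direct pointers, 1024 pointers per indirect block) standing in for NDADDR and MNINDIR(ump), and an invented helper name (levels_needed):

#include <stdio.h>
#include <stdint.h>

#define NDIRECT 12	/* direct pointers in the inode (cf. NDADDR) */
#define NINDIR  1024	/* pointers per indirect block (cf. MNINDIR) */
#define NLEVELS 3	/* single, double, triple indirect (cf. NIADDR) */

/* Levels of indirection needed for block bn; -1 means "too big" (EFBIG). */
static int
levels_needed(long bn)
{
    int64_t blockcnt = 1, next;
    int i;

    if (bn < NDIRECT)
        return (0);
    bn -= NDIRECT;
    for (i = NLEVELS; ; i--, bn -= blockcnt) {
        if (i == 0)
            return (-1);
        next = blockcnt * NINDIR;
        if (bn < next)
            break;
        blockcnt = next;
    }
    return (NLEVELS - i + 1);
}

int
main(void)
{
    long samples[] = { 5, 800, 200000, 900000000L, 2000000000L };
    int i, n;

    for (i = 0; i < 5; i++) {
        n = levels_needed(samples[i]);
        if (n < 0)
            printf("block %ld: beyond triple indirect (EFBIG)\n", samples[i]);
        else
            printf("block %ld: %d level(s) of indirection\n", samples[i], n);
    }
    return (0);
}

With these constants, blocks 5, 800, 200000 and 900000000 need 0, 1, 2 and 3 levels respectively, and 2000000000 overflows even the triple indirect block.
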
* * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include -#include - #include #include #include #include #include #include #include #include static int ext2_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int ext2_flushfiles __P((struct mount *mp, int flags, struct proc *p)); static int ext2_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int ext2_mountfs __P((struct vnode *, struct mount *, struct proc *)); static int ext2_reload __P((struct mount *mountp, struct ucred *cred, struct proc *p)); static int ext2_sbupdate __P((struct ufsmount *, int)); static int ext2_statfs __P((struct mount *, struct statfs *, struct proc *)); static int ext2_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int ext2_unmount __P((struct mount *, int, struct proc *)); static int ext2_vget __P((struct mount *, ino_t, struct vnode **)); static int ext2_vptofh __P((struct vnode *, struct fid *)); static MALLOC_DEFINE(M_EXT2NODE, "EXT2 node", "EXT2 vnode private part"); static struct vfsops ext2fs_vfsops = { ext2_mount, ufs_start, /* empty function */ ext2_unmount, ufs_root, /* root inode via vget */ ufs_quotactl, /* does operations associated with quotas */ ext2_statfs, ext2_sync, ext2_vget, ext2_fhtovp, ext2_vptofh, ext2_init, }; VFS_SET(ext2fs_vfsops, ext2fs, 0); #define bsd_malloc malloc #define bsd_free free static int ext2fs_inode_hash_lock; static int compute_sb_data __P((struct vnode * devvp, struct ext2_super_block * es, struct ext2_sb_info * fs)); #ifdef notyet static int ext2_mountroot __P((void)); /* * Called by main() when ext2fs is going to be mounted as root. * * Name is updated by mount(8) after booting. */ #define ROOTNAME "root_device" static int ext2_mountroot() { register struct ext2_sb_info *fs; register struct mount *mp; struct proc *p = curproc; struct ufsmount *ump; u_int size; int error; if ((error = bdevvp(rootdev, &rootvp))) { printf("ext2_mountroot: can't find rootvp"); return (error); } mp = bsd_malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &ext2fs_vfsops; mp->mnt_flag = MNT_RDONLY; if (bdevsw(rootdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw(rootdev)->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; if (error = ext2_mountfs(rootvp, mp, p)) { bsd_free(mp, M_MOUNT); return (error); } if (error = vfs_lock(mp)) { (void)ext2_unmount(mp, 0, p); bsd_free(mp, M_MOUNT); return (error); } CIRCLEQ_INSERT_HEAD(&mountlist, mp, mnt_list); mp->mnt_flag |= MNT_ROOTFS; mp->mnt_vnodecovered = NULLVP; ump = VFSTOUFS(mp); fs = ump->um_e2fs; bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); fs->fs_fsmnt[0] = '/'; bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN); (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)ext2_statfs(mp, &mp->mnt_stat, p); vfs_unlock(mp); inittodr(fs->s_es->s_wtime); /* this helps to set the time */ return (0); } #endif /* * VFS Operations. 
* * mount system call */ static int ext2_mount(mp, path, data, ndp, p) register struct mount *mp; char *path; caddr_t data; /* this is actually a (struct ufs_args *) */ struct nameidata *ndp; struct proc *p; { struct vnode *devvp; struct ufs_args args; struct ufsmount *ump = 0; register struct ext2_sb_info *fs; u_int size; int error, flags; mode_t accessmode; if ((error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) != 0) return (error); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. * Disallow clearing MNT_NOCLUSTERR and MNT_NOCLUSTERW flags, * if block device requests. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_e2fs; error = 0; if (bdevsw(ump->um_dev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw(ump->um_dev)->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; if (fs->s_rd_only == 0 && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (vfs_busy(mp, LK_NOWAIT, 0, p)) return (EBUSY); error = ext2_flushfiles(mp, flags, p); vfs_unbusy(mp, p); if (!error && fs->s_wasvalid) { fs->s_es->s_state |= EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); } fs->s_rd_only = 1; } if (!error && (mp->mnt_flag & MNT_RELOAD)) error = ext2_reload(mp, ndp->ni_cnd.cn_cred, p); if (error) return (error); if (fs->s_rd_only && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (p->p_ucred->cr_uid != 0) { devvp = ump->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p)) != 0) { VOP_UNLOCK(devvp, 0, p); return (error); } VOP_UNLOCK(devvp, 0, p); } if ((fs->s_es->s_state & EXT2_VALID_FS) == 0 || (fs->s_es->s_state & EXT2_ERROR_FS)) { if (mp->mnt_flag & MNT_FORCE) { printf( "WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { printf( "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", fs->fs_fsmnt); return (EPERM); } } fs->s_es->s_state &= ~EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); fs->s_rd_only = 0; } if (args.fspec == 0) { /* * Process export requests. */ return (vfs_export(mp, &ump->um_export, &args.export)); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp)) != 0) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return (ENOTBLK); } if (bdevsw(devvp->v_rdev) == NULL) { vrele(devvp); return (ENXIO); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. 
*/ if (p->p_ucred->cr_uid != 0) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { if (bdevsw(devvp->v_rdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw(devvp->v_rdev)->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; error = ext2_mountfs(devvp, mp, p); } else { if (devvp != ump->um_devvp) error = EINVAL; /* needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return (error); } ump = VFSTOUFS(mp); fs = ump->um_e2fs; (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)ext2_statfs(mp, &mp->mnt_stat, p); return (0); } /* * checks that the data in the descriptor blocks make sense * this is taken from ext2/super.c */ static int ext2_check_descriptors (struct ext2_sb_info * sb) { int i; int desc_block = 0; unsigned long block = sb->s_es->s_first_data_block; struct ext2_group_desc * gdp = NULL; /* ext2_debug ("Checking group descriptors"); */ for (i = 0; i < sb->s_groups_count; i++) { /* examine next descriptor block */ if ((i % EXT2_DESC_PER_BLOCK(sb)) == 0) gdp = (struct ext2_group_desc *) sb->s_group_desc[desc_block++]->b_data; if (gdp->bg_block_bitmap < block || gdp->bg_block_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Block bitmap for group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_block_bitmap); return 0; } if (gdp->bg_inode_bitmap < block || gdp->bg_inode_bitmap >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Inode bitmap for group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_inode_bitmap); return 0; } if (gdp->bg_inode_table < block || gdp->bg_inode_table + sb->s_itb_per_group >= block + EXT2_BLOCKS_PER_GROUP(sb)) { printf ("ext2_check_descriptors: " "Inode table for group %d" " not in group (block %lu)!\n", i, (unsigned long) gdp->bg_inode_table); return 0; } block += EXT2_BLOCKS_PER_GROUP(sb); gdp++; } return 1; } /* * this computes the fields of the ext2_sb_info structure from the * data in the ext2_super_block structure read in */ static int compute_sb_data(devvp, es, fs) struct vnode * devvp; struct ext2_super_block * es; struct ext2_sb_info * fs; { int db_count, error; int i, j; int logic_sb_block = 1; /* XXX for now */ #if 1 #define V(v) #else #define V(v) printf(#v"= %d\n", fs->v); #endif fs->s_blocksize = EXT2_MIN_BLOCK_SIZE << es->s_log_block_size; V(s_blocksize) fs->s_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->s_log_block_size; V(s_bshift) fs->s_fsbtodb = es->s_log_block_size + 1; V(s_fsbtodb) fs->s_qbmask = fs->s_blocksize - 1; V(s_bmask) fs->s_blocksize_bits = EXT2_BLOCK_SIZE_BITS(es); V(s_blocksize_bits) fs->s_frag_size = EXT2_MIN_FRAG_SIZE << es->s_log_frag_size; V(s_frag_size) if (fs->s_frag_size) fs->s_frags_per_block = fs->s_blocksize / fs->s_frag_size; V(s_frags_per_block) fs->s_blocks_per_group = es->s_blocks_per_group; V(s_blocks_per_group) fs->s_frags_per_group = es->s_frags_per_group; V(s_frags_per_group) fs->s_inodes_per_group = es->s_inodes_per_group; V(s_inodes_per_group) 
fs->s_inodes_per_block = fs->s_blocksize / EXT2_INODE_SIZE; V(s_inodes_per_block) fs->s_itb_per_group = fs->s_inodes_per_group /fs->s_inodes_per_block; V(s_itb_per_group) fs->s_desc_per_block = fs->s_blocksize / sizeof (struct ext2_group_desc); V(s_desc_per_block) /* s_resuid / s_resgid ? */ fs->s_groups_count = (es->s_blocks_count - es->s_first_data_block + EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs); V(s_groups_count) db_count = (fs->s_groups_count + EXT2_DESC_PER_BLOCK(fs) - 1) / EXT2_DESC_PER_BLOCK(fs); fs->s_db_per_group = db_count; V(s_db_per_group) fs->s_group_desc = bsd_malloc(db_count * sizeof (struct buf *), M_UFSMNT, M_WAITOK); /* adjust logic_sb_block */ if(fs->s_blocksize > SBSIZE) /* Godmar thinks: if the blocksize is greater than 1024, then the superblock is logically part of block zero. */ logic_sb_block = 0; for (i = 0; i < db_count; i++) { error = bread(devvp , fsbtodb(fs, logic_sb_block + i + 1), fs->s_blocksize, NOCRED, &fs->s_group_desc[i]); if(error) { for (j = 0; j < i; j++) brelse(fs->s_group_desc[j]); bsd_free(fs->s_group_desc, M_UFSMNT); printf("EXT2-fs: unable to read group descriptors (%d)\n", error); return EIO; } /* Set the B_LOCKED flag on the buffer, then brelse() it */ LCK_BUF(fs->s_group_desc[i]) } if(!ext2_check_descriptors(fs)) { for (j = 0; j < db_count; j++) ULCK_BUF(fs->s_group_desc[j]) bsd_free(fs->s_group_desc, M_UFSMNT); printf("EXT2-fs: (ext2_check_descriptors failure) " "unable to read group descriptors\n"); return EIO; } for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) { fs->s_inode_bitmap_number[i] = 0; fs->s_inode_bitmap[i] = NULL; fs->s_block_bitmap_number[i] = 0; fs->s_block_bitmap[i] = NULL; } fs->s_loaded_inode_bitmaps = 0; fs->s_loaded_block_bitmaps = 0; return 0; } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ static int ext2_reload(mountp, cred, p) register struct mount *mountp; struct ucred *cred; struct proc *p; { register struct vnode *vp, *nvp, *devvp; struct inode *ip; struct buf *bp; struct ext2_super_block * es; struct ext2_sb_info *fs; int error; if ((mountp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mountp)->um_devvp; if (vinvalbuf(devvp, 0, cred, p, 0, 0)) panic("ext2_reload: dirty1"); /* * Step 2: re-read superblock from disk. * constants have been adjusted for ext2 */ if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) return (error); es = (struct ext2_super_block *)bp->b_data; if (es->s_magic != EXT2_SUPER_MAGIC) { if(es->s_magic == EXT2_PRE_02B_MAGIC) printf("This filesystem bears the magic number of a pre " "0.2b version of ext2. 
This is not supported by " "Lites.\n"); else printf("Wrong magic number: %x (expected %x for ext2 fs\n", es->s_magic, EXT2_SUPER_MAGIC); brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOUFS(mountp)->um_e2fs; bcopy(bp->b_data, fs->s_es, sizeof(struct ext2_super_block)); if((error = compute_sb_data(devvp, es, fs)) != 0) { brelse(bp); return error; } #ifdef UNKLAR if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; #endif brelse(bp); loop: simple_lock(&mntvnode_slock); for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { if (vp->v_mount != mountp) { simple_unlock(&mntvnode_slock); goto loop; } nvp = vp->v_mntvnodes.le_next; /* * Step 4: invalidate all inactive vnodes. */ if (vrecycle(vp, &mntvnode_slock, p)) goto loop; /* * Step 5: invalidate all cached file data. */ simple_lock(&vp->v_interlock); simple_unlock(&mntvnode_slock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { goto loop; } if (vinvalbuf(vp, 0, cred, p, 0, 0)) panic("ext2_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. */ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->s_blocksize, NOCRED, &bp); if (error) { vput(vp); return (error); } ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * ino_to_fsbo(fs, ip->i_number)), &ip->i_din); brelse(bp); vput(vp); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); return (0); } /* * Common code for mount and mountroot */ static int ext2_mountfs(devvp, mp, p) register struct vnode *devvp; struct mount *mp; struct proc *p; { register struct ufsmount *ump; struct buf *bp; register struct ext2_sb_info *fs; struct ext2_super_block * es; dev_t dev = devvp->v_rdev; struct partinfo dpart; int havepart = 0; int error, i, size; int ronly; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp)) != 0) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) != 0) return (error); #ifdef READONLY /* turn on this to force it to be read-only */ mp->mnt_flag |= MNT_RDONLY; #endif ronly = (mp->mnt_flag & MNT_RDONLY) != 0; if ((error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p)) != 0) return (error); if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) size = DEV_BSIZE; else { havepart = 1; size = dpart.disklab->d_secsize; } bp = NULL; ump = NULL; if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) goto out; es = (struct ext2_super_block *)bp->b_data; if (es->s_magic != EXT2_SUPER_MAGIC) { if(es->s_magic == EXT2_PRE_02B_MAGIC) printf("This filesystem bears the magic number of a pre " "0.2b version of ext2. This is not supported by " "Lites.\n"); else printf("Wrong magic number: %x (expected %x for EXT2FS)\n", es->s_magic, EXT2_SUPER_MAGIC); error = EINVAL; /* XXX needs translation */ goto out; } if ((es->s_state & EXT2_VALID_FS) == 0 || (es->s_state & EXT2_ERROR_FS)) { if (ronly || (mp->mnt_flag & MNT_FORCE)) { printf( "WARNING: Filesystem was not properly dismounted\n"); } else { printf( "WARNING: R/W mount denied. 
Filesystem is not clean - run fsck\n"); error = EPERM; goto out; } } ump = bsd_malloc(sizeof *ump, M_UFSMNT, M_WAITOK); bzero((caddr_t)ump, sizeof *ump); ump->um_malloctype = M_EXT2NODE; ump->um_blkatoff = ext2_blkatoff; ump->um_truncate = ext2_truncate; ump->um_update = ext2_update; ump->um_valloc = ext2_valloc; ump->um_vfree = ext2_vfree; /* I don't know whether this is the right strategy. Note that we dynamically allocate both a ext2_sb_info and a ext2_super_block while Linux keeps the super block in a locked buffer */ ump->um_e2fs = bsd_malloc(sizeof(struct ext2_sb_info), M_UFSMNT, M_WAITOK); ump->um_e2fs->s_es = bsd_malloc(sizeof(struct ext2_super_block), M_UFSMNT, M_WAITOK); bcopy(es, ump->um_e2fs->s_es, (u_int)sizeof(struct ext2_super_block)); if ((error = compute_sb_data(devvp, ump->um_e2fs->s_es, ump->um_e2fs))) goto out; /* * We don't free the group descriptors allocated by compute_sb_data() * until ext2_unmount(). This is OK since the mount will succeed. */ brelse(bp); bp = NULL; fs = ump->um_e2fs; fs->s_rd_only = ronly; /* ronly is set according to mnt_flags */ /* if the fs is not mounted read-only, make sure the super block is always written back on a sync() */ fs->s_wasvalid = fs->s_es->s_state & EXT2_VALID_FS ? 1 : 0; if (ronly == 0) { fs->s_dirt = 1; /* mark it modified */ fs->s_es->s_state &= ~EXT2_VALID_FS; /* set fs invalid */ } mp->mnt_data = (qaddr_t)ump; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; /* setting those two parameters allows us to use ufs_bmap w/o changse ! */ ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); ump->um_bptrtodb = fs->s_es->s_log_block_size + 1; ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; devvp->v_specmountpoint = mp; if (ronly == 0) ext2_sbupdate(ump, MNT_WAIT); return (0); out: if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); if (ump) { bsd_free(ump->um_e2fs->s_es, M_UFSMNT); bsd_free(ump->um_e2fs, M_UFSMNT); bsd_free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } /* * unmount system call */ static int ext2_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct ufsmount *ump; register struct ext2_sb_info *fs; int error, flags, ronly, i; flags = 0; if (mntflags & MNT_FORCE) { if (mp->mnt_flag & MNT_ROOTFS) return (EINVAL); flags |= FORCECLOSE; } if ((error = ext2_flushfiles(mp, flags, p)) != 0) return (error); ump = VFSTOUFS(mp); fs = ump->um_e2fs; ronly = fs->s_rd_only; if (ronly == 0) { if (fs->s_wasvalid) fs->s_es->s_state |= EXT2_VALID_FS; ext2_sbupdate(ump, MNT_WAIT); } /* release buffers containing group descriptors */ for(i = 0; i < fs->s_db_per_group; i++) ULCK_BUF(fs->s_group_desc[i]) bsd_free(fs->s_group_desc, M_UFSMNT); /* release cached inode/block bitmaps */ for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) if (fs->s_inode_bitmap[i]) ULCK_BUF(fs->s_inode_bitmap[i]) for (i = 0; i < EXT2_MAX_GROUP_LOADED; i++) if (fs->s_block_bitmap[i]) ULCK_BUF(fs->s_block_bitmap[i]) ump->um_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); vrele(ump->um_devvp); bsd_free(fs->s_es, M_UFSMNT); bsd_free(fs, M_UFSMNT); bsd_free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Flush out all the files in a filesystem. 
*/ static int ext2_flushfiles(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { register struct ufsmount *ump; int error; #if QUOTA int i; #endif ump = VFSTOUFS(mp); #if QUOTA if (mp->mnt_flag & MNT_QUOTA) { if ((error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) != 0) return (error); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP) continue; quotaoff(p, mp, i); } /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } #endif error = vflush(mp, NULLVP, flags); return (error); } /* * Get file system statistics. * taken from ext2/super.c ext2_statfs */ static int ext2_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { unsigned long overhead; unsigned long overhead_per_group; register struct ufsmount *ump; register struct ext2_sb_info *fs; register struct ext2_super_block *es; ump = VFSTOUFS(mp); fs = ump->um_e2fs; es = fs->s_es; if (es->s_magic != EXT2_SUPER_MAGIC) panic("ext2_statfs - magic number spoiled"); /* * Compute the overhead (FS structures) */ overhead_per_group = 1 /* super block */ + fs->s_db_per_group + 1 /* block bitmap */ + 1 /* inode bitmap */ + fs->s_itb_per_group; overhead = es->s_first_data_block + fs->s_groups_count * overhead_per_group; sbp->f_bsize = EXT2_FRAG_SIZE(fs); sbp->f_iosize = EXT2_BLOCK_SIZE(fs); sbp->f_blocks = es->s_blocks_count - overhead; sbp->f_bfree = es->s_free_blocks_count; sbp->f_bavail = sbp->f_bfree - es->s_r_blocks_count; sbp->f_files = es->s_inodes_count; sbp->f_ffree = es->s_free_inodes_count; if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. */ static int ext2_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *nvp, *vp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct ext2_sb_info *fs; int error, allerror = 0; fs = ump->um_e2fs; if (fs->s_dirt != 0 && fs->s_rd_only != 0) { /* XXX */ printf("fs = %s\n", fs->fs_fsmnt); panic("ext2_sync: rofs mod"); } /* * Write back each (modified) inode. */ simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; ip = VTOI(vp); if (vp->v_type == VNON || ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { simple_unlock(&vp->v_interlock); continue; } simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; } if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); /* * Force stale file system control information to be flushed. 
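/*
 * Illustrative sketch (not from the tree): the statfs overhead arithmetic
 * above, pulled out as a standalone function.  Every input value below is a
 * made-up example, chosen only to show how the per-group metadata (superblock
 * copy, group descriptor blocks, two bitmaps, inode table) is subtracted from
 * the raw block count before it is reported to userland.
 */
#include <stdio.h>

struct e2geom {
    unsigned long first_data_block;   /* 1 for 1 KiB blocks, 0 otherwise */
    unsigned long groups_count;
    unsigned long db_per_group;       /* blocks of group descriptors per group */
    unsigned long itb_per_group;      /* blocks of inode table per group */
    unsigned long blocks_count;
};

static unsigned long e2_usable_blocks(const struct e2geom *g)
{
    unsigned long per_group = 1            /* superblock copy   */
        + g->db_per_group                  /* group descriptors */
        + 1                                /* block bitmap      */
        + 1                                /* inode bitmap      */
        + g->itb_per_group;                /* inode table       */
    unsigned long overhead = g->first_data_block
        + g->groups_count * per_group;
    return g->blocks_count - overhead;
}

int main(void)
{
    /* hypothetical 512 MiB filesystem with 1 KiB blocks and 64 groups */
    struct e2geom g = { 1, 64, 2, 256, 524288 };
    printf("f_blocks = %lu\n", e2_usable_blocks(&g));
    return 0;
}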
*/ if (waitfor != MNT_LAZY) { vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) allerror = error; VOP_UNLOCK(ump->um_devvp, 0, p); } #if QUOTA qsync(mp); #endif /* * Write back modified superblock. */ if (fs->s_dirt != 0) { fs->s_dirt = 0; fs->s_es->s_wtime = time_second; if ((error = ext2_sbupdate(ump, waitfor)) != 0) allerror = error; } return (allerror); } /* * Look up a EXT2FS dinode number to find its incore vnode, otherwise read it * in from disk. If it is in core, wait for the lock bit to clear, then * return the inode locked. Detection and handling of mount points must be * done by the calling routine. */ static int ext2_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { register struct ext2_sb_info *fs; register struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; dev_t dev; int i, error; int used_blocks; ump = VFSTOUFS(mp); dev = ump->um_dev; restart: if ((*vpp = ufs_ihashget(dev, ino)) != NULL) return (0); /* * Lock out the creation of new entries in the FFS hash table in * case getnewvnode() or MALLOC() blocks, otherwise a duplicate * may occur! */ if (ext2fs_inode_hash_lock) { while (ext2fs_inode_hash_lock) { ext2fs_inode_hash_lock = -1; tsleep(&ext2fs_inode_hash_lock, PVM, "e2vget", 0); } goto restart; } ext2fs_inode_hash_lock = 1; /* * If this MALLOC() is performed after the getnewvnode() * it might block, leaving a vnode with a NULL v_data to be * found by ext2_sync() if a sync happens to fire right then, * which will cause a panic because ext2_sync() blindly * dereferences vp->v_data (as well it should). */ MALLOC(ip, struct inode *, sizeof(struct inode), M_EXT2NODE, M_WAITOK); /* Allocate a new vnode/inode. */ if ((error = getnewvnode(VT_UFS, mp, ext2_vnodeop_p, &vp)) != 0) { if (ext2fs_inode_hash_lock < 0) wakeup(&ext2fs_inode_hash_lock); ext2fs_inode_hash_lock = 0; *vpp = NULL; FREE(ip, M_EXT2NODE); return (error); } bzero((caddr_t)ip, sizeof(struct inode)); lockinit(&ip->i_lock, PINOD, "ext2in", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_e2fs = fs = ump->um_e2fs; ip->i_dev = dev; ip->i_number = ino; #if QUOTA for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; #endif /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ ufs_ihashins(ip); if (ext2fs_inode_hash_lock < 0) wakeup(&ext2fs_inode_hash_lock); ext2fs_inode_hash_lock = 0; /* Read in the disk contents for the inode, copy into the inode. */ #if 0 printf("ext2_vget(%d) dbn= %d ", ino, fsbtodb(fs, ino_to_fsba(fs, ino))); #endif if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->s_blocksize, NOCRED, &bp)) != 0) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). 
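/*
 * Illustrative sketch (not from the tree): the ino_to_fsba()/ino_to_fsbo()
 * calls in the vget path above boil down to the usual ext2 arithmetic --
 * derive the group from the inode number, then index into that group's inode
 * table.  The struct and helper below are hypothetical stand-ins for the real
 * macros, not their definitions.
 */
#include <stdint.h>

struct e2layout {
    uint32_t inodes_per_group;
    uint32_t inode_size;              /* 128 bytes in classic ext2 */
    uint32_t block_size;
    const uint32_t *inode_table_start; /* per-group first block of the inode table */
};

/* Locate inode `ino` (1-based): which filesystem block, and which slot in it. */
static void locate_inode(const struct e2layout *l, uint32_t ino,
    uint32_t *blockp, uint32_t *slotp)
{
    uint32_t group = (ino - 1) / l->inodes_per_group;
    uint32_t index = (ino - 1) % l->inodes_per_group;
    uint32_t per_block = l->block_size / l->inode_size;

    *blockp = l->inode_table_start[group] + index / per_block;
    *slotp  = index % per_block;   /* multiply by inode_size for a byte offset */
}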
*/ vput(vp); brelse(bp); *vpp = NULL; return (error); } /* convert ext2 inode to dinode */ ext2_ei2di((struct ext2_inode *) ((char *)bp->b_data + EXT2_INODE_SIZE * ino_to_fsbo(fs, ino)), &ip->i_din); ip->i_block_group = ino_to_cg(fs, ino); ip->i_next_alloc_block = 0; ip->i_next_alloc_goal = 0; ip->i_prealloc_count = 0; ip->i_prealloc_block = 0; /* now we want to make sure that block pointers for unused blocks are zeroed out - ext2_balloc depends on this although for regular files and directories only */ if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { used_blocks = (ip->i_size+fs->s_blocksize-1) / fs->s_blocksize; for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) ip->i_db[i] = 0; } /* ext2_print_inode(ip); */ brelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ if ((error = ufs_vinit(mp, ext2_specop_p, ext2_fifoop_p, &vp)) != 0) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization now that aliasing has been resolved. */ ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { ip->i_gen = random() / 2 + 1; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - call ext2_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ static int ext2_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { register struct ufid *ufhp; struct ext2_sb_info *fs; ufhp = (struct ufid *)fhp; fs = VFSTOUFS(mp)->um_e2fs; if (ufhp->ufid_ino < ROOTINO || ufhp->ufid_ino >= fs->s_groups_count * fs->s_es->s_inodes_per_group) return (ESTALE); return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); } /* * Vnode pointer to File handle */ /* ARGSUSED */ static int ext2_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct inode *ip; register struct ufid *ufhp; ip = VTOI(vp); ufhp = (struct ufid *)fhp; ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; return (0); } /* * Write a superblock and associated information back to disk. */ static int ext2_sbupdate(mp, waitfor) struct ufsmount *mp; int waitfor; { register struct ext2_sb_info *fs = mp->um_e2fs; register struct ext2_super_block *es = fs->s_es; register struct buf *bp; int error = 0; /* printf("\nupdating superblock, waitfor=%s\n", waitfor == MNT_WAIT ? "yes":"no"); */ bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0); bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2_super_block)); if (waitfor == MNT_WAIT) error = bwrite(bp); else bawrite(bp); /* * The buffers for group descriptors, inode bitmaps and block bitmaps * are not busy at this point and are (hopefully) written by the * usual sync mechanism. 
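/*
 * Illustrative sketch (not from the tree): the fhtovp/vptofh pair above keeps
 * NFS file handles honest by (a) range-checking the inode number against the
 * filesystem geometry and (b) carrying a generation number so a handle for a
 * deleted-and-reused inode goes stale.  A generic standalone rendition of
 * that idea, with invented names, looks like this.
 */
#include <stdint.h>
#include <string.h>

struct myfid {                  /* hypothetical stand-in for struct ufid */
    uint32_t fid_ino;
    uint32_t fid_gen;
};

struct fsinfo {
    uint32_t root_ino;          /* smallest valid inode number */
    uint32_t max_ino;           /* groups_count * inodes_per_group */
};

static void fid_pack(struct myfid *f, uint32_t ino, uint32_t gen)
{
    memset(f, 0, sizeof(*f));   /* handles may be copied out; avoid stack garbage */
    f->fid_ino = ino;
    f->fid_gen = gen;
}

/* Return 0 if the handle could still be valid, -1 (caller maps to ESTALE) otherwise. */
static int fid_check(const struct fsinfo *fs, const struct myfid *f,
    uint32_t current_gen_of_inode)
{
    if (f->fid_ino < fs->root_ino || f->fid_ino >= fs->max_ino)
        return -1;
    if (f->fid_gen != current_gen_of_inode)
        return -1;
    return 0;
}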
No need to write them here */ return (error); } Index: head/sys/isofs/cd9660/cd9660_vfsops.c =================================================================== --- head/sys/isofs/cd9660/cd9660_vfsops.c (revision 49534) +++ head/sys/isofs/cd9660/cd9660_vfsops.c (revision 49535) @@ -1,956 +1,955 @@ /*- * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 - * $Id: cd9660_vfsops.c,v 1.55 1999/05/08 06:39:32 phk Exp $ + * $Id: cd9660_vfsops.c,v 1.56 1999/05/31 11:27:21 phk Exp $ */ #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_ISOFSMNT, "ISOFS mount", "ISOFS mount structure"); MALLOC_DEFINE(M_ISOFSNODE, "ISOFS node", "ISOFS vnode private part"); static int cd9660_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int cd9660_start __P((struct mount *, int, struct proc *)); static int cd9660_unmount __P((struct mount *, int, struct proc *)); static int cd9660_root __P((struct mount *, struct vnode **)); static int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *)); static int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int cd9660_vget __P((struct mount *, ino_t, struct vnode **)); static int cd9660_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int cd9660_vptofh __P((struct vnode *, struct fid *)); static struct vfsops cd9660_vfsops = { cd9660_mount, cd9660_start, cd9660_unmount, cd9660_root, cd9660_quotactl, cd9660_statfs, cd9660_sync, cd9660_vget, cd9660_fhtovp, cd9660_vptofh, cd9660_init }; VFS_SET(cd9660_vfsops, cd9660, VFCF_READONLY); /* * Called by vfs_mountroot when iso is going to be mounted as root. */ static int iso_get_ssector __P((dev_t dev, struct proc *p)); static int iso_mountfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct iso_args *argp)); /* * Try to find the start of the last data track on this CD-ROM. This * is used to mount the last session of a multi-session CD. Bail out * and return 0 if we fail, this is always a safe bet. */ static int iso_get_ssector(dev, p) dev_t dev; struct proc *p; { struct ioc_toc_header h; struct ioc_read_toc_single_entry t; int i; struct cdevsw *bd; d_ioctl_t *ioctlp; bd = bdevsw(dev); ioctlp = bd->d_ioctl; if (ioctlp == NULL) return 0; if (ioctlp(dev, CDIOREADTOCHEADER, (caddr_t)&h, FREAD, p) != 0) return 0; for (i = h.ending_track; i >= 0; i--) { t.address_format = CD_LBA_FORMAT; t.track = i; if (ioctlp(dev, CDIOREADTOCENTRY, (caddr_t)&t, FREAD, p) != 0) return 0; if ((t.entry.control & 4) != 0) /* found a data track */ break; } if (i < 0) return 0; return ntohl(t.entry.addr.lba); } static int iso_mountroot __P((struct mount *mp, struct proc *p)); static int iso_mountroot(mp, p) struct mount *mp; struct proc *p; { struct iso_args args; int error; if ((error = bdevvp(rootdev, &rootvp))) { printf("iso_mountroot: can't find rootvp"); return (error); } args.flags = ISOFSMNT_ROOT; args.ssector = iso_get_ssector(rootdev, p); if (bootverbose) printf("iso_mountroot(): using session at block %d\n", args.ssector); if ((error = iso_mountfs(rootvp, mp, p, &args)) != 0) return (error); (void)cd9660_statfs(mp, &mp->mnt_stat, p); return (0); } /* * VFS Operations. 
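/*
 * Illustrative sketch (not from the tree): iso_get_ssector() above walks the
 * CD table of contents backwards from the last track and picks the first one
 * whose control bits mark it as a data track (bit 2, value 4), so that a
 * multi-session disc is mounted at its most recent session.  The same scan
 * over an already-fetched TOC array, with invented types:
 */
#include <stdint.h>

struct toc_entry {
    uint8_t  control;           /* bit 2 set => data track */
    uint32_t lba;               /* start of the track, already host-endian */
};

/* Return the start LBA of the last data track, or 0 if none is found. */
static uint32_t last_data_session(const struct toc_entry *toc, int ntracks)
{
    int i;

    for (i = ntracks - 1; i >= 0; i--)
        if (toc[i].control & 4)
            return toc[i].lba;
    return 0;                   /* 0 is always a safe fallback, as the code notes */
}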
* * mount system call */ static int cd9660_mount(mp, path, data, ndp, p) register struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; struct iso_args args; size_t size; int error; mode_t accessmode; struct iso_mnt *imp = 0; if ((mp->mnt_flag & MNT_ROOTFS) != 0) { if (bdevsw(rootdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; return (iso_mountroot(mp, p)); } if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))) return (error); if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EROFS); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. * Disallow clearing MNT_NOCLUSTERR flag, if block device requests. */ if (mp->mnt_flag & MNT_UPDATE) { imp = VFSTOISOFS(mp); if (bdevsw(imp->im_devvp->v_rdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (args.fspec == 0) return (vfs_export(mp, &imp->im_export, &args.export)); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp))) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return ENOTBLK; } if (bdevsw(devvp->v_rdev) == NULL) { vrele(devvp); return ENXIO; } /* * Verify that user has necessary permissions on the device, * or has superuser abilities */ accessmode = VREAD; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) error = suser(p); if (error) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); if ((mp->mnt_flag & MNT_UPDATE) == 0) { if (bdevsw(devvp->v_rdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; error = iso_mountfs(devvp, mp, p, &args); } else { if (devvp != imp->im_devvp) error = EINVAL; /* needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return error; } imp = VFSTOISOFS(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) cd9660_statfs(mp, &mp->mnt_stat, p); return 0; } /* * Common code for mount and mountroot */ static int iso_mountfs(devvp, mp, p, argp) register struct vnode *devvp; struct mount *mp; struct proc *p; struct iso_args *argp; { register struct iso_mnt *isomp = (struct iso_mnt *)0; struct buf *bp = NULL; struct buf *pribp = NULL, *supbp = NULL; dev_t dev = devvp->v_rdev; int error = EINVAL; int needclose = 0; int high_sierra = 0; int iso_bsize; int iso_blknum; int joliet_level; struct iso_volume_descriptor *vdp = 0; struct iso_primary_descriptor *pri = NULL; struct iso_sierra_primary_descriptor *pri_sierra = NULL; struct iso_supplementary_descriptor *sup = NULL; struct iso_directory_record *rootp; int logical_block_size; if (!(mp->mnt_flag & MNT_RDONLY)) return EROFS; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. 
*/ if ((error = vfs_mountedon(devvp))) return error; if (vcount(devvp) > 1 && devvp != rootvp) return EBUSY; if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))) return (error); if ((error = VOP_OPEN(devvp, FREAD, FSCRED, p))) return error; needclose = 1; /* This is the "logical sector size". The standard says this * should be 2048 or the physical sector size on the device, * whichever is greater. For now, we'll just use a constant. */ iso_bsize = ISO_DEFAULT_BLOCK_SIZE; joliet_level = 0; for (iso_blknum = 16 + argp->ssector; iso_blknum < 100 + argp->ssector; iso_blknum++) { if ((error = bread(devvp, iso_blknum * btodb(iso_bsize), iso_bsize, NOCRED, &bp)) != 0) goto out; vdp = (struct iso_volume_descriptor *)bp->b_data; if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) { if (bcmp (vdp->id_sierra, ISO_SIERRA_ID, sizeof vdp->id) != 0) { error = EINVAL; goto out; } else high_sierra = 1; } switch (isonum_711 (high_sierra? vdp->type_sierra: vdp->type)){ case ISO_VD_PRIMARY: if (pribp == NULL) { pribp = bp; bp = NULL; pri = (struct iso_primary_descriptor *)vdp; pri_sierra = (struct iso_sierra_primary_descriptor *)vdp; } break; case ISO_VD_SUPPLEMENTARY: if (supbp == NULL) { supbp = bp; bp = NULL; sup = (struct iso_supplementary_descriptor *)vdp; if (!(argp->flags & ISOFSMNT_NOJOLIET)) { if (bcmp(sup->escape, "%/@", 3) == 0) joliet_level = 1; if (bcmp(sup->escape, "%/C", 3) == 0) joliet_level = 2; if (bcmp(sup->escape, "%/E", 3) == 0) joliet_level = 3; if (isonum_711 (sup->flags) & 1) joliet_level = 0; } } break; case ISO_VD_END: goto vd_end; default: break; } if (bp) { brelse(bp); bp = NULL; } } vd_end: if (bp) { brelse(bp); bp = NULL; } if (pri == NULL) { error = EINVAL; goto out; } logical_block_size = isonum_723 (high_sierra? pri_sierra->logical_block_size: pri->logical_block_size); if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE || (logical_block_size & (logical_block_size - 1)) != 0) { error = EINVAL; goto out; } rootp = (struct iso_directory_record *) (high_sierra? pri_sierra->root_directory_record: pri->root_directory_record); isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK); bzero((caddr_t)isomp, sizeof *isomp); isomp->logical_block_size = logical_block_size; isomp->volume_space_size = isonum_733 (high_sierra? pri_sierra->volume_space_size: pri->volume_space_size); isomp->joliet_level = 0; /* * Since an ISO9660 multi-session CD can also access previous * sessions, we have to include them into the space consider- * ations. This doesn't yield a very accurate number since * parts of the old sessions might be inaccessible now, but we * can't do much better. This is also important for the NFS * filehandle validation. 
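/*
 * Illustrative sketch (not from the tree): two small decisions from the
 * volume-descriptor scan above, as standalone helpers.  The block-size test
 * is the usual power-of-two-in-range check, and the Joliet level comes from
 * the escape sequence in the supplementary descriptor ("%/@", "%/C", "%/E"
 * for UCS-2 levels 1-3).  The bounds below are example values, not the
 * kernel's DEV_BSIZE/MAXBSIZE.
 */
#include <string.h>

static int blocksize_ok(unsigned int bs)
{
    const unsigned int lo = 512, hi = 65536;   /* stand-ins for DEV_BSIZE/MAXBSIZE */
    return bs >= lo && bs <= hi && (bs & (bs - 1)) == 0;
}

static int joliet_level(const char escape[3])
{
    if (memcmp(escape, "%/@", 3) == 0) return 1;
    if (memcmp(escape, "%/C", 3) == 0) return 2;
    if (memcmp(escape, "%/E", 3) == 0) return 3;
    return 0;                                  /* not a Joliet descriptor */
}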
*/ isomp->volume_space_size += argp->ssector; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->im_bmask = logical_block_size - 1; isomp->im_bshift = ffs(logical_block_size) - 1; pribp->b_flags |= B_AGE; brelse(pribp); pribp = NULL; mp->mnt_data = (qaddr_t)isomp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; devvp->v_specmountpoint = mp; /* Check the Rock Ridge Extention support */ if (!(argp->flags & ISOFSMNT_NORRIP)) { if ((error = bread(isomp->im_devvp, (isomp->root_extent + isonum_711(rootp->ext_attr_length)) << (isomp->im_bshift - DEV_BSHIFT), isomp->logical_block_size, NOCRED, &bp)) != 0) goto out; rootp = (struct iso_directory_record *)bp->b_data; if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) { argp->flags |= ISOFSMNT_NORRIP; } else { argp->flags &= ~ISOFSMNT_GENS; } /* * The contents are valid, * but they will get reread as part of another vnode, so... */ bp->b_flags |= B_AGE; brelse(bp); bp = NULL; } isomp->im_flags = argp->flags & (ISOFSMNT_NORRIP | ISOFSMNT_GENS | ISOFSMNT_EXTATT | ISOFSMNT_NOJOLIET); if (high_sierra) { /* this effectively ignores all the mount flags */ log(LOG_INFO, "cd9660: High Sierra Format\n"); isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA; } else switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) { default: isomp->iso_ftype = ISO_FTYPE_DEFAULT; break; case ISOFSMNT_GENS|ISOFSMNT_NORRIP: isomp->iso_ftype = ISO_FTYPE_9660; break; case 0: log(LOG_INFO, "cd9660: RockRidge Extension\n"); isomp->iso_ftype = ISO_FTYPE_RRIP; break; } /* Decide whether to use the Joliet descriptor */ if (isomp->iso_ftype != ISO_FTYPE_RRIP && joliet_level) { log(LOG_INFO, "cd9660: Joliet Extension\n"); rootp = (struct iso_directory_record *) sup->root_directory_record; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->joliet_level = joliet_level; supbp->b_flags |= B_AGE; } if (supbp) { brelse(supbp); supbp = NULL; } return 0; out: devvp->v_specmountpoint = NULL; if (bp) brelse(bp); if (pribp) brelse(pribp); if (supbp) brelse(supbp); if (needclose) (void)VOP_CLOSE(devvp, FREAD, NOCRED, p); if (isomp) { free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; } return error; } /* * Make a filesystem operational. * Nothing to do at the moment. 
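/*
 * Illustrative sketch (not from the tree): im_bmask/im_bshift above turn the
 * (power-of-two) logical block size into a mask and a shift, so the block
 * number and in-block offset become a shift and an AND instead of a divide
 * and a modulo.  A standalone demonstration of the same arithmetic:
 */
#include <assert.h>
#include <strings.h>            /* ffs() */

static void split_offset(unsigned long off, unsigned int blocksize,
    unsigned long *lbn, unsigned long *inblk)
{
    unsigned int bshift = ffs((int)blocksize) - 1;  /* log2 for powers of two */
    unsigned long bmask = blocksize - 1;

    *lbn   = off >> bshift;     /* same as off / blocksize */
    *inblk = off & bmask;       /* same as off % blocksize */
    assert(*lbn == off / blocksize && *inblk == off % blocksize);
}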
*/ /* ARGSUSED */ static int cd9660_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return 0; } /* * unmount system call */ static int cd9660_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct iso_mnt *isomp; int error, flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; #if 0 mntflushbuf(mp, 0); if (mntinvalbuf(mp)) return EBUSY; #endif if ((error = vflush(mp, NULLVP, flags))) return (error); isomp = VFSTOISOFS(mp); isomp->im_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); vrele(isomp->im_devvp); free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Return root of a filesystem */ static int cd9660_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct iso_mnt *imp = VFSTOISOFS(mp); struct iso_directory_record *dp = (struct iso_directory_record *)imp->root; ino_t ino = isodirino(dp, imp); /* * With RRIP we must use the `.' entry of the root directory. * Simply tell vget, that it's a relocated directory. */ return (cd9660_vget_internal(mp, ino, vpp, imp->iso_ftype == ISO_FTYPE_RRIP, dp)); } /* * Do operations associated with quotas, not supported */ /* ARGSUSED */ static int cd9660_quotactl(mp, cmd, uid, arg, p) struct mount *mp; int cmd; uid_t uid; caddr_t arg; struct proc *p; { return (EOPNOTSUPP); } /* * Get file system statistics. */ int cd9660_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct iso_mnt *isomp; isomp = VFSTOISOFS(mp); sbp->f_bsize = isomp->logical_block_size; sbp->f_iosize = sbp->f_bsize; /* XXX */ sbp->f_blocks = isomp->volume_space_size; sbp->f_bfree = 0; /* total free blocks */ sbp->f_bavail = 0; /* blocks free for non superuser */ sbp->f_files = 0; /* total files */ sbp->f_ffree = 0; /* free file nodes */ if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } return 0; } /* ARGSUSED */ static int cd9660_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is in range * - call iget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the generation number matches */ struct ifid { ushort ifid_len; ushort ifid_pad; int ifid_ino; long ifid_start; }; /* ARGSUSED */ int cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct ifid *ifhp = (struct ifid *)fhp; register struct iso_node *ip; register struct netcred *np; register struct iso_mnt *imp = VFSTOISOFS(mp); struct vnode *nvp; int error; #ifdef ISOFS_DBG printf("fhtovp: ino %d, start %ld\n", ifhp->ifid_ino, ifhp->ifid_start); #endif /* * Get the export permission structure for this tuple. 
*/ np = vfs_export_lookup(mp, &imp->im_export, nam); if (np == NULL) return (EACCES); if ((error = VFS_VGET(mp, ifhp->ifid_ino, &nvp)) != 0) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); if (ip->inode.iso_mode == 0) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } int cd9660_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { /* * XXXX * It would be nice if we didn't always set the `relocated' flag * and force the extra read, but I don't want to think about fixing * that right now. */ return (cd9660_vget_internal(mp, ino, vpp, #if 0 VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP, #else 0, #endif (struct iso_directory_record *)0)); } int cd9660_vget_internal(mp, ino, vpp, relocated, isodir) struct mount *mp; ino_t ino; struct vnode **vpp; int relocated; struct iso_directory_record *isodir; { struct iso_mnt *imp; struct iso_node *ip; struct buf *bp; struct vnode *vp, *nvp; dev_t dev; int error; imp = VFSTOISOFS(mp); dev = imp->im_dev; if ((*vpp = cd9660_ihashget(dev, ino)) != NULLVP) return (0); /* Allocate a new vnode/iso_node. */ if ((error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) != 0) { *vpp = NULLVP; return (error); } MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE, M_WAITOK); bzero((caddr_t)ip, sizeof(struct iso_node)); lockinit(&ip->i_lock, PINOD, "isonode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_dev = dev; ip->i_number = ino; /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ cd9660_ihashins(ip); if (isodir == 0) { int lbn, off; lbn = lblkno(imp, ino); if (lbn >= imp->volume_space_size) { vput(vp); printf("fhtovp: lbn exceed volume space %d\n", lbn); return (ESTALE); } off = blkoff(imp, ino); if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) { vput(vp); printf("fhtovp: crosses block boundary %d\n", off + ISO_DIRECTORY_RECORD_SIZE); return (ESTALE); } error = bread(imp->im_devvp, lbn << (imp->im_bshift - DEV_BSHIFT), imp->logical_block_size, NOCRED, &bp); if (error) { vput(vp); brelse(bp); printf("fhtovp: bread error %d\n",error); return (error); } isodir = (struct iso_directory_record *)(bp->b_data + off); if (off + isonum_711(isodir->length) > imp->logical_block_size) { vput(vp); if (bp != 0) brelse(bp); printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n", off +isonum_711(isodir->length), off, isonum_711(isodir->length)); return (ESTALE); } #if 0 if (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) { if (bp != 0) brelse(bp); printf("fhtovp: file start miss %d vs %d\n", isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length), ifhp->ifid_start); return (ESTALE); } #endif } else bp = 0; ip->i_mnt = imp; ip->i_devvp = imp->im_devvp; VREF(ip->i_devvp); if (relocated) { /* * On relocated directories we must * read the `.' entry out of a dir. 
*/ ip->iso_start = ino >> imp->im_bshift; if (bp != 0) brelse(bp); if ((error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) != 0) { vput(vp); return (error); } isodir = (struct iso_directory_record *)bp->b_data; } ip->iso_extent = isonum_733(isodir->extent); ip->i_size = isonum_733(isodir->size); ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent; /* * Setup time stamp, attribute */ vp->v_type = VNON; switch (imp->iso_ftype) { default: /* ISO_FTYPE_9660 */ { struct buf *bp2; int off; if ((imp->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL, &bp2); else bp2 = NULL; cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660); cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660); if (bp2) brelse(bp2); break; } case ISO_FTYPE_RRIP: cd9660_rrip_analyze(isodir, ip, imp); break; } if (bp != 0) brelse(bp); /* * Initialize the associated vnode */ switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) { case VFIFO: vp->v_op = cd9660_fifoop_p; break; case VCHR: case VBLK: /* * if device, look at device number table for translation */ vp->v_op = cd9660_specop_p; if ((nvp = checkalias(vp, ip->inode.iso_rdev, mp)) != NULL) { /* * Discard unneeded vnode, but save its iso_node. * Note that the lock is carried over in the iso_node * to the replacement vnode. */ nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased inode. */ vp = nvp; ip->i_vnode = vp; } break; default: break; } if (ip->iso_extent == imp->root_extent) vp->v_flag |= VROOT; /* * XXX need generation number? */ *vpp = vp; return (0); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int cd9660_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct iso_node *ip = VTOI(vp); register struct ifid *ifhp; ifhp = (struct ifid *)fhp; ifhp->ifid_len = sizeof(struct ifid); ifhp->ifid_ino = ip->i_number; ifhp->ifid_start = ip->iso_start; #ifdef ISOFS_DBG printf("vptofh: ino %d, start %ld\n", ifhp->ifid_ino,ifhp->ifid_start); #endif return 0; } Index: head/sys/isofs/cd9660/cd9660_vnops.c =================================================================== --- head/sys/isofs/cd9660/cd9660_vnops.c (revision 49534) +++ head/sys/isofs/cd9660/cd9660_vnops.c (revision 49535) @@ -1,917 +1,917 @@ /*- * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)cd9660_vnops.c 8.19 (Berkeley) 5/27/95 - * $Id: cd9660_vnops.c,v 1.55 1999/04/18 10:58:02 dcs Exp $ + * $Id: cd9660_vnops.c,v 1.56 1999/05/11 19:54:25 phk Exp $ */ #include #include #include #include #include #include #include #include -#include #include #include #include #include +#include #include #include #include #include #include #include static int cd9660_setattr __P((struct vop_setattr_args *)); static int cd9660_access __P((struct vop_access_args *)); static int cd9660_getattr __P((struct vop_getattr_args *)); static int cd9660_pathconf __P((struct vop_pathconf_args *)); static int cd9660_read __P((struct vop_read_args *)); struct isoreaddir; static int iso_uiodir __P((struct isoreaddir *idp, struct dirent *dp, off_t off)); static int iso_shipdir __P((struct isoreaddir *idp)); static int cd9660_readdir __P((struct vop_readdir_args *)); static int cd9660_readlink __P((struct vop_readlink_args *ap)); static int cd9660_abortop __P((struct vop_abortop_args *)); static int cd9660_strategy __P((struct vop_strategy_args *)); static int cd9660_print __P((struct vop_print_args *)); static int cd9660_getpages __P((struct vop_getpages_args *)); static int cd9660_putpages __P((struct vop_putpages_args *)); /* * Setattr call. Only allowed for block and character special devices. */ int cd9660_setattr(ap) struct vop_setattr_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; if (vap->va_flags != (u_long)VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) return (EROFS); if (vap->va_size != (u_quad_t)VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: return (EROFS); case VCHR: case VBLK: case VSOCK: case VFIFO: case VNON: case VBAD: return (0); } } return (0); } /* * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC. * The mode is shifted to select the owner/group/other fields. The * super user is granted all permissions. */ /* ARGSUSED */ static int cd9660_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct iso_node *ip = VTOI(vp); struct ucred *cred = ap->a_cred; mode_t mask, mode = ap->a_mode; gid_t *gp; int i; /* * Disallow write attempts unless the file is a socket, * fifo, or a block or character device resident on the * file system. 
*/ if (mode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: return (EROFS); /* NOT REACHED */ default: break; } } /* User id 0 always gets access. */ if (cred->cr_uid == 0) return (0); mask = 0; /* Otherwise, check the owner. */ if (cred->cr_uid == ip->inode.iso_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return ((ip->inode.iso_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (ip->inode.iso_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return ((ip->inode.iso_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return ((ip->inode.iso_mode & mask) == mask ? 0 : EACCES); } static int cd9660_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; register struct vattr *vap = ap->a_vap; register struct iso_node *ip = VTOI(vp); vap->va_fsid = dev2udev(ip->i_dev); vap->va_fileid = ip->i_number; vap->va_mode = ip->inode.iso_mode; vap->va_nlink = ip->inode.iso_links; vap->va_uid = ip->inode.iso_uid; vap->va_gid = ip->inode.iso_gid; vap->va_atime = ip->inode.iso_atime; vap->va_mtime = ip->inode.iso_mtime; vap->va_ctime = ip->inode.iso_ctime; vap->va_rdev = ip->inode.iso_rdev; vap->va_size = (u_quad_t) ip->i_size; if (ip->i_size == 0 && (vap->va_mode & S_IFMT) == S_IFLNK) { struct vop_readlink_args rdlnk; struct iovec aiov; struct uio auio; char *cp; MALLOC(cp, char *, MAXPATHLEN, M_TEMP, M_WAITOK); aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = ap->a_p; auio.uio_resid = MAXPATHLEN; rdlnk.a_uio = &auio; rdlnk.a_vp = ap->a_vp; rdlnk.a_cred = ap->a_cred; if (cd9660_readlink(&rdlnk) == 0) vap->va_size = MAXPATHLEN - auio.uio_resid; FREE(cp, M_TEMP); } vap->va_flags = 0; vap->va_gen = 1; vap->va_blocksize = ip->i_mnt->logical_block_size; vap->va_bytes = (u_quad_t) ip->i_size; vap->va_type = vp->v_type; vap->va_filerev = 0; return (0); } /* * Vnode op for reading. 
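/*
 * Illustrative sketch (not from the tree): cd9660_access() above is the
 * classic owner/group/other check -- pick one class for the caller, build a
 * mask of the bits that class would need, and require them all.  A minimal
 * userland rendition, without the superuser shortcut or group-list walk:
 */
#include <sys/types.h>
#include <sys/stat.h>

#define WANT_READ  04
#define WANT_WRITE 02
#define WANT_EXEC  01

static int simple_access(mode_t file_mode, uid_t file_uid, gid_t file_gid,
    uid_t uid, gid_t gid, int want)
{
    mode_t mask = 0;

    if (uid == file_uid) {                     /* owner class */
        if (want & WANT_READ)  mask |= S_IRUSR;
        if (want & WANT_WRITE) mask |= S_IWUSR;
        if (want & WANT_EXEC)  mask |= S_IXUSR;
    } else if (gid == file_gid) {              /* group class */
        if (want & WANT_READ)  mask |= S_IRGRP;
        if (want & WANT_WRITE) mask |= S_IWGRP;
        if (want & WANT_EXEC)  mask |= S_IXGRP;
    } else {                                   /* everyone else */
        if (want & WANT_READ)  mask |= S_IROTH;
        if (want & WANT_WRITE) mask |= S_IWOTH;
        if (want & WANT_EXEC)  mask |= S_IXOTH;
    }
    return (file_mode & mask) == mask ? 0 : -1;    /* -1 ~ EACCES */
}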
*/ static int cd9660_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; register struct iso_node *ip = VTOI(vp); register struct iso_mnt *imp; struct buf *bp; daddr_t lbn, rablock; off_t diff; int rasize, error = 0; long size, n, on; if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); ip->i_flag |= IN_ACCESS; imp = ip->i_mnt; do { lbn = lblkno(imp, uio->uio_offset); on = blkoff(imp, uio->uio_offset); n = min((u_int)(imp->logical_block_size - on), uio->uio_resid); diff = (off_t)ip->i_size - uio->uio_offset; if (diff <= 0) return (0); if (diff < n) n = diff; size = blksize(imp, ip, lbn); rablock = lbn + 1; if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { if (lblktosize(imp, rablock) < ip->i_size) error = cluster_read(vp, (off_t)ip->i_size, lbn, size, NOCRED, uio->uio_resid, (ap->a_ioflag >> 16), &bp); else error = bread(vp, lbn, size, NOCRED, &bp); } else { if (vp->v_lastr + 1 == lbn && lblktosize(imp, rablock) < ip->i_size) { rasize = blksize(imp, ip, rablock); error = breadn(vp, lbn, size, &rablock, &rasize, 1, NOCRED, &bp); } else error = bread(vp, lbn, size, NOCRED, &bp); } vp->v_lastr = lbn; n = min(n, size - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove(bp->b_data + on, (int)n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); } /* * Structure for reading directories */ struct isoreaddir { struct dirent saveent; struct dirent assocent; struct dirent current; off_t saveoff; off_t assocoff; off_t curroff; struct uio *uio; off_t uio_off; int eofflag; u_long *cookies; int ncookies; }; int iso_uiodir(idp,dp,off) struct isoreaddir *idp; struct dirent *dp; off_t off; { int error; dp->d_name[dp->d_namlen] = 0; dp->d_reclen = GENERIC_DIRSIZ(dp); if (idp->uio->uio_resid < dp->d_reclen) { idp->eofflag = 0; return (-1); } if (idp->cookies) { if (idp->ncookies <= 0) { idp->eofflag = 0; return (-1); } *idp->cookies++ = off; --idp->ncookies; } if ((error = uiomove((caddr_t) dp,dp->d_reclen,idp->uio)) != 0) return (error); idp->uio_off = off; return (0); } int iso_shipdir(idp) struct isoreaddir *idp; { struct dirent *dp; int cl, sl, assoc; int error; char *cname, *sname; cl = idp->current.d_namlen; cname = idp->current.d_name; assoc = (cl > 1) && (*cname == ASSOCCHAR); if (assoc) { cl--; cname++; } dp = &idp->saveent; sname = dp->d_name; if (!(sl = dp->d_namlen)) { dp = &idp->assocent; sname = dp->d_name + 1; sl = dp->d_namlen - 1; } if (sl > 0) { if (sl != cl || bcmp(sname,cname,sl)) { if (idp->assocent.d_namlen) { if ((error = iso_uiodir(idp,&idp->assocent,idp->assocoff)) != 0) return (error); idp->assocent.d_namlen = 0; } if (idp->saveent.d_namlen) { if ((error = iso_uiodir(idp,&idp->saveent,idp->saveoff)) != 0) return (error); idp->saveent.d_namlen = 0; } } } idp->current.d_reclen = GENERIC_DIRSIZ(&idp->current); if (assoc) { idp->assocoff = idp->curroff; bcopy(&idp->current,&idp->assocent,idp->current.d_reclen); } else { idp->saveoff = idp->curroff; bcopy(&idp->current,&idp->saveent,idp->current.d_reclen); } return (0); } /* * Vnode op for readdir */ static int cd9660_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long *a_cookies; } */ *ap; { register struct uio *uio = ap->a_uio; struct isoreaddir *idp; struct vnode *vdp = ap->a_vp; struct iso_node *dp; struct iso_mnt *imp; 
struct buf *bp = NULL; struct iso_directory_record *ep; int entryoffsetinblock; doff_t endsearch; u_long bmask; int error = 0; int reclen; u_short namelen; int ncookies = 0; u_long *cookies = NULL; dp = VTOI(vdp); imp = dp->i_mnt; bmask = imp->im_bmask; MALLOC(idp, struct isoreaddir *, sizeof(*idp), M_TEMP, M_WAITOK); idp->saveent.d_namlen = idp->assocent.d_namlen = 0; /* * XXX * Is it worth trying to figure out the type? */ idp->saveent.d_type = idp->assocent.d_type = idp->current.d_type = DT_UNKNOWN; idp->uio = uio; if (ap->a_ncookies == NULL) { idp->cookies = NULL; } else { /* * Guess the number of cookies needed. */ ncookies = uio->uio_resid / 16; MALLOC(cookies, u_long *, ncookies * sizeof(u_int), M_TEMP, M_WAITOK); idp->cookies = cookies; idp->ncookies = ncookies; } idp->eofflag = 1; idp->curroff = uio->uio_offset; if ((entryoffsetinblock = idp->curroff & bmask) && (error = cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp))) { FREE(idp, M_TEMP); return (error); } endsearch = dp->i_size; while (idp->curroff < endsearch) { /* * If offset is on a block boundary, * read the next directory block. * Release previous if it exists. */ if ((idp->curroff & bmask) == 0) { if (bp != NULL) brelse(bp); if ((error = cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp)) != 0) break; entryoffsetinblock = 0; } /* * Get pointer to next entry. */ ep = (struct iso_directory_record *) ((char *)bp->b_data + entryoffsetinblock); reclen = isonum_711(ep->length); if (reclen == 0) { /* skip to next block, if any */ idp->curroff = (idp->curroff & ~bmask) + imp->logical_block_size; continue; } if (reclen < ISO_DIRECTORY_RECORD_SIZE) { error = EINVAL; /* illegal entry, stop */ break; } if (entryoffsetinblock + reclen > imp->logical_block_size) { error = EINVAL; /* illegal directory, so stop looking */ break; } idp->current.d_namlen = isonum_711(ep->name_len); if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) { error = EINVAL; /* illegal entry, stop */ break; } if (isonum_711(ep->flags)&2) idp->current.d_fileno = isodirino(ep, imp); else idp->current.d_fileno = dbtob(bp->b_blkno) + entryoffsetinblock; idp->curroff += reclen; switch (imp->iso_ftype) { case ISO_FTYPE_RRIP: cd9660_rrip_getname(ep,idp->current.d_name, &namelen, &idp->current.d_fileno,imp); idp->current.d_namlen = (u_char)namelen; if (idp->current.d_namlen) error = iso_uiodir(idp,&idp->current,idp->curroff); break; default: /* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 || ISO_FTYPE_HIGH_SIERRA*/ strcpy(idp->current.d_name,".."); if (idp->current.d_namlen == 1 && ep->name[0] == 0) { idp->current.d_namlen = 1; error = iso_uiodir(idp,&idp->current,idp->curroff); } else if (idp->current.d_namlen == 1 && ep->name[0] == 1) { idp->current.d_namlen = 2; error = iso_uiodir(idp,&idp->current,idp->curroff); } else { isofntrans(ep->name,idp->current.d_namlen, idp->current.d_name, &namelen, imp->iso_ftype == ISO_FTYPE_9660, isonum_711(ep->flags)&4, imp->joliet_level); idp->current.d_namlen = (u_char)namelen; if (imp->iso_ftype == ISO_FTYPE_DEFAULT) error = iso_shipdir(idp); else error = iso_uiodir(idp,&idp->current,idp->curroff); } } if (error) break; entryoffsetinblock += reclen; } if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) { idp->current.d_namlen = 0; error = iso_shipdir(idp); } if (error < 0) error = 0; if (ap->a_ncookies != NULL) { if (error) free(cookies, M_TEMP); else { /* * Work out the number of cookies actually used. 
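/*
 * Illustrative sketch (not from the tree): the readdir loop above walks
 * variable-length ISO 9660 directory records and treats three conditions
 * specially -- a zero length (the rest of the block is padding), a record
 * shorter than the 33-byte fixed header, and a record that would cross the
 * block boundary or not contain its own name.  The same validation over one
 * in-memory block, with a made-up callback:
 */
#include <stddef.h>

#define DIRREC_FIXED 33         /* fixed part of an ISO 9660 directory record */

/* Visit each record in one logical block; return 0 on success, -1 on a bad record. */
static int walk_dir_block(const unsigned char *blk, size_t blksize,
    void (*visit)(const unsigned char *rec, size_t len))
{
    size_t off = 0;

    while (off < blksize) {
        size_t reclen = blk[off];              /* isonum_711: a single byte */

        if (reclen == 0)
            break;                             /* rest of block is padding */
        if (reclen < DIRREC_FIXED || off + reclen > blksize)
            return -1;                         /* corrupt directory */
        if (reclen < DIRREC_FIXED + blk[off + 32])
            return -1;                         /* name does not fit the record */
        visit(blk + off, reclen);
        off += reclen;
    }
    return 0;
}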
*/ *ap->a_ncookies = ncookies - idp->ncookies; *ap->a_cookies = cookies; } } if (bp) brelse (bp); uio->uio_offset = idp->uio_off; *ap->a_eofflag = idp->eofflag; FREE(idp, M_TEMP); return (error); } /* * Return target name of a symbolic link * Shouldn't we get the parent vnode and read the data from there? * This could eventually result in deadlocks in cd9660_lookup. * But otherwise the block read here is in the block buffer two times. */ typedef struct iso_directory_record ISODIR; typedef struct iso_node ISONODE; typedef struct iso_mnt ISOMNT; static int cd9660_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { ISONODE *ip; ISODIR *dirp; ISOMNT *imp; struct buf *bp; struct uio *uio; u_short symlen; int error; char *symname; ip = VTOI(ap->a_vp); imp = ip->i_mnt; uio = ap->a_uio; if (imp->iso_ftype != ISO_FTYPE_RRIP) return (EINVAL); /* * Get parents directory record block that this inode included. */ error = bread(imp->im_devvp, (ip->i_number >> imp->im_bshift) << (imp->im_bshift - DEV_BSHIFT), imp->logical_block_size, NOCRED, &bp); if (error) { brelse(bp); return (EINVAL); } /* * Setup the directory pointer for this inode */ dirp = (ISODIR *)(bp->b_data + (ip->i_number & imp->im_bmask)); /* * Just make sure, we have a right one.... * 1: Check not cross boundary on block */ if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length) > (unsigned)imp->logical_block_size) { brelse(bp); return (EINVAL); } /* * Now get a buffer * Abuse a namei buffer for now. */ if (uio->uio_segflg == UIO_SYSSPACE) symname = uio->uio_iov->iov_base; else symname = zalloc(namei_zone); /* * Ok, we just gathering a symbolic name in SL record. */ if (cd9660_rrip_getsymname(dirp, symname, &symlen, imp) == 0) { if (uio->uio_segflg != UIO_SYSSPACE) zfree(namei_zone, symname); brelse(bp); return (EINVAL); } /* * Don't forget before you leave from home ;-) */ brelse(bp); /* * return with the symbolic name to caller's. */ if (uio->uio_segflg != UIO_SYSSPACE) { error = uiomove(symname, symlen, uio); zfree(namei_zone, symname); return (error); } uio->uio_resid -= symlen; uio->uio_iov->iov_base += symlen; uio->uio_iov->iov_len -= symlen; return (0); } /* * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually * done. If a buffer has been saved in anticipation of a CREATE, delete it. */ static int cd9660_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (0); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. */ static int cd9660_strategy(ap) struct vop_strategy_args /* { struct buf *a_vp; struct buf *a_bp; } */ *ap; { register struct buf *bp = ap->a_bp; register struct vnode *vp = bp->b_vp; register struct iso_node *ip; int error; ip = VTOI(vp); if (vp->v_type == VBLK || vp->v_type == VCHR) panic("cd9660_strategy: spec"); if (bp->b_blkno == bp->b_lblkno) { if ((error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL))) { bp->b_error = error; bp->b_flags |= B_ERROR; biodone(bp); return (error); } if ((long)bp->b_blkno == -1) clrbuf(bp); } if ((long)bp->b_blkno == -1) { biodone(bp); return (0); } vp = ip->i_devvp; bp->b_dev = vp->v_rdev; VOP_STRATEGY(vp, bp); return (0); } /* * Print out the contents of an inode. 
*/ static int cd9660_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_ISOFS, isofs vnode\n"); return (0); } /* * Return POSIX pathconf information applicable to cd9660 filesystems. */ static int cd9660_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; register_t *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = 1; return (0); case _PC_NAME_MAX: if (VTOI(ap->a_vp)->i_mnt->iso_ftype == ISO_FTYPE_RRIP) *ap->a_retval = NAME_MAX; else *ap->a_retval = 37; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_NO_TRUNC: *ap->a_retval = 1; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * get page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int cd9660_getpages(ap) struct vop_getpages_args *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage); } /* * put page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int cd9660_putpages(ap) struct vop_putpages_args *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } /* * Global vfs data structures for cd9660 */ vop_t **cd9660_vnodeop_p; static struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) cd9660_abortop }, { &vop_access_desc, (vop_t *) cd9660_access }, { &vop_bmap_desc, (vop_t *) cd9660_bmap }, { &vop_cachedlookup_desc, (vop_t *) cd9660_lookup }, { &vop_getattr_desc, (vop_t *) cd9660_getattr }, { &vop_inactive_desc, (vop_t *) cd9660_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, { &vop_pathconf_desc, (vop_t *) cd9660_pathconf }, { &vop_print_desc, (vop_t *) cd9660_print }, { &vop_read_desc, (vop_t *) cd9660_read }, { &vop_readdir_desc, (vop_t *) cd9660_readdir }, { &vop_readlink_desc, (vop_t *) cd9660_readlink }, { &vop_reclaim_desc, (vop_t *) cd9660_reclaim }, { &vop_setattr_desc, (vop_t *) cd9660_setattr }, { &vop_strategy_desc, (vop_t *) cd9660_strategy }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { &vop_getpages_desc, (vop_t *) cd9660_getpages }, { &vop_putpages_desc, (vop_t *) cd9660_putpages }, { NULL, NULL } }; static struct vnodeopv_desc cd9660_vnodeop_opv_desc = { &cd9660_vnodeop_p, cd9660_vnodeop_entries }; VNODEOP_SET(cd9660_vnodeop_opv_desc); /* * Special device vnode ops */ vop_t **cd9660_specop_p; static struct vnodeopv_entry_desc cd9660_specop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_access_desc, (vop_t *) cd9660_access }, { &vop_getattr_desc, (vop_t *) cd9660_getattr }, { &vop_inactive_desc, (vop_t *) cd9660_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_print_desc, (vop_t *) cd9660_print }, { &vop_reclaim_desc, (vop_t *) cd9660_reclaim }, { &vop_setattr_desc, (vop_t *) cd9660_setattr }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { NULL, NULL } }; static struct vnodeopv_desc cd9660_specop_opv_desc = { &cd9660_specop_p, cd9660_specop_entries }; VNODEOP_SET(cd9660_specop_opv_desc); vop_t **cd9660_fifoop_p; static struct vnodeopv_entry_desc cd9660_fifoop_entries[] = { { &vop_default_desc, (vop_t *) fifo_vnoperate }, 
{ &vop_access_desc, (vop_t *) cd9660_access }, { &vop_getattr_desc, (vop_t *) cd9660_getattr }, { &vop_inactive_desc, (vop_t *) cd9660_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_print_desc, (vop_t *) cd9660_print }, { &vop_reclaim_desc, (vop_t *) cd9660_reclaim }, { &vop_setattr_desc, (vop_t *) cd9660_setattr }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { NULL, NULL } }; static struct vnodeopv_desc cd9660_fifoop_opv_desc = { &cd9660_fifoop_p, cd9660_fifoop_entries }; VNODEOP_SET(cd9660_fifoop_opv_desc); Index: head/sys/kern/kern_conf.c =================================================================== --- head/sys/kern/kern_conf.c (revision 49534) +++ head/sys/kern/kern_conf.c (revision 49535) @@ -1,285 +1,304 @@ /*- * Parts Copyright (c) 1995 Terrence R. Lambert * Copyright (c) 1995 Julian R. Elischer * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Terrence R. Lambert. * 4. The name Terrence R. Lambert may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY Julian R. Elischer ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE TERRENCE R. LAMBERT BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: kern_conf.c,v 1.53 1999/07/20 21:51:12 green Exp $ + * $Id: kern_conf.c,v 1.54 1999/08/08 00:34:00 grog Exp $ */ #include #include #include #include #include #include #include #include +#include -#include - #define cdevsw_ALLOCSTART (NUMCDEVSW/2) struct cdevsw *cdevsw[NUMCDEVSW]; static int bmaj2cmaj[NUMCDEVSW]; MALLOC_DEFINE(M_DEVT, "dev_t", "dev_t storage"); #define DEVT_HASH 83 #define DEVT_STASH 50 static struct specinfo devt_stash[DEVT_STASH]; static SLIST_HEAD(devt_hash_head, specinfo) dev_hash[DEVT_HASH]; /* * Routine to convert from character to block device number. * * A minimal stub routine can always return NODEV. 
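/*
 * Illustrative sketch (not from the tree): the dev_t helpers that follow pack
 * the major number into bits 8..15 of a udev_t and spread the minor over the
 * remaining bits (the low byte plus everything above bit 15), which is why
 * minor() masks with 0xffff00ff.  A standalone round-trip of that encoding,
 * using an invented type name:
 */
#include <assert.h>
#include <stdint.h>

typedef uint32_t my_udev_t;     /* hypothetical stand-in for udev_t */

static my_udev_t pack_udev(int maj, int min) { return ((my_udev_t)maj << 8) | (my_udev_t)min; }
static int unpack_major(my_udev_t u)         { return (u >> 8) & 0xff; }
static int unpack_minor(my_udev_t u)         { return u & 0xffff00ff; }

int main(void)
{
    my_udev_t u = pack_udev(13, 0x10003);     /* a minor larger than 255 still fits */

    assert(unpack_major(u) == 13);
    assert(unpack_minor(u) == 0x10003);
    return 0;
}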
*/ dev_t chrtoblk(dev_t dev) { struct cdevsw *cd; if((cd = devsw(dev)) != NULL) { if (cd->d_bmaj != -1) return(makebdev(cd->d_bmaj,minor(dev))); } return(NODEV); } struct cdevsw * devsw(dev_t dev) { return(cdevsw[major(dev)]); } struct cdevsw * bdevsw(dev_t dev) { return(cdevsw[major(dev)]); } /* * Add a cdevsw entry */ int cdevsw_add(struct cdevsw *newentry) { int i; static int setup; if (!setup) { for (i = 0; i < NUMCDEVSW; i++) if (!bmaj2cmaj[i]) bmaj2cmaj[i] = 254; setup++; } if (newentry->d_maj < 0 || newentry->d_maj >= NUMCDEVSW) { printf("%s: ERROR: driver has bogus cdevsw->d_maj = %d\n", newentry->d_name, newentry->d_maj); return EINVAL; } if (cdevsw[newentry->d_maj]) { printf("WARNING: \"%s\" is usurping \"%s\"'s cdevsw[]\n", newentry->d_name, cdevsw[newentry->d_maj]->d_name); } cdevsw[newentry->d_maj] = newentry; if (newentry->d_bmaj >= 0 && newentry->d_bmaj < NUMCDEVSW) { if (bmaj2cmaj[newentry->d_bmaj] != 254) { printf("WARNING: \"%s\" is usurping \"%s\"'s bmaj\n", newentry->d_name, cdevsw[bmaj2cmaj[newentry->d_bmaj]]->d_name); } bmaj2cmaj[newentry->d_bmaj] = newentry->d_maj; } return 0; } /* * Remove a cdevsw entry */ int cdevsw_remove(struct cdevsw *oldentry) { if (oldentry->d_maj < 0 || oldentry->d_maj >= NUMCDEVSW) { printf("%s: ERROR: driver has bogus cdevsw->d_maj = %d\n", oldentry->d_name, oldentry->d_maj); return EINVAL; } cdevsw[oldentry->d_maj] = NULL; if (oldentry->d_bmaj >= 0 && oldentry->d_bmaj < NUMCDEVSW) bmaj2cmaj[oldentry->d_bmaj] = 254; return 0; } int devsw_module_handler(module_t mod, int what, void* arg) { struct devsw_module_data* data = (struct devsw_module_data*) arg; int error = 0; switch (what) { case MOD_LOAD: error = cdevsw_add(data->cdevsw); if (!error && data->chainevh) error = data->chainevh(mod, what, data->chainarg); return error; case MOD_UNLOAD: if (data->chainevh) { error = data->chainevh(mod, what, data->chainarg); if (error) return error; } cdevsw_remove(data->cdevsw); return error; } if (data->chainevh) return data->chainevh(mod, what, data->chainarg); else return 0; } /* * dev_t and u_dev_t primitives */ int major(dev_t x) { if (x == NODEV) return NOUDEV; return((x->si_udev >> 8) & 0xff); } int minor(dev_t x) { if (x == NODEV) return NOUDEV; return(x->si_udev & 0xffff00ff); } dev_t makebdev(int x, int y) { return (makedev(bmaj2cmaj[x], y)); } dev_t makedev(int x, int y) { struct specinfo *si; udev_t udev; int hash; static int stashed; udev = (x << 8) | y; hash = udev % DEVT_HASH; SLIST_FOREACH(si, &dev_hash[hash], si_hash) { if (si->si_udev == udev) return (si); } if (stashed >= DEVT_STASH) { MALLOC(si, struct specinfo *, sizeof(*si), M_DEVT, M_USE_RESERVE); } else { si = devt_stash + stashed++; } bzero(si, sizeof(*si)); si->si_udev = udev; si->si_bsize_phys = DEV_BSIZE; si->si_bsize_best = BLKDEV_IOSIZE; si->si_bsize_max = MAXBSIZE; + if (y > 256) + sprintf(si->si_name, "#%d/0x%x", x, y); + else + sprintf(si->si_name, "#%d/%d", x, y); SLIST_INSERT_HEAD(&dev_hash[hash], si, si_hash); return (si); } udev_t dev2udev(dev_t x) { if (x == NODEV) return NOUDEV; return (x->si_udev); } udev_t dev2budev(dev_t x) { if (x == NODEV) return NOUDEV; else return makeudev(devsw(x)->d_bmaj, minor(x)); } dev_t udev2dev(udev_t x, int b) { switch (b) { case 0: return makedev(umajor(x), uminor(x)); case 1: return makebdev(umajor(x), uminor(x)); default: Debugger("udev2dev(...,X)"); return NODEV; } } int uminor(udev_t dev) { return(dev & 0xffff00ff); } int umajor(udev_t dev) { return((dev & 0xff00) >> 8); } udev_t makeudev(int x, int y) { return ((x << 8) | 
y); +} + +dev_t +make_dev(struct cdevsw *devsw, int minor, uid_t uid, gid_t gid, int perms, char *fmt, ...) +{ + dev_t dev; + va_list ap; + int i; + + dev = makedev(devsw->d_maj, minor); + va_start(ap, fmt); + i = kvprintf(fmt, NULL, dev->si_name, 32, ap); + dev->si_name[i] = '\0'; + va_end(ap); + dev->si_devsw = devsw; + return (dev); } Index: head/sys/kern/kern_mib.c =================================================================== --- head/sys/kern/kern_mib.c (revision 49534) +++ head/sys/kern/kern_mib.c (revision 49535) @@ -1,251 +1,251 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD * project, to make these variables more userfriendly. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 - * $Id: kern_mib.c,v 1.21 1999/07/19 09:13:12 phk Exp $ + * $Id: kern_mib.c,v 1.22 1999/07/20 07:19:32 phk Exp $ */ #include #include #include #include #include #include #include #if defined(SMP) #include #endif SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0, "Sysctl internal magic"); SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW, 0, "High kernel, proc, limits &c"); SYSCTL_NODE(, CTL_VM, vm, CTLFLAG_RW, 0, "Virtual memory"); SYSCTL_NODE(, CTL_VFS, vfs, CTLFLAG_RW, 0, "File system"); SYSCTL_NODE(, CTL_NET, net, CTLFLAG_RW, 0, "Network, (see socket.h)"); SYSCTL_NODE(, CTL_DEBUG, debug, CTLFLAG_RW, 0, "Debugging"); SYSCTL_NODE(_debug, OID_AUTO, sizeof, CTLFLAG_RW, 0, "Sizeof various things"); SYSCTL_NODE(, CTL_HW, hw, CTLFLAG_RW, 0, "hardware"); SYSCTL_NODE(, CTL_MACHDEP, machdep, CTLFLAG_RW, 0, "machine dependent"); SYSCTL_NODE(, CTL_USER, user, CTLFLAG_RW, 0, "user-level"); SYSCTL_NODE(, CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0, "p1003_1b, (see p1003_1b.h)"); SYSCTL_NODE(_kern, OID_AUTO, prison, CTLFLAG_RW, 0, "Prison rules"); SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD, osrelease, 0, "Operating system type"); SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD, 0, BSD, "Operating system revision"); SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD, version, 0, "Kernel version"); SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD, ostype, 0, "Operating system type"); extern int osreldate; SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD, &osreldate, 0, "Operating system release date"); SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RD, &maxproc, 0, "Maximum number of processes"); SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW, &maxprocperuid, 0, "Maximum processes allowed per userid"); SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD, 0, ARG_MAX, "Maximum bytes of argument to execve(2)"); SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD, 0, _KPOSIX_VERSION, "Version of POSIX attempting to comply to"); SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RD, 0, NGROUPS_MAX, "Maximum number of groups a user can belong to"); SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD, 0, 1, "Whether job control is available"); #ifdef _POSIX_SAVED_IDS SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD, 0, 1, "Whether saved set-group/user ID is available"); #else SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD, 0, 0, "Whether saved set-group/user ID is available"); #endif char kernelname[MAXPATHLEN] = "/kernel"; /* XXX bloat */ SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW, kernelname, sizeof kernelname, "Name of kernel file booted"); #ifdef SMP SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD, &mp_ncpus, 0, "Number of active CPUs"); #else SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD, 0, 1, "Number of active CPUs"); #endif SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD, 0, BYTE_ORDER, "System byte order"); SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD, 0, PAGE_SIZE, "System memory page size"); static char machine_arch[] = MACHINE_ARCH; SYSCTL_STRING(_hw, HW_MACHINE_ARCH, machine_arch, CTLFLAG_RD, machine_arch, 0, "System architecture"); char hostname[MAXHOSTNAMELEN]; static int sysctl_hostname SYSCTL_HANDLER_ARGS { int error; if (req->p->p_prison) error = sysctl_handle_string(oidp, req->p->p_prison->pr_host, sizeof req->p->p_prison->pr_host, req); else error = sysctl_handle_string(oidp, hostname, sizeof hostname, req); return (error); } SYSCTL_PROC(_kern, KERN_HOSTNAME, hostname, 
CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, sysctl_hostname, "A", "Hostname"); int securelevel = -1; static int sysctl_kern_securelvl SYSCTL_HANDLER_ARGS { int error, level; level = securelevel; error = sysctl_handle_int(oidp, &level, 0, req); if (error || !req->newptr) return (error); if (level < securelevel) return (EPERM); securelevel = level; return (error); } SYSCTL_PROC(_kern, KERN_SECURELVL, securelevel, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_kern_securelvl, "I", "Current secure level"); char domainname[MAXHOSTNAMELEN]; SYSCTL_STRING(_kern, KERN_NISDOMAINNAME, domainname, CTLFLAG_RW, &domainname, sizeof(domainname), "Name of the current YP/NIS domain"); long hostid; /* Some trouble here, if sizeof (int) != sizeof (long) */ SYSCTL_INT(_kern, KERN_HOSTID, hostid, CTLFLAG_RW, &hostid, 0, "Host ID"); /* * This is really cheating. These actually live in the libc, something * which I'm not quite sure is a good idea anyway, but in order for * getnext and friends to actually work, we define dummies here. */ SYSCTL_STRING(_user, USER_CS_PATH, cs_path, CTLFLAG_RD, "", 0, "PATH that finds all the standard utilities"); SYSCTL_INT(_user, USER_BC_BASE_MAX, bc_base_max, CTLFLAG_RD, 0, 0, "Max ibase/obase values in bc(1)"); SYSCTL_INT(_user, USER_BC_DIM_MAX, bc_dim_max, CTLFLAG_RD, 0, 0, "Max array size in bc(1)"); SYSCTL_INT(_user, USER_BC_SCALE_MAX, bc_scale_max, CTLFLAG_RD, 0, 0, "Max scale value in bc(1)"); SYSCTL_INT(_user, USER_BC_STRING_MAX, bc_string_max, CTLFLAG_RD, 0, 0, "Max string length in bc(1)"); SYSCTL_INT(_user, USER_COLL_WEIGHTS_MAX, coll_weights_max, CTLFLAG_RD, 0, 0, "Maximum number of weights assigned to an LC_COLLATE locale entry"); SYSCTL_INT(_user, USER_EXPR_NEST_MAX, expr_nest_max, CTLFLAG_RD, 0, 0, ""); SYSCTL_INT(_user, USER_LINE_MAX, line_max, CTLFLAG_RD, 0, 0, "Max length (bytes) of a text-processing utility's input line"); SYSCTL_INT(_user, USER_RE_DUP_MAX, re_dup_max, CTLFLAG_RD, 0, 0, "Maximum number of repeats of a regexp permitted"); SYSCTL_INT(_user, USER_POSIX2_VERSION, posix2_version, CTLFLAG_RD, 0, 0, "The version of POSIX 1003.2 with which the system attempts to comply"); SYSCTL_INT(_user, USER_POSIX2_C_BIND, posix2_c_bind, CTLFLAG_RD, 0, 0, "Whether C development supports the C bindings option"); SYSCTL_INT(_user, USER_POSIX2_C_DEV, posix2_c_dev, CTLFLAG_RD, 0, 0, "Whether system supports the C development utilities option"); SYSCTL_INT(_user, USER_POSIX2_CHAR_TERM, posix2_char_term, CTLFLAG_RD, 0, 0, ""); SYSCTL_INT(_user, USER_POSIX2_FORT_DEV, posix2_fort_dev, CTLFLAG_RD, 0, 0, "Whether system supports FORTRAN development utilities"); SYSCTL_INT(_user, USER_POSIX2_FORT_RUN, posix2_fort_run, CTLFLAG_RD, 0, 0, "Whether system supports FORTRAN runtime utilities"); SYSCTL_INT(_user, USER_POSIX2_LOCALEDEF, posix2_localedef, CTLFLAG_RD, 0, 0, "Whether system supports creation of locales"); SYSCTL_INT(_user, USER_POSIX2_SW_DEV, posix2_sw_dev, CTLFLAG_RD, 0, 0, "Whether system supports software development utilities"); SYSCTL_INT(_user, USER_POSIX2_UPE, posix2_upe, CTLFLAG_RD, 0, 0, "Whether system supports the user portability utilities"); SYSCTL_INT(_user, USER_STREAM_MAX, stream_max, CTLFLAG_RD, 0, 0, "Min Maximum number of streams a process may have open at one time"); SYSCTL_INT(_user, USER_TZNAME_MAX, tzname_max, CTLFLAG_RD, 0, 0, "Min Maximum number of types supported for timezone names"); #include SYSCTL_INT(_debug_sizeof, OID_AUTO, vnode, CTLFLAG_RD, 0, sizeof(struct vnode), "sizeof(struct vnode)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, proc, CTLFLAG_RD, 
0, sizeof(struct proc), "sizeof(struct proc)"); -#include +#include SYSCTL_INT(_debug_sizeof, OID_AUTO, specinfo, CTLFLAG_RD, 0, sizeof(struct specinfo), "sizeof(struct specinfo)"); Index: head/sys/kern/vfs_aio.c =================================================================== --- head/sys/kern/vfs_aio.c (revision 49534) +++ head/sys/kern/vfs_aio.c (revision 49535) @@ -1,2008 +1,2007 @@ /* * Copyright (c) 1997 John S. Dyson. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. John S. Dyson's name may not be used to endorse or promote products * derived from this software without specific prior written permission. * * DISCLAIMER: This code isn't warranted to do anything useful. Anything * bad that happens because of using this software isn't the responsibility * of the author. This software is distributed AS-IS. * - * $Id: vfs_aio.c,v 1.53 1999/06/30 15:33:36 peter Exp $ + * $Id: vfs_aio.c,v 1.54 1999/07/01 13:21:40 peter Exp $ */ /* * This file contains support for the POSIX 1003.1B AIO/LIO facility. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include static long jobrefid; #define JOBST_NULL 0x0 #define JOBST_JOBQPROC 0x1 #define JOBST_JOBQGLOBAL 0x2 #define JOBST_JOBRUNNING 0x3 #define JOBST_JOBFINISHED 0x4 #define JOBST_JOBQBUF 0x5 #define JOBST_JOBBFINISHED 0x6 #ifndef MAX_AIO_PER_PROC #define MAX_AIO_PER_PROC 32 #endif #ifndef MAX_AIO_QUEUE_PER_PROC #define MAX_AIO_QUEUE_PER_PROC 256 /* Bigger than AIO_LISTIO_MAX */ #endif #ifndef MAX_AIO_PROCS #define MAX_AIO_PROCS 32 #endif #ifndef MAX_AIO_QUEUE #define MAX_AIO_QUEUE 1024 /* Bigger than AIO_LISTIO_MAX */ #endif #ifndef TARGET_AIO_PROCS #define TARGET_AIO_PROCS 0 #endif #ifndef MAX_BUF_AIO #define MAX_BUF_AIO 16 #endif #ifndef AIOD_TIMEOUT_DEFAULT #define AIOD_TIMEOUT_DEFAULT (10 * hz) #endif #ifndef AIOD_LIFETIME_DEFAULT #define AIOD_LIFETIME_DEFAULT (30 * hz) #endif static int max_aio_procs = MAX_AIO_PROCS; static int num_aio_procs = 0; static int target_aio_procs = TARGET_AIO_PROCS; static int max_queue_count = MAX_AIO_QUEUE; static int num_queue_count = 0; static int num_buf_aio = 0; static int num_aio_resv_start = 0; static int aiod_timeout; static int aiod_lifetime; static int max_aio_per_proc = MAX_AIO_PER_PROC, max_aio_queue_per_proc=MAX_AIO_QUEUE_PER_PROC; static int max_buf_aio = MAX_BUF_AIO; SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "AIO mgmt"); SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, CTLFLAG_RW, &max_aio_per_proc, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, CTLFLAG_RW, &max_aio_queue_per_proc, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, CTLFLAG_RW, &max_aio_procs, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, CTLFLAG_RD, &num_aio_procs, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, CTLFLAG_RD, &num_queue_count, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, CTLFLAG_RW, &max_queue_count, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, CTLFLAG_RW, &target_aio_procs, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio, CTLFLAG_RW, &max_buf_aio, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, 
CTLFLAG_RD, &num_buf_aio, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, CTLFLAG_RW, &aiod_lifetime, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_timeout, CTLFLAG_RW, &aiod_timeout, 0, ""); /* * Job queue item */ #define AIOCBLIST_CANCELLED 0x1 #define AIOCBLIST_RUNDOWN 0x4 #define AIOCBLIST_ASYNCFREE 0x8 #define AIOCBLIST_DONE 0x10 struct aiocblist { TAILQ_ENTRY (aiocblist) list; /* List of jobs */ TAILQ_ENTRY (aiocblist) plist; /* List of jobs for proc */ int jobflags; int jobstate; int inputcharge, outputcharge; struct buf *bp; /* buffer pointer */ struct proc *userproc; /* User process */ struct aioproclist *jobaioproc; /* AIO process descriptor */ struct aio_liojob *lio; /* optional lio job */ struct aiocb *uuaiocb; /* pointer in userspace of aiocb */ struct aiocb uaiocb; /* Kernel I/O control block */ }; /* * AIO process info */ #define AIOP_FREE 0x1 /* proc on free queue */ #define AIOP_SCHED 0x2 /* proc explicitly scheduled */ struct aioproclist { int aioprocflags; /* AIO proc flags */ TAILQ_ENTRY(aioproclist) list; /* List of processes */ struct proc *aioproc; /* The AIO thread */ TAILQ_HEAD (,aiocblist) jobtorun; /* suggested job to run */ }; /* * data-structure for lio signal management */ struct aio_liojob { int lioj_flags; int lioj_buffer_count; int lioj_buffer_finished_count; int lioj_queue_count; int lioj_queue_finished_count; struct sigevent lioj_signal; /* signal on all I/O done */ TAILQ_ENTRY (aio_liojob) lioj_list; struct kaioinfo *lioj_ki; }; #define LIOJ_SIGNAL 0x1 /* signal on all done (lio) */ #define LIOJ_SIGNAL_POSTED 0x2 /* signal has been posted */ /* * per process aio data structure */ struct kaioinfo { int kaio_flags; /* per process kaio flags */ int kaio_maxactive_count; /* maximum number of AIOs */ int kaio_active_count; /* number of currently used AIOs */ int kaio_qallowed_count; /* maxiumu size of AIO queue */ int kaio_queue_count; /* size of AIO queue */ int kaio_ballowed_count; /* maximum number of buffers */ int kaio_queue_finished_count; /* number of daemon jobs finished */ int kaio_buffer_count; /* number of physio buffers */ int kaio_buffer_finished_count; /* count of I/O done */ struct proc *kaio_p; /* process that uses this kaio block */ TAILQ_HEAD (,aio_liojob) kaio_liojoblist; /* list of lio jobs */ TAILQ_HEAD (,aiocblist) kaio_jobqueue; /* job queue for process */ TAILQ_HEAD (,aiocblist) kaio_jobdone; /* done queue for process */ TAILQ_HEAD (,aiocblist) kaio_bufqueue; /* buffer job queue for process */ TAILQ_HEAD (,aiocblist) kaio_bufdone; /* buffer done queue for process */ }; #define KAIO_RUNDOWN 0x1 /* process is being run down */ #define KAIO_WAKEUP 0x2 /* wakeup process when there is a significant event */ static TAILQ_HEAD (,aioproclist) aio_freeproc, aio_activeproc; static TAILQ_HEAD(,aiocblist) aio_jobs; /* Async job list */ static TAILQ_HEAD(,aiocblist) aio_bufjobs; /* Phys I/O job list */ static TAILQ_HEAD(,aiocblist) aio_freejobs; /* Pool of free jobs */ static void aio_init_aioinfo(struct proc *p) ; static void aio_onceonly(void *) ; static int aio_free_entry(struct aiocblist *aiocbe); static void aio_process(struct aiocblist *aiocbe); static int aio_newproc(void) ; static int aio_aqueue(struct proc *p, struct aiocb *job, int type) ; static void aio_physwakeup(struct buf *bp); static int aio_fphysio(struct proc *p, struct aiocblist *aiocbe, int type); static int aio_qphysio(struct proc *p, struct aiocblist *iocb); static void aio_daemon(void *uproc); SYSINIT(aio, SI_SUB_VFS, SI_ORDER_ANY, aio_onceonly, NULL); static vm_zone_t 
kaio_zone=0, aiop_zone=0, aiocb_zone=0, aiol_zone=0, aiolio_zone=0; /* * Startup initialization */ void aio_onceonly(void *na) { TAILQ_INIT(&aio_freeproc); TAILQ_INIT(&aio_activeproc); TAILQ_INIT(&aio_jobs); TAILQ_INIT(&aio_bufjobs); TAILQ_INIT(&aio_freejobs); kaio_zone = zinit("AIO", sizeof (struct kaioinfo), 0, 0, 1); aiop_zone = zinit("AIOP", sizeof (struct aioproclist), 0, 0, 1); aiocb_zone = zinit("AIOCB", sizeof (struct aiocblist), 0, 0, 1); aiol_zone = zinit("AIOL", AIO_LISTIO_MAX * sizeof (int), 0, 0, 1); aiolio_zone = zinit("AIOLIO", AIO_LISTIO_MAX * sizeof (struct aio_liojob), 0, 0, 1); aiod_timeout = AIOD_TIMEOUT_DEFAULT; aiod_lifetime = AIOD_LIFETIME_DEFAULT; jobrefid = 1; } /* * Init the per-process aioinfo structure. * The aioinfo limits are set per-process for user limit (resource) management. */ void aio_init_aioinfo(struct proc *p) { struct kaioinfo *ki; if (p->p_aioinfo == NULL) { ki = zalloc(kaio_zone); p->p_aioinfo = ki; ki->kaio_flags = 0; ki->kaio_maxactive_count = max_aio_per_proc; ki->kaio_active_count = 0; ki->kaio_qallowed_count = max_aio_queue_per_proc; ki->kaio_queue_count = 0; ki->kaio_ballowed_count = max_buf_aio; ki->kaio_buffer_count = 0; ki->kaio_buffer_finished_count = 0; ki->kaio_p = p; TAILQ_INIT(&ki->kaio_jobdone); TAILQ_INIT(&ki->kaio_jobqueue); TAILQ_INIT(&ki->kaio_bufdone); TAILQ_INIT(&ki->kaio_bufqueue); TAILQ_INIT(&ki->kaio_liojoblist); } } /* * Free a job entry. Wait for completion if it is currently * active, but don't delay forever. If we delay, we return * a flag that says that we have to restart the queue scan. */ int aio_free_entry(struct aiocblist *aiocbe) { struct kaioinfo *ki; struct aioproclist *aiop; struct aio_liojob *lj; struct proc *p; int error; int s; if (aiocbe->jobstate == JOBST_NULL) panic("aio_free_entry: freeing already free job"); p = aiocbe->userproc; ki = p->p_aioinfo; lj = aiocbe->lio; if (ki == NULL) panic("aio_free_entry: missing p->p_aioinfo"); if (aiocbe->jobstate == JOBST_JOBRUNNING) { if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) return 0; aiocbe->jobflags |= AIOCBLIST_RUNDOWN; tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", 0); } aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; if (aiocbe->bp == NULL) { if (ki->kaio_queue_count <= 0) panic("aio_free_entry: process queue size <= 0"); if (num_queue_count <= 0) panic("aio_free_entry: system wide queue size <= 0"); if(lj) { lj->lioj_queue_count--; if (aiocbe->jobflags & AIOCBLIST_DONE) lj->lioj_queue_finished_count--; } ki->kaio_queue_count--; if (aiocbe->jobflags & AIOCBLIST_DONE) ki->kaio_queue_finished_count--; num_queue_count--; } else { if(lj) { lj->lioj_buffer_count--; if (aiocbe->jobflags & AIOCBLIST_DONE) lj->lioj_buffer_finished_count--; } if (aiocbe->jobflags & AIOCBLIST_DONE) ki->kaio_buffer_finished_count--; ki->kaio_buffer_count--; num_buf_aio--; } if ((ki->kaio_flags & KAIO_WAKEUP) || ((ki->kaio_flags & KAIO_RUNDOWN) && ((ki->kaio_buffer_count == 0) && (ki->kaio_queue_count == 0)))) { ki->kaio_flags &= ~KAIO_WAKEUP; wakeup(p); } if ( aiocbe->jobstate == JOBST_JOBQBUF) { if ((error = aio_fphysio(p, aiocbe, 1)) != 0) return error; if (aiocbe->jobstate != JOBST_JOBBFINISHED) panic("aio_free_entry: invalid physio finish-up state"); s = splbio(); TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist); splx(s); } else if ( aiocbe->jobstate == JOBST_JOBQPROC) { aiop = aiocbe->jobaioproc; TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); } else if ( aiocbe->jobstate == JOBST_JOBQGLOBAL) { TAILQ_REMOVE(&aio_jobs, aiocbe, list); } else if ( aiocbe->jobstate == JOBST_JOBFINISHED) { 
TAILQ_REMOVE(&ki->kaio_jobdone, aiocbe, plist); } else if ( aiocbe->jobstate == JOBST_JOBBFINISHED) { s = splbio(); TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist); splx(s); if (aiocbe->bp) { vunmapbuf(aiocbe->bp); relpbuf(aiocbe->bp, NULL); aiocbe->bp = NULL; } } if (lj && (lj->lioj_buffer_count == 0) && (lj->lioj_queue_count == 0)) { TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); zfree(aiolio_zone, lj); } TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); aiocbe->jobstate = JOBST_NULL; return 0; } /* * Rundown the jobs for a given process. */ void aio_proc_rundown(struct proc *p) { int s; struct kaioinfo *ki; struct aio_liojob *lj, *ljn; struct aiocblist *aiocbe, *aiocbn; ki = p->p_aioinfo; if (ki == NULL) return; ki->kaio_flags |= LIOJ_SIGNAL_POSTED; while ((ki->kaio_active_count > 0) || (ki->kaio_buffer_count > ki->kaio_buffer_finished_count)) { ki->kaio_flags |= KAIO_RUNDOWN; if (tsleep(p, PRIBIO, "kaiowt", aiod_timeout)) break; } restart1: for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobdone); aiocbe; aiocbe = aiocbn) { aiocbn = TAILQ_NEXT(aiocbe, plist); if (aio_free_entry(aiocbe)) goto restart1; } restart2: for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue); aiocbe; aiocbe = aiocbn) { aiocbn = TAILQ_NEXT(aiocbe, plist); if (aio_free_entry(aiocbe)) goto restart2; } /* * Note the use of lots of splbio here, trying to avoid * splbio for long chains of I/O. Probably unnecessary. */ restart3: s = splbio(); while (TAILQ_FIRST(&ki->kaio_bufqueue)) { ki->kaio_flags |= KAIO_WAKEUP; tsleep (p, PRIBIO, "aioprn", 0); splx(s); goto restart3; } splx(s); restart4: s = splbio(); for ( aiocbe = TAILQ_FIRST(&ki->kaio_bufdone); aiocbe; aiocbe = aiocbn) { aiocbn = TAILQ_NEXT(aiocbe, plist); if (aio_free_entry(aiocbe)) { splx(s); goto restart4; } } splx(s); for ( lj = TAILQ_FIRST(&ki->kaio_liojoblist); lj; lj = ljn) { ljn = TAILQ_NEXT(lj, lioj_list); if ((lj->lioj_buffer_count == 0) && (lj->lioj_queue_count == 0)) { TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); zfree(aiolio_zone, lj); } else { #if defined(DIAGNOSTIC) printf("LIO job not cleaned up: B:%d, BF:%d, Q:%d, QF:%d\n", lj->lioj_buffer_count, lj->lioj_buffer_finished_count, lj->lioj_queue_count, lj->lioj_queue_finished_count); #endif } } zfree(kaio_zone, ki); p->p_aioinfo = NULL; } /* * Select a job to run (called by an AIO daemon) */ static struct aiocblist * aio_selectjob(struct aioproclist *aiop) { struct aiocblist *aiocbe; aiocbe = TAILQ_FIRST(&aiop->jobtorun); if (aiocbe) { TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); return aiocbe; } for (aiocbe = TAILQ_FIRST(&aio_jobs); aiocbe; aiocbe = TAILQ_NEXT(aiocbe, list)) { struct kaioinfo *ki; struct proc *userp; userp = aiocbe->userproc; ki = userp->p_aioinfo; if (ki->kaio_active_count < ki->kaio_maxactive_count) { TAILQ_REMOVE(&aio_jobs, aiocbe, list); return aiocbe; } } return NULL; } /* * The AIO processing activity. This is the code that does the * I/O request for the non-physio version of the operations. The * normal vn operations are used, and this code should work in * all instances for every type of file, including pipes, sockets, * fifos, and regular files. 
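 *
 * Illustrative userland analogue (not part of this change): aio_process()
 * below takes the transfer offset from the control block and passes
 * FOF_OFFSET to fo_read/fo_write, so the I/O happens at the requested
 * offset without moving the descriptor's seek position.  pread(2) has the
 * same semantics from user code; the path used here is only an example.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char buf[16];
	ssize_t n;
	int fd = open("/etc/motd", O_RDONLY);	/* any readable file works */

	if (fd == -1)
		return (1);
	n = pread(fd, buf, sizeof(buf), 8);	/* read at offset 8 ... */
	printf("read %zd bytes, seek position still %ld\n",
	    n, (long)lseek(fd, 0, SEEK_CUR));	/* ... position stays at 0 */
	close(fd);
	return (0);
}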
*/ void aio_process(struct aiocblist *aiocbe) { struct filedesc *fdp; struct proc *userp, *mycp; struct aiocb *cb; struct file *fp; struct uio auio; struct iovec aiov; unsigned int fd; int cnt; int error; off_t offset; int oublock_st, oublock_end; int inblock_st, inblock_end; userp = aiocbe->userproc; cb = &aiocbe->uaiocb; mycp = curproc; fdp = mycp->p_fd; fd = cb->aio_fildes; fp = fdp->fd_ofiles[fd]; aiov.iov_base = (void *) cb->aio_buf; aiov.iov_len = cb->aio_nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset = cb->aio_offset; auio.uio_resid = cb->aio_nbytes; cnt = cb->aio_nbytes; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = mycp; inblock_st = mycp->p_stats->p_ru.ru_inblock; oublock_st = mycp->p_stats->p_ru.ru_oublock; if (cb->aio_lio_opcode == LIO_READ) { auio.uio_rw = UIO_READ; error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred, FOF_OFFSET); } else { auio.uio_rw = UIO_WRITE; error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred, FOF_OFFSET); } inblock_end = mycp->p_stats->p_ru.ru_inblock; oublock_end = mycp->p_stats->p_ru.ru_oublock; aiocbe->inputcharge = inblock_end - inblock_st; aiocbe->outputcharge = oublock_end - oublock_st; if (error) { if (auio.uio_resid != cnt) { if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) error = 0; if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) psignal(userp, SIGPIPE); } } cnt -= auio.uio_resid; cb->_aiocb_private.error = error; cb->_aiocb_private.status = cnt; return; } /* * The AIO daemon, most of the actual work is done in aio_process, * but the setup (and address space mgmt) is done in this routine. */ static void aio_daemon(void *uproc) { int s; struct aioproclist *aiop; struct vmspace *myvm; struct proc *mycp; /* * Local copies of curproc (cp) and vmspace (myvm) */ mycp = curproc; myvm = mycp->p_vmspace; if (mycp->p_textvp) { vrele(mycp->p_textvp); mycp->p_textvp = NULL; } /* * Allocate and ready the aio control info. There is one * aiop structure per daemon. */ aiop = zalloc(aiop_zone); aiop->aioproc = mycp; aiop->aioprocflags |= AIOP_FREE; TAILQ_INIT(&aiop->jobtorun); /* * Place thread (lightweight process) onto the AIO free thread list */ if (TAILQ_EMPTY(&aio_freeproc)) wakeup(&aio_freeproc); TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); /* * Make up a name for the daemon */ strcpy(mycp->p_comm, "aiod"); /* * Get rid of our current filedescriptors. AIOD's don't need any * filedescriptors, except as temporarily inherited from the client. * Credentials are also cloned, and made equivalent to "root." */ fdfree(mycp); mycp->p_fd = NULL; mycp->p_ucred = crcopy(mycp->p_ucred); mycp->p_ucred->cr_uid = 0; mycp->p_ucred->cr_ngroups = 1; mycp->p_ucred->cr_groups[0] = 1; /* * The daemon resides in its own pgrp. */ enterpgrp(mycp, mycp->p_pid, 1); /* * Mark special process type */ mycp->p_flag |= P_SYSTEM|P_KTHREADP; /* * Wakeup parent process. (Parent sleeps to keep from blasting away * creating to many daemons.) */ wakeup(mycp); while(1) { struct proc *curcp; struct aiocblist *aiocbe; /* * curcp is the current daemon process context. * userp is the current user process context. 
*/ curcp = mycp; /* * Take daemon off of free queue */ if (aiop->aioprocflags & AIOP_FREE) { TAILQ_REMOVE(&aio_freeproc, aiop, list); TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list); aiop->aioprocflags &= ~AIOP_FREE; } aiop->aioprocflags &= ~AIOP_SCHED; /* * Check for jobs */ while ((aiocbe = aio_selectjob(aiop)) != NULL) { struct proc *userp; struct aiocb *cb; struct kaioinfo *ki; struct aio_liojob *lj; cb = &aiocbe->uaiocb; userp = aiocbe->userproc; aiocbe->jobstate = JOBST_JOBRUNNING; /* * Connect to process address space for user program */ if (userp != curcp) { struct vmspace *tmpvm; /* * Save the current address space that we are connected to. */ tmpvm = mycp->p_vmspace; /* * Point to the new user address space, and refer to it. */ mycp->p_vmspace = userp->p_vmspace; mycp->p_vmspace->vm_refcnt++; /* * Activate the new mapping. */ pmap_activate(mycp); /* * If the old address space wasn't the daemons own address * space, then we need to remove the daemon's reference from * the other process that it was acting on behalf of. */ if (tmpvm != myvm) { vmspace_free(tmpvm); } /* * Disassociate from previous clients file descriptors, and * associate to the new clients descriptors. Note that * the daemon doesn't need to worry about its orginal * descriptors, because they were originally freed. */ if (mycp->p_fd) fdfree(mycp); mycp->p_fd = fdshare(userp); curcp = userp; } ki = userp->p_aioinfo; lj = aiocbe->lio; /* * Account for currently active jobs */ ki->kaio_active_count++; /* * Do the I/O function */ aiocbe->jobaioproc = aiop; aio_process(aiocbe); /* * decrement the active job count */ ki->kaio_active_count--; /* * increment the completion count for wakeup/signal comparisons */ aiocbe->jobflags |= AIOCBLIST_DONE; ki->kaio_queue_finished_count++; if (lj) { lj->lioj_queue_finished_count++; } if ((ki->kaio_flags & KAIO_WAKEUP) || ((ki->kaio_flags & KAIO_RUNDOWN) && (ki->kaio_active_count == 0))) { ki->kaio_flags &= ~KAIO_WAKEUP; wakeup(userp); } s = splbio(); if (lj && (lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) == LIOJ_SIGNAL) { if ((lj->lioj_queue_finished_count == lj->lioj_queue_count) && (lj->lioj_buffer_finished_count == lj->lioj_buffer_count)) { psignal(userp, lj->lioj_signal.sigev_signo); lj->lioj_flags |= LIOJ_SIGNAL_POSTED; } } splx(s); aiocbe->jobstate = JOBST_JOBFINISHED; /* * If the I/O request should be automatically rundown, do the * needed cleanup. Otherwise, place the queue entry for * the just finished I/O request into the done queue for the * associated client. */ if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) { aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); } else { TAILQ_REMOVE(&ki->kaio_jobqueue, aiocbe, plist); TAILQ_INSERT_TAIL(&ki->kaio_jobdone, aiocbe, plist); } if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) { wakeup(aiocbe); aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN; } if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { psignal(userp, cb->aio_sigevent.sigev_signo); } } /* * Disconnect from user address space */ if (curcp != mycp) { struct vmspace *tmpvm; /* * Get the user address space to disconnect from. */ tmpvm = mycp->p_vmspace; /* * Get original address space for daemon. */ mycp->p_vmspace = myvm; /* * Activate the daemon's address space. */ pmap_activate(mycp); #if defined(DIAGNOSTIC) if (tmpvm == myvm) printf("AIOD: vmspace problem -- %d\n", mycp->p_pid); #endif /* * remove our vmspace reference. */ vmspace_free(tmpvm); /* * disassociate from the user process's file descriptors. 
*/ if (mycp->p_fd) fdfree(mycp); mycp->p_fd = NULL; curcp = mycp; } /* * If we are the first to be put onto the free queue, wakeup * anyone waiting for a daemon. */ TAILQ_REMOVE(&aio_activeproc, aiop, list); if (TAILQ_EMPTY(&aio_freeproc)) wakeup(&aio_freeproc); TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); aiop->aioprocflags |= AIOP_FREE; /* * If daemon is inactive for a long time, allow it to exit, thereby * freeing resources. */ if (((aiop->aioprocflags & AIOP_SCHED) == 0) && tsleep(mycp, PRIBIO, "aiordy", aiod_lifetime)) { if ((TAILQ_FIRST(&aio_jobs) == NULL) && (TAILQ_FIRST(&aiop->jobtorun) == NULL)) { if ((aiop->aioprocflags & AIOP_FREE) && (num_aio_procs > target_aio_procs)) { TAILQ_REMOVE(&aio_freeproc, aiop, list); zfree(aiop_zone, aiop); num_aio_procs--; #if defined(DIAGNOSTIC) if (mycp->p_vmspace->vm_refcnt <= 1) printf("AIOD: bad vm refcnt for exiting daemon: %d\n", mycp->p_vmspace->vm_refcnt); #endif exit1(mycp, 0); } } } } } /* * Create a new AIO daemon. This is mostly a kernel-thread fork routine. * The AIO daemon modifies its environment itself. */ static int aio_newproc() { int error; struct proc *p, *np; p = &proc0; error = fork1(p, RFPROC|RFMEM|RFNOWAIT, &np); if (error) return error; cpu_set_fork_handler(np, aio_daemon, curproc); /* * Wait until daemon is started, but continue on just in case (to * handle error conditions. */ error = tsleep(np, PZERO, "aiosta", aiod_timeout); num_aio_procs++; return error; } /* * Try the high-performance physio method for eligible VCHR devices. This * routine doesn't require the use of any additional threads, and have * overhead. */ int aio_qphysio(p, aiocbe) struct proc *p; struct aiocblist *aiocbe; { int error; struct aiocb *cb; struct file *fp; struct buf *bp; int bflags; struct vnode *vp; struct kaioinfo *ki; struct filedesc *fdp; struct aio_liojob *lj; int fd; int s; int cnt; dev_t dev; int rw; d_strategy_t *fstrategy; struct cdevsw *cdev; struct cdevsw *bdev; cb = &aiocbe->uaiocb; fdp = p->p_fd; fd = cb->aio_fildes; fp = fdp->fd_ofiles[fd]; if (fp->f_type != DTYPE_VNODE) { return -1; } vp = (struct vnode *)fp->f_data; if (vp->v_type != VCHR || ((cb->aio_nbytes & (DEV_BSIZE - 1)) != 0)) { return -1; } if ((cb->aio_nbytes > MAXPHYS) && (num_buf_aio >= max_buf_aio)) { return -1; } if ((vp->v_specinfo == NULL) || (vp->v_flag & VISTTY)) { return -1; } if (vp->v_rdev == NODEV) { return -1; } cdev = devsw(vp->v_rdev); if (cdev == NULL) { return -1; } if (cdev->d_bmaj == -1) { return -1; } bdev = cdev; ki = p->p_aioinfo; if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) { return -1; } cnt = cb->aio_nbytes; if (cnt > MAXPHYS) { return -1; } dev = makebdev(bdev->d_bmaj, minor(vp->v_rdev)); /* * Physical I/O is charged directly to the process, so we don't have * to fake it. 
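 *
 * Illustrative sketch (not part of this change): the eligibility tests
 * earlier in aio_qphysio() reject transfers whose length is not a whole
 * number of DEV_BSIZE blocks.  Since DEV_BSIZE is a power of two,
 * "len & (DEV_BSIZE - 1)" isolates the remainder without a division;
 * a hypothetical stand-alone check:
 */
#include <stdio.h>

#define SKETCH_DEV_BSIZE 512		/* power-of-two block size */

static int
is_blk_aligned(size_t len)
{
	/* non-zero low bits mean len is not a whole number of blocks */
	return ((len & (SKETCH_DEV_BSIZE - 1)) == 0);
}

int
main(void)
{
	printf("%d %d\n", is_blk_aligned(4096), is_blk_aligned(1000));	/* 1 0 */
	return (0);
}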
*/ aiocbe->inputcharge = 0; aiocbe->outputcharge = 0; ki->kaio_buffer_count++; lj = aiocbe->lio; if (lj) { lj->lioj_buffer_count++; } /* create and build a buffer header for a transfer */ bp = (struct buf *)getpbuf(NULL); /* * get a copy of the kva from the physical buffer */ bp->b_caller1 = p; bp->b_dev = dev; error = bp->b_error = 0; if (cb->aio_lio_opcode == LIO_WRITE) { rw = 0; bflags = B_WRITE; } else { rw = 1; bflags = B_READ; } bp->b_bcount = cb->aio_nbytes; bp->b_bufsize = cb->aio_nbytes; bp->b_flags = B_PHYS | B_CALL | bflags; bp->b_iodone = aio_physwakeup; bp->b_saveaddr = bp->b_data; bp->b_data = (void *) cb->aio_buf; bp->b_blkno = btodb(cb->aio_offset); if (rw && !useracc(bp->b_data, bp->b_bufsize, B_WRITE)) { error = EFAULT; goto doerror; } if (!rw && !useracc(bp->b_data, bp->b_bufsize, B_READ)) { error = EFAULT; goto doerror; } /* bring buffer into kernel space */ vmapbuf(bp); s = splbio(); aiocbe->bp = bp; bp->b_spc = (void *)aiocbe; TAILQ_INSERT_TAIL(&aio_bufjobs, aiocbe, list); TAILQ_INSERT_TAIL(&ki->kaio_bufqueue, aiocbe, plist); aiocbe->jobstate = JOBST_JOBQBUF; cb->_aiocb_private.status = cb->aio_nbytes; num_buf_aio++; fstrategy = bdev->d_strategy; bp->b_error = 0; splx(s); /* perform transfer */ (*fstrategy)(bp); s = splbio(); /* * If we had an error invoking the request, or an error in processing * the request before we have returned, we process it as an error * in transfer. Note that such an I/O error is not indicated immediately, * but is returned using the aio_error mechanism. In this case, aio_suspend * will return immediately. */ if (bp->b_error || (bp->b_flags & B_ERROR)) { struct aiocb *job = aiocbe->uuaiocb; aiocbe->uaiocb._aiocb_private.status = 0; suword(&job->_aiocb_private.status, 0); aiocbe->uaiocb._aiocb_private.error = bp->b_error; suword(&job->_aiocb_private.error, bp->b_error); ki->kaio_buffer_finished_count++; if (aiocbe->jobstate != JOBST_JOBBFINISHED) { aiocbe->jobstate = JOBST_JOBBFINISHED; aiocbe->jobflags |= AIOCBLIST_DONE; TAILQ_REMOVE(&aio_bufjobs, aiocbe, list); TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist); TAILQ_INSERT_TAIL(&ki->kaio_bufdone, aiocbe, plist); } } splx(s); return 0; doerror: ki->kaio_buffer_count--; if (lj) { lj->lioj_buffer_count--; } aiocbe->bp = NULL; relpbuf(bp, NULL); return error; } /* * This waits/tests physio completion. */ int aio_fphysio(p, iocb, flgwait) struct proc *p; struct aiocblist *iocb; int flgwait; { int s; struct buf *bp; int error; bp = iocb->bp; s = splbio(); if (flgwait == 0) { if ((bp->b_flags & B_DONE) == 0) { splx(s); return EINPROGRESS; } } while ((bp->b_flags & B_DONE) == 0) { if (tsleep((caddr_t)bp, PRIBIO, "physstr", aiod_timeout)) { if ((bp->b_flags & B_DONE) == 0) { splx(s); return EINPROGRESS; } else { break; } } } /* release mapping into kernel space */ vunmapbuf(bp); iocb->bp = 0; error = 0; /* * check for an error */ if (bp->b_flags & B_ERROR) { error = bp->b_error; } relpbuf(bp, NULL); return (error); } /* * Queue a new AIO request. Choosing either the threaded or direct physio * VCHR technique is done in this code. 
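 *
 * Illustrative sketch (not part of this change): _aio_aqueue() below pops
 * a control block off the aio_freejobs TAILQ when one is cached and only
 * allocates when the free list is empty, pushing entries back on the head
 * for reuse.  The same pattern works in a stand-alone program with the
 * <sys/queue.h> macros (all "sketch_*" names are hypothetical):
 */
#include <sys/queue.h>
#include <stdio.h>
#include <stdlib.h>

struct sketch_job {
	int				id;
	TAILQ_ENTRY(sketch_job)		list;	/* linkage, like aiocblist */
};

static TAILQ_HEAD(, sketch_job) free_jobs = TAILQ_HEAD_INITIALIZER(free_jobs);

static struct sketch_job *
job_get(void)
{
	struct sketch_job *j;

	if ((j = TAILQ_FIRST(&free_jobs)) != NULL)
		TAILQ_REMOVE(&free_jobs, j, list);	/* reuse a cached entry */
	else
		j = malloc(sizeof(*j));			/* otherwise allocate */
	return (j);
}

static void
job_put(struct sketch_job *j)
{
	TAILQ_INSERT_HEAD(&free_jobs, j, list);		/* back onto the free list */
}

int
main(void)
{
	struct sketch_job *a = job_get(), *b;

	if (a == NULL)
		return (1);
	a->id = 1;
	job_put(a);
	b = job_get();				/* gets the recycled entry */
	printf("recycled id %d\n", b->id);	/* prints 1 */
	free(b);
	return (0);
}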
*/ static int _aio_aqueue(struct proc *p, struct aiocb *job, struct aio_liojob *lj, int type) { struct filedesc *fdp; struct file *fp; unsigned int fd; int error; int opcode; struct aiocblist *aiocbe; struct aioproclist *aiop; struct kaioinfo *ki; if ((aiocbe = TAILQ_FIRST(&aio_freejobs)) != NULL) { TAILQ_REMOVE(&aio_freejobs, aiocbe, list); } else { aiocbe = zalloc (aiocb_zone); } aiocbe->inputcharge = 0; aiocbe->outputcharge = 0; suword(&job->_aiocb_private.status, -1); suword(&job->_aiocb_private.error, 0); suword(&job->_aiocb_private.kernelinfo, -1); error = copyin((caddr_t)job, (caddr_t) &aiocbe->uaiocb, sizeof aiocbe->uaiocb); if (error) { suword(&job->_aiocb_private.error, error); TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); return error; } /* * Save userspace address of the job info */ aiocbe->uuaiocb = job; /* * Get the opcode */ if (type != LIO_NOP) { aiocbe->uaiocb.aio_lio_opcode = type; } opcode = aiocbe->uaiocb.aio_lio_opcode; /* * Get the fd info for process */ fdp = p->p_fd; /* * Range check file descriptor */ fd = aiocbe->uaiocb.aio_fildes; if (fd >= fdp->fd_nfiles) { TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); if (type == 0) { suword(&job->_aiocb_private.error, EBADF); } return EBADF; } fp = fdp->fd_ofiles[fd]; if ((fp == NULL) || ((opcode == LIO_WRITE) && ((fp->f_flag & FWRITE) == 0))) { TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); if (type == 0) { suword(&job->_aiocb_private.error, EBADF); } return EBADF; } if (aiocbe->uaiocb.aio_offset == -1LL) { TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); if (type == 0) { suword(&job->_aiocb_private.error, EINVAL); } return EINVAL; } error = suword(&job->_aiocb_private.kernelinfo, jobrefid); if (error) { TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); if (type == 0) { suword(&job->_aiocb_private.error, EINVAL); } return error; } aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jobrefid; if (jobrefid == LONG_MAX) jobrefid = 1; else jobrefid++; if (opcode == LIO_NOP) { TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); if (type == 0) { suword(&job->_aiocb_private.error, 0); suword(&job->_aiocb_private.status, 0); suword(&job->_aiocb_private.kernelinfo, 0); } return 0; } if ((opcode != LIO_READ) && (opcode != LIO_WRITE)) { TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); if (type == 0) { suword(&job->_aiocb_private.status, 0); suword(&job->_aiocb_private.error, EINVAL); } return EINVAL; } suword(&job->_aiocb_private.error, EINPROGRESS); aiocbe->uaiocb._aiocb_private.error = EINPROGRESS; aiocbe->userproc = p; aiocbe->jobflags = 0; aiocbe->lio = lj; ki = p->p_aioinfo; if ((error = aio_qphysio(p, aiocbe)) == 0) { return 0; } else if (error > 0) { suword(&job->_aiocb_private.status, 0); aiocbe->uaiocb._aiocb_private.error = error; suword(&job->_aiocb_private.error, error); return error; } /* * No buffer for daemon I/O */ aiocbe->bp = NULL; ki->kaio_queue_count++; if (lj) { lj->lioj_queue_count++; } TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist); TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list); aiocbe->jobstate = JOBST_JOBQGLOBAL; num_queue_count++; error = 0; /* * If we don't have a free AIO process, and we are below our * quota, then start one. Otherwise, depend on the subsequent * I/O completions to pick-up this job. If we don't sucessfully * create the new process (thread) due to resource issues, we * return an error for now (EAGAIN), which is likely not the * correct thing to do. 
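 *
 * Illustrative userland analogue (not part of this change): the
 * "retryproc" logic that follows hands the job to an idle daemon when one
 * exists and only creates a new one while below the configured maximum,
 * reporting EAGAIN only if creation itself fails.  A threaded sketch of
 * that policy (all names hypothetical):
 */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

#define SKETCH_MAX_WORKERS 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int idle_workers;	/* workers waiting for a job */
static int num_workers;		/* workers created so far */

static void *
worker(void *arg)
{
	(void)arg;		/* a real worker would loop on a job queue */
	return (NULL);
}

/* Returns 0 if a worker is available or was started, EAGAIN otherwise. */
static int
ensure_worker(void)
{
	pthread_t tid;
	int error = 0;

	pthread_mutex_lock(&lock);
	if (idle_workers > 0) {
		idle_workers--;			/* hand the job to an idle worker */
	} else if (num_workers < SKETCH_MAX_WORKERS) {
		if (pthread_create(&tid, NULL, worker, NULL) == 0) {
			pthread_detach(tid);
			num_workers++;
		} else {
			error = EAGAIN;		/* resource shortage, as in the kernel path */
		}
	}
	pthread_mutex_unlock(&lock);
	return (error);
}

int
main(void)
{
	printf("ensure_worker -> %d (workers now %d)\n", ensure_worker(), num_workers);
	return (0);
}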
*/ retryproc: if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) { TAILQ_REMOVE(&aio_freeproc, aiop, list); TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list); aiop->aioprocflags &= ~AIOP_FREE; wakeup(aiop->aioproc); } else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) && ((ki->kaio_active_count + num_aio_resv_start) < ki->kaio_maxactive_count)) { num_aio_resv_start++; if ((error = aio_newproc()) == 0) { num_aio_resv_start--; p->p_retval[0] = 0; goto retryproc; } num_aio_resv_start--; } return error; } /* * This routine queues an AIO request, checking for quotas. */ static int aio_aqueue(struct proc *p, struct aiocb *job, int type) { struct kaioinfo *ki; if (p->p_aioinfo == NULL) { aio_init_aioinfo(p); } if (num_queue_count >= max_queue_count) return EAGAIN; ki = p->p_aioinfo; if (ki->kaio_queue_count >= ki->kaio_qallowed_count) return EAGAIN; return _aio_aqueue(p, job, NULL, type); } /* * Support the aio_return system call, as a side-effect, kernel * resources are released. */ int aio_return(struct proc *p, struct aio_return_args *uap) { int s; int jobref; struct aiocblist *cb, *ncb; struct aiocb *ujob; struct kaioinfo *ki; ki = p->p_aioinfo; if (ki == NULL) { return EINVAL; } ujob = uap->aiocbp; jobref = fuword(&ujob->_aiocb_private.kernelinfo); if (jobref == -1 || jobref == 0) return EINVAL; for (cb = TAILQ_FIRST(&ki->kaio_jobdone); cb; cb = TAILQ_NEXT(cb, plist)) { if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { if (ujob == cb->uuaiocb) { p->p_retval[0] = cb->uaiocb._aiocb_private.status; } else { p->p_retval[0] = EFAULT; } if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) { curproc->p_stats->p_ru.ru_oublock += cb->outputcharge; cb->outputcharge = 0; } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) { curproc->p_stats->p_ru.ru_inblock += cb->inputcharge; cb->inputcharge = 0; } aio_free_entry(cb); return 0; } } s = splbio(); for (cb = TAILQ_FIRST(&ki->kaio_bufdone); cb; cb = ncb) { ncb = TAILQ_NEXT(cb, plist); if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { splx(s); if (ujob == cb->uuaiocb) { p->p_retval[0] = cb->uaiocb._aiocb_private.status; } else { p->p_retval[0] = EFAULT; } aio_free_entry(cb); return 0; } } splx(s); return (EINVAL); } /* * Allow a process to wakeup when any of the I/O requests are * completed. 
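 *
 * Illustrative userland sketch (not part of this change): how the
 * facility implemented in this file is driven from a program.  One
 * request is queued with aio_read(), aio_suspend() sleeps until it
 * completes or the timeout expires, and aio_return() fetches the byte
 * count and releases the kernel resources.  The file name is a made-up
 * example; some systems need -lrt or the AIO option enabled.
 */
#include <aio.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

int
main(void)
{
	static char buf[512];
	struct aiocb cb;
	const struct aiocb *list[1];
	struct timespec ts = { 5, 0 };		/* wait at most five seconds */
	int fd = open("/etc/services", O_RDONLY);

	if (fd == -1)
		return (1);
	memset(&cb, 0, sizeof(cb));
	cb.aio_fildes = fd;
	cb.aio_buf = buf;
	cb.aio_nbytes = sizeof(buf);
	cb.aio_offset = 0;
	if (aio_read(&cb) != 0)			/* queue the request */
		return (1);
	list[0] = &cb;
	if (aio_suspend(list, 1, &ts) == 0)	/* sleep until it completes */
		printf("read %zd bytes asynchronously\n", aio_return(&cb));
	close(fd);
	return (0);
}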
*/ int aio_suspend(struct proc *p, struct aio_suspend_args *uap) { struct timeval atv; struct timespec ts; struct aiocb *const *cbptr, *cbp; struct kaioinfo *ki; struct aiocblist *cb; int i; int njoblist; int error, s, timo; int *ijoblist; struct aiocb **ujoblist; if (uap->nent >= AIO_LISTIO_MAX) return EINVAL; timo = 0; if (uap->timeout) { /* * Get timespec struct */ if ((error = copyin(uap->timeout, &ts, sizeof(ts))) != 0) { return error; } if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000) return (EINVAL); TIMESPEC_TO_TIMEVAL(&atv, &ts); if (itimerfix(&atv)) return (EINVAL); timo = tvtohz(&atv); } ki = p->p_aioinfo; if (ki == NULL) return EAGAIN; njoblist = 0; ijoblist = zalloc(aiol_zone); ujoblist = zalloc(aiol_zone); cbptr = uap->aiocbp; for(i = 0; i < uap->nent; i++) { cbp = (struct aiocb *) (intptr_t) fuword((caddr_t) &cbptr[i]); if (cbp == 0) continue; ujoblist[njoblist] = cbp; ijoblist[njoblist] = fuword(&cbp->_aiocb_private.kernelinfo); njoblist++; } if (njoblist == 0) { zfree(aiol_zone, ijoblist); zfree(aiol_zone, ujoblist); return 0; } error = 0; while (1) { for (cb = TAILQ_FIRST(&ki->kaio_jobdone); cb; cb = TAILQ_NEXT(cb, plist)) { for(i = 0; i < njoblist; i++) { if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == ijoblist[i]) { if (ujoblist[i] != cb->uuaiocb) error = EINVAL; zfree(aiol_zone, ijoblist); zfree(aiol_zone, ujoblist); return error; } } } s = splbio(); for (cb = TAILQ_FIRST(&ki->kaio_bufdone); cb; cb = TAILQ_NEXT(cb, plist)) { for(i = 0; i < njoblist; i++) { if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == ijoblist[i]) { splx(s); if (ujoblist[i] != cb->uuaiocb) error = EINVAL; zfree(aiol_zone, ijoblist); zfree(aiol_zone, ujoblist); return error; } } } ki->kaio_flags |= KAIO_WAKEUP; error = tsleep(p, PRIBIO|PCATCH, "aiospn", timo); splx(s); if (error == EINTR) { zfree(aiol_zone, ijoblist); zfree(aiol_zone, ujoblist); return EINTR; } else if (error == EWOULDBLOCK) { zfree(aiol_zone, ijoblist); zfree(aiol_zone, ujoblist); return EAGAIN; } } /* NOTREACHED */ return EINVAL; } /* * aio_cancel at the kernel level is a NOOP right now. It * might be possible to support it partially in user mode, or * in kernel mode later on. */ int aio_cancel(struct proc *p, struct aio_cancel_args *uap) { return ENOSYS; } /* * aio_error is implemented in the kernel level for compatibility * purposes only. For a user mode async implementation, it would be * best to do it in a userland subroutine. 
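 *
 * Illustrative userland sketch (not part of this change): the polling
 * counterpart to the aio_suspend() example above.  aio_error() keeps
 * returning EINPROGRESS until the request finishes, so a caller can
 * overlap other work instead of sleeping.  "cb" is assumed to be an
 * aiocb that was already queued with aio_read().
 */
#include <aio.h>
#include <errno.h>
#include <stdio.h>

void
wait_polling(struct aiocb *cb)
{
	int err;

	while ((err = aio_error(cb)) == EINPROGRESS)
		;				/* do other useful work here */
	if (err == 0)
		printf("done, %zd bytes\n", aio_return(cb));
	else
		printf("failed: %d\n", err);
}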
*/ int aio_error(struct proc *p, struct aio_error_args *uap) { int s; struct aiocblist *cb; struct kaioinfo *ki; int jobref; ki = p->p_aioinfo; if (ki == NULL) return EINVAL; jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo); if ((jobref == -1) || (jobref == 0)) return EINVAL; for (cb = TAILQ_FIRST(&ki->kaio_jobdone); cb; cb = TAILQ_NEXT(cb, plist)) { if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { p->p_retval[0] = cb->uaiocb._aiocb_private.error; return 0; } } for (cb = TAILQ_FIRST(&ki->kaio_jobqueue); cb; cb = TAILQ_NEXT(cb, plist)) { if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { p->p_retval[0] = EINPROGRESS; return 0; } } s = splbio(); for (cb = TAILQ_FIRST(&ki->kaio_bufdone); cb; cb = TAILQ_NEXT(cb, plist)) { if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { p->p_retval[0] = cb->uaiocb._aiocb_private.error; splx(s); return 0; } } for (cb = TAILQ_FIRST(&ki->kaio_bufqueue); cb; cb = TAILQ_NEXT(cb, plist)) { if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { p->p_retval[0] = EINPROGRESS; splx(s); return 0; } } splx(s); /* * Hack for lio */ /* status = fuword(&uap->aiocbp->_aiocb_private.status); if (status == -1) { return fuword(&uap->aiocbp->_aiocb_private.error); } */ return EINVAL; } int aio_read(struct proc *p, struct aio_read_args *uap) { struct filedesc *fdp; struct file *fp; struct uio auio; struct iovec aiov; unsigned int fd; int cnt; struct aiocb iocb; int error, pmodes; pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes); if ((pmodes & AIO_PMODE_SYNC) == 0) { return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ); } /* * Get control block */ if ((error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) != 0) return error; /* * Get the fd info for process */ fdp = p->p_fd; /* * Range check file descriptor */ fd = iocb.aio_fildes; if (fd >= fdp->fd_nfiles) return EBADF; fp = fdp->fd_ofiles[fd]; if ((fp == NULL) || ((fp->f_flag & FREAD) == 0)) return EBADF; if (iocb.aio_offset == -1LL) return EINVAL; auio.uio_resid = iocb.aio_nbytes; if (auio.uio_resid < 0) return (EINVAL); /* * Process sync simply -- queue async request. */ if ((iocb._aiocb_private.privatemodes & AIO_PMODE_SYNC) == 0) { return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ); } aiov.iov_base = (void *) iocb.aio_buf; aiov.iov_len = iocb.aio_nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = iocb.aio_offset; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; cnt = iocb.aio_nbytes; error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred, FOF_OFFSET); if (error && (auio.uio_resid != cnt) && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; cnt -= auio.uio_resid; p->p_retval[0] = cnt; return error; } int aio_write(struct proc *p, struct aio_write_args *uap) { struct filedesc *fdp; struct file *fp; struct uio auio; struct iovec aiov; unsigned int fd; int cnt; struct aiocb iocb; int error; int pmodes; /* * Process sync simply -- queue async request. 
*/ pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes); if ((pmodes & AIO_PMODE_SYNC) == 0) { return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_WRITE); } if ((error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb)) != 0) return error; /* * Get the fd info for process */ fdp = p->p_fd; /* * Range check file descriptor */ fd = iocb.aio_fildes; if (fd >= fdp->fd_nfiles) return EBADF; fp = fdp->fd_ofiles[fd]; if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0)) return EBADF; if (iocb.aio_offset == -1LL) return EINVAL; aiov.iov_base = (void *) iocb.aio_buf; aiov.iov_len = iocb.aio_nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = iocb.aio_offset; auio.uio_resid = iocb.aio_nbytes; if (auio.uio_resid < 0) return (EINVAL); auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; cnt = iocb.aio_nbytes; error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred, FOF_OFFSET); if (error) { if (auio.uio_resid != cnt) { if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) error = 0; if (error == EPIPE) psignal(p, SIGPIPE); } } cnt -= auio.uio_resid; p->p_retval[0] = cnt; return error; } int lio_listio(struct proc *p, struct lio_listio_args *uap) { int nent, nentqueued; struct aiocb *iocb, * const *cbptr; struct aiocblist *cb; struct kaioinfo *ki; struct aio_liojob *lj; int error, runningcode; int nerror; int i; int s; if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) { return EINVAL; } nent = uap->nent; if (nent > AIO_LISTIO_MAX) { return EINVAL; } if (p->p_aioinfo == NULL) { aio_init_aioinfo(p); } if ((nent + num_queue_count) > max_queue_count) { return EAGAIN; } ki = p->p_aioinfo; if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count) { return EAGAIN; } lj = zalloc(aiolio_zone); if (!lj) { return EAGAIN; } lj->lioj_flags = 0; lj->lioj_buffer_count = 0; lj->lioj_buffer_finished_count = 0; lj->lioj_queue_count = 0; lj->lioj_queue_finished_count = 0; lj->lioj_ki = ki; TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj, lioj_list); /* * Setup signal */ if (uap->sig && (uap->mode == LIO_NOWAIT)) { error = copyin(uap->sig, &lj->lioj_signal, sizeof lj->lioj_signal); if (error) return error; lj->lioj_flags |= LIOJ_SIGNAL; lj->lioj_flags &= ~LIOJ_SIGNAL_POSTED; } else { lj->lioj_flags &= ~LIOJ_SIGNAL; } /* * get pointers to the list of I/O requests */ nerror = 0; nentqueued = 0; cbptr = uap->acb_list; for(i = 0; i < uap->nent; i++) { iocb = (struct aiocb *) (intptr_t) fuword((caddr_t) &cbptr[i]); if (((intptr_t) iocb != -1) && ((intptr_t) iocb != NULL)) { error = _aio_aqueue(p, iocb, lj, 0); if (error == 0) { nentqueued++; } else { nerror++; } } } /* * If we haven't queued any, then just return error */ if (nentqueued == 0) { return 0; } /* * Calculate the appropriate error return */ runningcode = 0; if (nerror) runningcode = EIO; if (uap->mode == LIO_WAIT) { while (1) { int found; found = 0; for(i = 0; i < uap->nent; i++) { int jobref, command; /* * Fetch address of the control buf pointer in user space */ iocb = (struct aiocb *) (intptr_t) fuword((caddr_t) &cbptr[i]); if (((intptr_t) iocb == -1) || ((intptr_t) iocb == 0)) continue; /* * Fetch the associated command from user space */ command = fuword(&iocb->aio_lio_opcode); if (command == LIO_NOP) { found++; continue; } jobref = fuword(&iocb->_aiocb_private.kernelinfo); for (cb = TAILQ_FIRST(&ki->kaio_jobdone); cb; cb = TAILQ_NEXT(cb, plist)) { if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) { 
curproc->p_stats->p_ru.ru_oublock += cb->outputcharge; cb->outputcharge = 0; } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) { curproc->p_stats->p_ru.ru_inblock += cb->inputcharge; cb->inputcharge = 0; } found++; break; } } s = splbio(); for (cb = TAILQ_FIRST(&ki->kaio_bufdone); cb; cb = TAILQ_NEXT(cb, plist)) { if (((intptr_t) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { found++; break; } } splx(s); } /* * If all I/Os have been disposed of, then we can return */ if (found == nentqueued) { return runningcode; } ki->kaio_flags |= KAIO_WAKEUP; error = tsleep(p, PRIBIO|PCATCH, "aiospn", 0); if (error == EINTR) { return EINTR; } else if (error == EWOULDBLOCK) { return EAGAIN; } } } return runningcode; } /* * This is a wierd hack so that we can post a signal. It is safe * to do so from a timeout routine, but *not* from an interrupt routine. */ static void process_signal(void *ljarg) { struct aio_liojob *lj = ljarg; if (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL) { if (lj->lioj_queue_count == lj->lioj_queue_finished_count) { psignal(lj->lioj_ki->kaio_p, lj->lioj_signal.sigev_signo); lj->lioj_flags |= LIOJ_SIGNAL_POSTED; } } } /* * Interrupt handler for physio, performs the necessary process wakeups, * and signals. */ static void aio_physwakeup(bp) struct buf *bp; { struct aiocblist *aiocbe; struct proc *p; struct kaioinfo *ki; struct aio_liojob *lj; int s; s = splbio(); wakeup((caddr_t) bp); bp->b_flags &= ~B_CALL; bp->b_flags |= B_DONE; aiocbe = (struct aiocblist *)bp->b_spc; if (aiocbe) { p = bp->b_caller1; aiocbe->jobstate = JOBST_JOBBFINISHED; aiocbe->uaiocb._aiocb_private.status -= bp->b_resid; aiocbe->uaiocb._aiocb_private.error = 0; aiocbe->jobflags |= AIOCBLIST_DONE; if (bp->b_flags & B_ERROR) { aiocbe->uaiocb._aiocb_private.error = bp->b_error; } lj = aiocbe->lio; if (lj) { lj->lioj_buffer_finished_count++; /* * wakeup/signal if all of the interrupt jobs are done */ if (lj->lioj_buffer_finished_count == lj->lioj_buffer_count) { /* * post a signal if it is called for */ if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) == LIOJ_SIGNAL) { lj->lioj_flags |= LIOJ_SIGNAL_POSTED; timeout(process_signal, lj, 0); } } } ki = p->p_aioinfo; if (ki) { ki->kaio_buffer_finished_count++; TAILQ_REMOVE(&aio_bufjobs, aiocbe, list); TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist); TAILQ_INSERT_TAIL(&ki->kaio_bufdone, aiocbe, plist); /* * and do the wakeup */ if (ki->kaio_flags & (KAIO_RUNDOWN|KAIO_WAKEUP)) { ki->kaio_flags &= ~KAIO_WAKEUP; wakeup(p); } } } splx(s); } Index: head/sys/kern/vfs_bio.c =================================================================== --- head/sys/kern/vfs_bio.c (revision 49534) +++ head/sys/kern/vfs_bio.c (revision 49535) @@ -1,3106 +1,3106 @@ /* * Copyright (c) 1994,1997 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * - * $Id: vfs_bio.c,v 1.223 1999/07/09 16:41:19 peter Exp $ + * $Id: vfs_bio.c,v 1.224 1999/07/26 06:25:16 alc Exp $ */ /* * this file contains a new buffer I/O scheme implementing a coherent * VM object and buffer cache scheme. 
Pains have been taken to make * sure that the performance degradation associated with schemes such * as this is not realized. * * Author: John S. Dyson * Significant help during the development and debugging phases * had been provided by David Greenman, also of the FreeBSD core team. * * see man buf(9) for more info. */ #define VMIO #include #include #include #include #include #include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer"); struct bio_ops bioops; /* I/O operation notification */ struct buf *buf; /* buffer header pool */ struct swqueue bswlist; static void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); static void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m); static void vfs_clean_pages(struct buf * bp); static void vfs_setdirty(struct buf *bp); static void vfs_vmio_release(struct buf *bp); static int flushbufqueues(void); static int bd_request; static void buf_daemon __P((void)); /* * bogus page -- for I/O to/from partially complete buffers * this is a temporary solution to the problem, but it is not * really that bad. it would be better to split the buffer * for input in the case of buffers partially already in memory, * but the code is intricate enough already. */ vm_page_t bogus_page; int runningbufspace; int vmiodirenable = FALSE; static vm_offset_t bogus_offset; static int bufspace, maxbufspace, vmiospace, bufmallocspace, maxbufmallocspace, hibufspace; #if 0 static int maxvmiobufspace; #endif static int maxbdrun; static int needsbuffer; static int numdirtybuffers, lodirtybuffers, hidirtybuffers; static int numfreebuffers, lofreebuffers, hifreebuffers; static int getnewbufcalls; static int getnewbufrestarts; static int kvafreespace; SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, &numdirtybuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD, &runningbufspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RW, &maxbufspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD, &hibufspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, maxbdrun, CTLFLAG_RW, &maxbdrun, 0, ""); #if 0 SYSCTL_INT(_vfs, OID_AUTO, maxvmiobufspace, CTLFLAG_RW, &maxvmiobufspace, 0, ""); #endif SYSCTL_INT(_vfs, OID_AUTO, vmiospace, CTLFLAG_RD, &vmiospace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, kvafreespace, CTLFLAG_RD, &kvafreespace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RW, &getnewbufcalls, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW, &getnewbufrestarts, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW, &vmiodirenable, 0, ""); static int bufhashmask; static LIST_HEAD(bufhashhdr, buf) 
*bufhashtbl, invalhash; struct bqueues bufqueues[BUFFER_QUEUES] = { { 0 } }; char *buf_wmesg = BUF_WMESG; extern int vm_swap_size; #define BUF_MAXUSE 24 #define VFS_BIO_NEED_ANY 0x01 /* any freeable buffer */ #define VFS_BIO_NEED_DIRTYFLUSH 0x02 /* waiting for dirty buffer flush */ #define VFS_BIO_NEED_FREE 0x04 /* wait for free bufs, hi hysteresis */ #define VFS_BIO_NEED_BUFSPACE 0x08 /* wait for buf space, lo hysteresis */ #define VFS_BIO_NEED_KVASPACE 0x10 /* wait for buffer_map space, emerg */ /* * Buffer hash table code. Note that the logical block scans linearly, which * gives us some L1 cache locality. */ static __inline struct bufhashhdr * bufhash(struct vnode *vnp, daddr_t bn) { return(&bufhashtbl[(((uintptr_t)(vnp) >> 7) + (int)bn) & bufhashmask]); } /* * kvaspacewakeup: * * Called when kva space is potential available for recovery or when * kva space is recovered in the buffer_map. This function wakes up * anyone waiting for buffer_map kva space. Even though the buffer_map * is larger then maxbufspace, this situation will typically occur * when the buffer_map gets fragmented. */ static __inline void kvaspacewakeup(void) { /* * If someone is waiting for KVA space, wake them up. Even * though we haven't freed the kva space yet, the waiting * process will be able to now. */ if (needsbuffer & VFS_BIO_NEED_KVASPACE) { needsbuffer &= ~VFS_BIO_NEED_KVASPACE; wakeup(&needsbuffer); } } /* * numdirtywakeup: * * If someone is blocked due to there being too many dirty buffers, * and numdirtybuffers is now reasonable, wake them up. */ static __inline void numdirtywakeup(void) { if (numdirtybuffers < hidirtybuffers) { if (needsbuffer & VFS_BIO_NEED_DIRTYFLUSH) { needsbuffer &= ~VFS_BIO_NEED_DIRTYFLUSH; wakeup(&needsbuffer); } } } /* * bufspacewakeup: * * Called when buffer space is potentially available for recovery or when * buffer space is recovered. getnewbuf() will block on this flag when * it is unable to free sufficient buffer space. Buffer space becomes * recoverable when bp's get placed back in the queues. */ static __inline void bufspacewakeup(void) { /* * If someone is waiting for BUF space, wake them up. Even * though we haven't freed the kva space yet, the waiting * process will be able to now. */ if (needsbuffer & VFS_BIO_NEED_BUFSPACE) { needsbuffer &= ~VFS_BIO_NEED_BUFSPACE; wakeup(&needsbuffer); } } /* * bufcountwakeup: * * Called when a buffer has been added to one of the free queues to * account for the buffer and to wakeup anyone waiting for free buffers. * This typically occurs when large amounts of metadata are being handled * by the buffer cache ( else buffer space runs out first, usually ). */ static __inline void bufcountwakeup(void) { ++numfreebuffers; if (needsbuffer) { needsbuffer &= ~VFS_BIO_NEED_ANY; if (numfreebuffers >= hifreebuffers) needsbuffer &= ~VFS_BIO_NEED_FREE; wakeup(&needsbuffer); } } /* * vfs_buf_test_cache: * * Called when a buffer is extended. This function clears the B_CACHE * bit if the newly extended portion of the buffer does not contain * valid data. */ static __inline__ void vfs_buf_test_cache(struct buf *bp, vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, vm_page_t m) { if (bp->b_flags & B_CACHE) { int base = (foff + off) & PAGE_MASK; if (vm_page_is_valid(m, base, size) == 0) bp->b_flags &= ~B_CACHE; } } static __inline__ void bd_wakeup(int dirtybuflevel) { if (numdirtybuffers >= dirtybuflevel && bd_request == 0) { bd_request = 1; wakeup(&bd_request); } } /* * Initialize buffer headers and related structures. 
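 *
 * Hash sizing sketch (hypothetical nbuf, for illustration only): with
 * nbuf = 1024, bufhashinit() doubles bufhashmask from 8 until it reaches
 * nbuf / 4 = 256, reserves 256 list heads at vaddr, and then drops the
 * mask to 255.  bufhash() above then indexes the table with
 * ((vnp >> 7) + blkno) & 255, so consecutive logical blocks of the same
 * vnode land in consecutive chains.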
*/ caddr_t bufhashinit(caddr_t vaddr) { /* first, make a null hash table */ for (bufhashmask = 8; bufhashmask < nbuf / 4; bufhashmask <<= 1) ; bufhashtbl = (void *)vaddr; vaddr = vaddr + sizeof(*bufhashtbl) * bufhashmask; --bufhashmask; return(vaddr); } void bufinit(void) { struct buf *bp; int i; TAILQ_INIT(&bswlist); LIST_INIT(&invalhash); simple_lock_init(&buftimelock); for (i = 0; i <= bufhashmask; i++) LIST_INIT(&bufhashtbl[i]); /* next, make a null set of free lists */ for (i = 0; i < BUFFER_QUEUES; i++) TAILQ_INIT(&bufqueues[i]); /* finally, initialize each buffer header and stick on empty q */ for (i = 0; i < nbuf; i++) { bp = &buf[i]; bzero(bp, sizeof *bp); bp->b_flags = B_INVAL; /* we're just an empty header */ bp->b_dev = NODEV; bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; bp->b_qindex = QUEUE_EMPTY; bp->b_xflags = 0; LIST_INIT(&bp->b_dep); BUF_LOCKINIT(bp); TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_INSERT_HEAD(&invalhash, bp, b_hash); } /* * maxbufspace is currently calculated to support all filesystem * blocks to be 8K. If you happen to use a 16K filesystem, the size * of the buffer cache is still the same as it would be for 8K * filesystems. This keeps the size of the buffer cache "in check" * for big block filesystems. * * maxbufspace is calculated as around 50% of the KVA available in * the buffer_map ( DFLTSIZE vs BKVASIZE ), I presume to reduce the * effect of fragmentation. */ maxbufspace = (nbuf + 8) * DFLTBSIZE; if ((hibufspace = maxbufspace - MAXBSIZE * 5) <= MAXBSIZE) hibufspace = 3 * maxbufspace / 4; #if 0 /* * reserve 1/3 of the buffers for metadata (VDIR) which might not be VMIO'ed */ maxvmiobufspace = 2 * hibufspace / 3; #endif /* * Limit the amount of malloc memory since it is wired permanently into * the kernel space. Even though this is accounted for in the buffer * allocation, we don't want the malloced region to grow uncontrolled. * The malloc scheme improves memory utilization significantly on average * (small) directories. */ maxbufmallocspace = hibufspace / 20; /* * Reduce the chance of a deadlock occuring by limiting the number * of delayed-write dirty buffers we allow to stack up. */ lodirtybuffers = nbuf / 7 + 10; hidirtybuffers = nbuf / 4 + 20; numdirtybuffers = 0; /* * Try to keep the number of free buffers in the specified range, * and give the syncer access to an emergency reserve. */ lofreebuffers = nbuf / 18 + 5; hifreebuffers = 2 * lofreebuffers; numfreebuffers = nbuf; /* * Maximum number of async ops initiated per buf_daemon loop. This is * somewhat of a hack at the moment, we really need to limit ourselves * based on the number of bytes of I/O in-transit that were initiated * from buf_daemon. */ if ((maxbdrun = nswbuf / 4) < 4) maxbdrun = 4; kvafreespace = 0; bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE); bogus_page = vm_page_alloc(kernel_object, ((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT), VM_ALLOC_NORMAL); } /* * Free the kva allocation for a buffer * Must be called only at splbio or higher, * as this is the only locking for buffer_map. */ static void bfreekva(struct buf * bp) { if (bp->b_kvasize) { vm_map_delete(buffer_map, (vm_offset_t) bp->b_kvabase, (vm_offset_t) bp->b_kvabase + bp->b_kvasize ); bp->b_kvasize = 0; kvaspacewakeup(); } } /* * bremfree: * * Remove the buffer from the appropriate free list. 
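 *
 *	Typical caller sketch (illustrative only): the buffer must already
 *	be locked before it is pulled off its queue, e.g.
 *
 *		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
 *			bremfree(bp);
 *			... use or write the buffer ...
 *		}
 *
 *	bremfree() takes its own splbio() and asserts that exactly one
 *	reference holds the buffer lock while it is still on a queue.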
*/ void bremfree(struct buf * bp) { int s = splbio(); int old_qindex = bp->b_qindex; if (bp->b_qindex != QUEUE_NONE) { if (bp->b_qindex == QUEUE_EMPTYKVA) { kvafreespace -= bp->b_kvasize; } KASSERT(BUF_REFCNT(bp) == 1, ("bremfree: bp %p not locked",bp)); TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); bp->b_qindex = QUEUE_NONE; runningbufspace += bp->b_bufsize; } else { #if !defined(MAX_PERF) if (BUF_REFCNT(bp) <= 1) panic("bremfree: removing a buffer not on a queue"); #endif } /* * Fixup numfreebuffers count. If the buffer is invalid or not * delayed-write, and it was on the EMPTY, LRU, or AGE queues, * the buffer was free and we must decrement numfreebuffers. */ if ((bp->b_flags & B_INVAL) || (bp->b_flags & B_DELWRI) == 0) { switch(old_qindex) { case QUEUE_DIRTY: case QUEUE_CLEAN: case QUEUE_EMPTY: case QUEUE_EMPTYKVA: --numfreebuffers; break; default: break; } } splx(s); } /* * Get a buffer with the specified data. Look in the cache first. We * must clear B_ERROR and B_INVAL prior to initiating I/O. If B_CACHE * is set, the buffer is valid and we do not have to do anything ( see * getblk() ). */ int bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred, struct buf ** bpp) { struct buf *bp; bp = getblk(vp, blkno, size, 0, 0); *bpp = bp; /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; KASSERT(!(bp->b_flags & B_ASYNC), ("bread: illegal async bp %p", bp)); bp->b_flags |= B_READ; bp->b_flags &= ~(B_ERROR | B_INVAL); if (bp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); bp->b_rcred = cred; } vfs_busy_pages(bp, 0); VOP_STRATEGY(vp, bp); return (biowait(bp)); } return (0); } /* * Operates like bread, but also starts asynchronous I/O on * read-ahead blocks. We must clear B_ERROR and B_INVAL prior * to initiating I/O . If B_CACHE is set, the buffer is valid * and we do not have to do anything. */ int breadn(struct vnode * vp, daddr_t blkno, int size, daddr_t * rablkno, int *rabsize, int cnt, struct ucred * cred, struct buf ** bpp) { struct buf *bp, *rabp; int i; int rv = 0, readwait = 0; *bpp = bp = getblk(vp, blkno, size, 0, 0); /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; bp->b_flags |= B_READ; bp->b_flags &= ~(B_ERROR | B_INVAL); if (bp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); bp->b_rcred = cred; } vfs_busy_pages(bp, 0); VOP_STRATEGY(vp, bp); ++readwait; } for (i = 0; i < cnt; i++, rablkno++, rabsize++) { if (inmem(vp, *rablkno)) continue; rabp = getblk(vp, *rablkno, *rabsize, 0, 0); if ((rabp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; rabp->b_flags |= B_READ | B_ASYNC; rabp->b_flags &= ~(B_ERROR | B_INVAL); if (rabp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); rabp->b_rcred = cred; } vfs_busy_pages(rabp, 0); BUF_KERNPROC(rabp); VOP_STRATEGY(vp, rabp); } else { brelse(rabp); } } if (readwait) { rv = biowait(bp); } return (rv); } /* * Write, release buffer on completion. (Done by iodone * if async). Do not bother writing anything if the buffer * is invalid. * * Note that we set B_CACHE here, indicating that buffer is * fully valid and thus cacheable. This is true even of NFS * now so we set it generally. This could be set either here * or in biodone() since the I/O is synchronous. We put it * here. 
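 *
 *	Typical synchronous update sketch (illustrative only, hypothetical
 *	block size):
 *
 *		error = bread(vp, blkno, 8192, NOCRED, &bp);
 *		if (error == 0) {
 *			... modify bp->b_data ...
 *			error = bwrite(bp);	... waits, then releases bp ...
 *		} else
 *			brelse(bp);
 *
 *	For an asynchronous write the caller sets B_ASYNC instead (see
 *	bawrite() below) and bwrite() returns without waiting.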
*/ int bwrite(struct buf * bp) { int oldflags, s; struct vnode *vp; struct mount *mp; if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } oldflags = bp->b_flags; #if !defined(MAX_PERF) if (BUF_REFCNT(bp) == 0) panic("bwrite: buffer is not busy???"); #endif s = splbio(); bundirty(bp); bp->b_flags &= ~(B_READ | B_DONE | B_ERROR); bp->b_flags |= B_WRITEINPROG | B_CACHE; bp->b_vp->v_numoutput++; vfs_busy_pages(bp, 1); if (curproc != NULL) curproc->p_stats->p_ru.ru_oublock++; splx(s); if (oldflags & B_ASYNC) BUF_KERNPROC(bp); VOP_STRATEGY(bp->b_vp, bp); /* * Collect statistics on synchronous and asynchronous writes. * Writes to block devices are charged to their associated * filesystem (if any). */ if ((vp = bp->b_vp) != NULL) { if (vp->v_type == VBLK) mp = vp->v_specmountpoint; else mp = vp->v_mount; if (mp != NULL) { if ((oldflags & B_ASYNC) == 0) mp->mnt_stat.f_syncwrites++; else mp->mnt_stat.f_asyncwrites++; } } if ((oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); brelse(bp); return (rtval); } return (0); } /* * Delayed write. (Buffer is marked dirty). Do not bother writing * anything if the buffer is marked invalid. * * Note that since the buffer must be completely valid, we can safely * set B_CACHE. In fact, we have to set B_CACHE here rather then in * biodone() in order to prevent getblk from writing the buffer * out synchronously. */ void bdwrite(struct buf * bp) { #if 0 struct vnode *vp; #endif #if !defined(MAX_PERF) if (BUF_REFCNT(bp) == 0) panic("bdwrite: buffer is not busy"); #endif if (bp->b_flags & B_INVAL) { brelse(bp); return; } bdirty(bp); /* * Set B_CACHE, indicating that the buffer is fully valid. This is * true even of NFS now. */ bp->b_flags |= B_CACHE; /* * This bmap keeps the system from needing to do the bmap later, * perhaps when the system is attempting to do a sync. Since it * is likely that the indirect block -- or whatever other datastructure * that the filesystem needs is still in memory now, it is a good * thing to do this. Note also, that if the pageout daemon is * requesting a sync -- there might not be enough memory to do * the bmap then... So, this is important to do. */ if (bp->b_lblkno == bp->b_blkno) { VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } /* * Set the *dirty* buffer range based upon the VM system dirty pages. */ vfs_setdirty(bp); /* * We need to do this here to satisfy the vnode_pager and the * pageout daemon, so that it thinks that the pages have been * "cleaned". Note that since the pages are in a delayed write * buffer -- the VFS layer "will" see that the pages get written * out on the next sync, or perhaps the cluster will be completed. */ vfs_clean_pages(bp); bqrelse(bp); /* * Wakeup the buffer flushing daemon if we have saturated the * buffer cache. */ bd_wakeup(hidirtybuffers); /* * note: we cannot initiate I/O from a bdwrite even if we wanted to, * due to the softdep code. */ #if 0 /* * XXX The soft dependency code is not prepared to * have I/O done when a bdwrite is requested. For * now we just let the write be delayed if it is * requested by the soft dependency code. */ if ((vp = bp->b_vp) && ((vp->v_type == VBLK && vp->v_specmountpoint && (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) || (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)))) return; #endif } /* * bdirty: * * Turn buffer into delayed write request. We must clear B_READ and * B_RELBUF, and we must set B_DELWRI. We reassign the buffer to * itself to properly update it in the dirty/clean lists. 
We mark it * B_DONE to ensure that any asynchronization of the buffer properly * clears B_DONE ( else a panic will occur later ). * * bdirty() is kinda like bdwrite() - we have to clear B_INVAL which * might have been set pre-getblk(). Unlike bwrite/bdwrite, bdirty() * should only be called if the buffer is known-good. * * Since the buffer is not on a queue, we do not update the numfreebuffers * count. * * Must be called at splbio(). * The buffer must be on QUEUE_NONE. */ void bdirty(bp) struct buf *bp; { KASSERT(bp->b_qindex == QUEUE_NONE, ("bdirty: buffer %p still on queue %d", bp, bp->b_qindex)); bp->b_flags &= ~(B_READ|B_RELBUF); if ((bp->b_flags & B_DELWRI) == 0) { bp->b_flags |= B_DONE | B_DELWRI; reassignbuf(bp, bp->b_vp); ++numdirtybuffers; bd_wakeup(hidirtybuffers); } } /* * bundirty: * * Clear B_DELWRI for buffer. * * Since the buffer is not on a queue, we do not update the numfreebuffers * count. * * Must be called at splbio(). * The buffer must be on QUEUE_NONE. */ void bundirty(bp) struct buf *bp; { KASSERT(bp->b_qindex == QUEUE_NONE, ("bundirty: buffer %p still on queue %d", bp, bp->b_qindex)); if (bp->b_flags & B_DELWRI) { bp->b_flags &= ~B_DELWRI; reassignbuf(bp, bp->b_vp); --numdirtybuffers; numdirtywakeup(); } } /* * bawrite: * * Asynchronous write. Start output on a buffer, but do not wait for * it to complete. The buffer is released when the output completes. * * bwrite() ( or the VOP routine anyway ) is responsible for handling * B_INVAL buffers. Not us. */ void bawrite(struct buf * bp) { bp->b_flags |= B_ASYNC; (void) VOP_BWRITE(bp->b_vp, bp); } /* * bowrite: * * Ordered write. Start output on a buffer, and flag it so that the * device will write it in the order it was queued. The buffer is * released when the output completes. bwrite() ( or the VOP routine * anyway ) is responsible for handling B_INVAL buffers. */ int bowrite(struct buf * bp) { bp->b_flags |= B_ORDERED | B_ASYNC; return (VOP_BWRITE(bp->b_vp, bp)); } /* * bwillwrite: * * Called prior to the locking of any vnodes when we are expecting to * write. We do not want to starve the buffer cache with too many * dirty buffers so we block here. By blocking prior to the locking * of any vnodes we attempt to avoid the situation where a locked vnode * prevents the various system daemons from flushing related buffers. */ void bwillwrite(void) { int twenty = (hidirtybuffers - lodirtybuffers) / 5; if (numdirtybuffers > hidirtybuffers + twenty) { int s; s = splbio(); while (numdirtybuffers > hidirtybuffers) { bd_wakeup(hidirtybuffers); needsbuffer |= VFS_BIO_NEED_DIRTYFLUSH; tsleep(&needsbuffer, (PRIBIO + 4), "flswai", 0); } splx(s); } } /* * brelse: * * Release a busy buffer and, if requested, free its resources. The * buffer will be stashed in the appropriate bufqueue[] allowing it * to be accessed later as a cache entity or reused for other purposes. */ void brelse(struct buf * bp) { int s; KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); #if 0 if (bp->b_flags & B_CLUSTER) { relpbuf(bp, NULL); return; } #endif s = splbio(); if (bp->b_flags & B_LOCKED) bp->b_flags &= ~B_ERROR; if ((bp->b_flags & (B_READ | B_ERROR)) == B_ERROR) { /* * Failed write, redirty. Must clear B_ERROR to prevent * pages from being scrapped. Note: B_INVAL is ignored * here but will presumably be dealt with later. 
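 * A failed read, by contrast, has B_READ set and therefore falls
 * through to the B_ERROR/B_INVAL case below, where the buffer is
 * invalidated rather than redirtied.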
*/ bp->b_flags &= ~B_ERROR; bdirty(bp); } else if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR | B_FREEBUF)) || (bp->b_bufsize <= 0)) { /* * Either a failed I/O or we were asked to free or not * cache the buffer. */ bp->b_flags |= B_INVAL; if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) (*bioops.io_deallocate)(bp); if (bp->b_flags & B_DELWRI) { --numdirtybuffers; numdirtywakeup(); } bp->b_flags &= ~(B_DELWRI | B_CACHE | B_FREEBUF); if ((bp->b_flags & B_VMIO) == 0) { if (bp->b_bufsize) allocbuf(bp, 0); if (bp->b_vp) brelvp(bp); } } /* * We must clear B_RELBUF if B_DELWRI is set. If vfs_vmio_release() * is called with B_DELWRI set, the underlying pages may wind up * getting freed causing a previous write (bdwrite()) to get 'lost' * because pages associated with a B_DELWRI bp are marked clean. * * We still allow the B_INVAL case to call vfs_vmio_release(), even * if B_DELWRI is set. */ if (bp->b_flags & B_DELWRI) bp->b_flags &= ~B_RELBUF; /* * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer * constituted, not even NFS buffers now. Two flags effect this. If * B_INVAL, the struct buf is invalidated but the VM object is kept * around ( i.e. so it is trivial to reconstitute the buffer later ). * * If B_ERROR or B_NOCACHE is set, pages in the VM object will be * invalidated. B_ERROR cannot be set for a failed write unless the * buffer is also B_INVAL because it hits the re-dirtying code above. * * Normally we can do this whether a buffer is B_DELWRI or not. If * the buffer is an NFS buffer, it is tracking piecemeal writes or * the commit state and we cannot afford to lose the buffer. */ if ((bp->b_flags & B_VMIO) && !(bp->b_vp->v_tag == VT_NFS && bp->b_vp->v_type != VBLK && (bp->b_flags & B_DELWRI)) ) { int i, j, resid; vm_page_t m; off_t foff; vm_pindex_t poff; vm_object_t obj; struct vnode *vp; vp = bp->b_vp; /* * Get the base offset and length of the buffer. Note that * for block sizes that are less then PAGE_SIZE, the b_data * base of the buffer does not represent exactly b_offset and * neither b_offset nor b_size are necessarily page aligned. * Instead, the starting position of b_offset is: * * b_data + (b_offset & PAGE_MASK) * * block sizes less then DEV_BSIZE (usually 512) are not * supported due to the page granularity bits (m->valid, * m->dirty, etc...). * * See man buf(9) for more information */ resid = bp->b_bufsize; foff = bp->b_offset; for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; vm_page_flag_clear(m, PG_ZERO); if (m == bogus_page) { obj = (vm_object_t) vp->v_object; poff = OFF_TO_IDX(bp->b_offset); for (j = i; j < bp->b_npages; j++) { m = bp->b_pages[j]; if (m == bogus_page) { m = vm_page_lookup(obj, poff + j); #if !defined(MAX_PERF) if (!m) { panic("brelse: page missing\n"); } #endif bp->b_pages[j] = m; } } if ((bp->b_flags & B_INVAL) == 0) { pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } } if (bp->b_flags & (B_NOCACHE|B_ERROR)) { int poffset = foff & PAGE_MASK; int presid = resid > (PAGE_SIZE - poffset) ? 
(PAGE_SIZE - poffset) : resid; KASSERT(presid >= 0, ("brelse: extra page")); vm_page_set_invalid(m, poffset, presid); } resid -= PAGE_SIZE - (foff & PAGE_MASK); foff = (foff + PAGE_SIZE) & ~PAGE_MASK; } if (bp->b_flags & (B_INVAL | B_RELBUF)) vfs_vmio_release(bp); } else if (bp->b_flags & B_VMIO) { if (bp->b_flags & (B_INVAL | B_RELBUF)) vfs_vmio_release(bp); } #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_NONE) panic("brelse: free buffer onto another queue???"); #endif if (BUF_REFCNT(bp) > 1) { /* Temporary panic to verify exclusive locking */ /* This panic goes away when we allow shared refs */ panic("brelse: multiple refs"); /* do not release to free list */ BUF_UNLOCK(bp); splx(s); return; } /* enqueue */ /* buffers with no memory */ if (bp->b_bufsize == 0) { bp->b_flags |= B_INVAL; if (bp->b_kvasize) bp->b_qindex = QUEUE_EMPTYKVA; else bp->b_qindex = QUEUE_EMPTY; TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; kvafreespace += bp->b_kvasize; if (bp->b_kvasize) kvaspacewakeup(); /* buffers with junk contents */ } else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) { bp->b_flags |= B_INVAL; bp->b_qindex = QUEUE_CLEAN; TAILQ_INSERT_HEAD(&bufqueues[QUEUE_CLEAN], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; /* buffers that are locked */ } else if (bp->b_flags & B_LOCKED) { bp->b_qindex = QUEUE_LOCKED; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); /* remaining buffers */ } else { switch(bp->b_flags & (B_DELWRI|B_AGE)) { case B_DELWRI | B_AGE: bp->b_qindex = QUEUE_DIRTY; TAILQ_INSERT_HEAD(&bufqueues[QUEUE_DIRTY], bp, b_freelist); break; case B_DELWRI: bp->b_qindex = QUEUE_DIRTY; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist); break; case B_AGE: bp->b_qindex = QUEUE_CLEAN; TAILQ_INSERT_HEAD(&bufqueues[QUEUE_CLEAN], bp, b_freelist); break; default: bp->b_qindex = QUEUE_CLEAN; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp, b_freelist); break; } } /* * If B_INVAL, clear B_DELWRI. We've already placed the buffer * on the correct queue. */ if ((bp->b_flags & (B_INVAL|B_DELWRI)) == (B_INVAL|B_DELWRI)) { bp->b_flags &= ~B_DELWRI; --numdirtybuffers; numdirtywakeup(); } runningbufspace -= bp->b_bufsize; /* * Fixup numfreebuffers count. The bp is on an appropriate queue * unless locked. We then bump numfreebuffers if it is not B_DELWRI. * We've already handled the B_INVAL case ( B_DELWRI will be clear * if B_INVAL is set ). */ if ((bp->b_flags & B_LOCKED) == 0 && !(bp->b_flags & B_DELWRI)) bufcountwakeup(); /* * Something we can maybe free. */ if (bp->b_bufsize) bufspacewakeup(); /* unlock */ BUF_UNLOCK(bp); bp->b_flags &= ~(B_ORDERED | B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); splx(s); } /* * Release a buffer back to the appropriate queue but do not try to free * it. * * bqrelse() is used by bdwrite() to requeue a delayed write, and used by * biodone() to requeue an async I/O on completion. It is also used when * known good buffers need to be requeued but we think we may need the data * again soon. 
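 *
 * Informal rule of thumb: brelse() is for error paths or buffers whose
 * contents may be thrown away (B_INVAL, B_RELBUF, B_NOCACHE), while
 * bqrelse() simply puts a known-good buffer back on QUEUE_DIRTY or
 * QUEUE_CLEAN; bqrelse() never invalidates the buffer or tears down its
 * VM backing.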
*/ void bqrelse(struct buf * bp) { int s; s = splbio(); KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp)); #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_NONE) panic("bqrelse: free buffer onto another queue???"); #endif if (BUF_REFCNT(bp) > 1) { /* do not release to free list */ panic("bqrelse: multiple refs"); BUF_UNLOCK(bp); splx(s); return; } if (bp->b_flags & B_LOCKED) { bp->b_flags &= ~B_ERROR; bp->b_qindex = QUEUE_LOCKED; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); /* buffers with stale but valid contents */ } else if (bp->b_flags & B_DELWRI) { bp->b_qindex = QUEUE_DIRTY; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist); } else { bp->b_qindex = QUEUE_CLEAN; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp, b_freelist); } runningbufspace -= bp->b_bufsize; if ((bp->b_flags & B_LOCKED) == 0 && ((bp->b_flags & B_INVAL) || !(bp->b_flags & B_DELWRI))) { bufcountwakeup(); } /* * Something we can maybe wakeup */ if (bp->b_bufsize && !(bp->b_flags & B_DELWRI)) bufspacewakeup(); /* unlock */ BUF_UNLOCK(bp); bp->b_flags &= ~(B_ORDERED | B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); splx(s); } static void vfs_vmio_release(bp) struct buf *bp; { int i, s; vm_page_t m; s = splvm(); for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; bp->b_pages[i] = NULL; /* * In order to keep page LRU ordering consistent, put * everything on the inactive queue. */ vm_page_unwire(m, 0); /* * We don't mess with busy pages, it is * the responsibility of the process that * busied the pages to deal with them. */ if ((m->flags & PG_BUSY) || (m->busy != 0)) continue; if (m->wire_count == 0) { vm_page_flag_clear(m, PG_ZERO); /* * Might as well free the page if we can and it has * no valid data. */ if ((bp->b_flags & B_ASYNC) == 0 && !m->valid && m->hold_count == 0) { vm_page_busy(m); vm_page_protect(m, VM_PROT_NONE); vm_page_free(m); } } } bufspace -= bp->b_bufsize; vmiospace -= bp->b_bufsize; runningbufspace -= bp->b_bufsize; splx(s); pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); if (bp->b_bufsize) bufspacewakeup(); bp->b_npages = 0; bp->b_bufsize = 0; bp->b_flags &= ~B_VMIO; if (bp->b_vp) brelvp(bp); } /* * Check to see if a block is currently memory resident. */ struct buf * gbincore(struct vnode * vp, daddr_t blkno) { struct buf *bp; struct bufhashhdr *bh; bh = bufhash(vp, blkno); bp = bh->lh_first; /* Search hash chain */ while (bp != NULL) { /* hit */ if (bp->b_vp == vp && bp->b_lblkno == blkno && (bp->b_flags & B_INVAL) == 0) { break; } bp = bp->b_hash.le_next; } return (bp); } /* * vfs_bio_awrite: * * Implement clustered async writes for clearing out B_DELWRI buffers. * This is much better then the old way of writing only one buffer at * a time. Note that we may not be presented with the buffers in the * correct order, so we search for the cluster in both directions. */ int vfs_bio_awrite(struct buf * bp) { int i; int j; daddr_t lblkno = bp->b_lblkno; struct vnode *vp = bp->b_vp; int s; int ncl; struct buf *bpa; int nwritten; int size; int maxcl; s = splbio(); /* * right now we support clustered writing only to regular files. If * we find a clusterable block we could be in the middle of a cluster * rather then at the beginning. 
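 *
 * Worked example (hypothetical sizes): assuming MAXPHYS of 128K and an
 * 8K f_iosize, maxcl is 16, so the forward and backward scans together
 * can gather at most 16 adjacent delayed-write, clusterable blocks of
 * equal size that are physically contiguous on disk, and hand them to
 * cluster_wbuild() as a single I/O starting at lblkno - j.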
*/ if ((vp->v_type == VREG) && (vp->v_mount != 0) && /* Only on nodes that have the size info */ (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) { size = vp->v_mount->mnt_stat.f_iosize; maxcl = MAXPHYS / size; for (i = 1; i < maxcl; i++) { if ((bpa = gbincore(vp, lblkno + i)) && BUF_REFCNT(bpa) == 0 && ((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) == (B_DELWRI | B_CLUSTEROK)) && (bpa->b_bufsize == size)) { if ((bpa->b_blkno == bpa->b_lblkno) || (bpa->b_blkno != bp->b_blkno + ((i * size) >> DEV_BSHIFT))) break; } else { break; } } for (j = 1; i + j <= maxcl && j <= lblkno; j++) { if ((bpa = gbincore(vp, lblkno - j)) && BUF_REFCNT(bpa) == 0 && ((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) == (B_DELWRI | B_CLUSTEROK)) && (bpa->b_bufsize == size)) { if ((bpa->b_blkno == bpa->b_lblkno) || (bpa->b_blkno != bp->b_blkno - ((j * size) >> DEV_BSHIFT))) break; } else { break; } } --j; ncl = i + j; /* * this is a possible cluster write */ if (ncl != 1) { nwritten = cluster_wbuild(vp, size, lblkno - j, ncl); splx(s); return nwritten; } } BUF_LOCK(bp, LK_EXCLUSIVE); bremfree(bp); bp->b_flags |= B_ASYNC; splx(s); /* * default (old) behavior, writing out only one block * * XXX returns b_bufsize instead of b_bcount for nwritten? */ nwritten = bp->b_bufsize; (void) VOP_BWRITE(bp->b_vp, bp); return nwritten; } /* * getnewbuf: * * Find and initialize a new buffer header, freeing up existing buffers * in the bufqueues as necessary. The new buffer is returned locked. * * Important: B_INVAL is not set. If the caller wishes to throw the * buffer away, the caller must set B_INVAL prior to calling brelse(). * * We block if: * We have insufficient buffer headers * We have insufficient buffer space * buffer_map is too fragmented ( space reservation fails ) * If we have to flush dirty buffers ( but we try to avoid this ) * * To avoid VFS layer recursion we do not flush dirty buffers ourselves. * Instead we ask the buf daemon to do it for us. We attempt to * avoid piecemeal wakeups of the pageout daemon. */ static struct buf * getnewbuf(int slpflag, int slptimeo, int size, int maxsize) { struct buf *bp; struct buf *nbp; struct buf *dbp; int outofspace; int nqindex; int defrag = 0; ++getnewbufcalls; --getnewbufrestarts; restart: ++getnewbufrestarts; /* * Calculate whether we are out of buffer space. This state is * recalculated on every restart. If we are out of space, we * have to turn off defragmentation. Setting defrag to -1 when * outofspace is positive means "defrag while freeing buffers". * The looping conditional will be muffed up if defrag is left * positive when outofspace is positive. */ dbp = NULL; outofspace = 0; if (bufspace >= hibufspace) { if ((curproc->p_flag & P_BUFEXHAUST) == 0 || bufspace >= maxbufspace) { outofspace = 1; if (defrag > 0) defrag = -1; } } /* * defrag state is semi-persistant. 1 means we are flagged for * defragging. -1 means we actually defragged something. */ /* nop */ /* * Setup for scan. If we do not have enough free buffers, * we setup a degenerate case that immediately fails. Note * that if we are specially marked process, we are allowed to * dip into our reserves. * * Normally we want to find an EMPTYKVA buffer. That is, a * buffer with kva already allocated. If there are no EMPTYKVA * buffers we back up to the truely EMPTY buffers. When defragging * we do not bother backing up since we have to locate buffers with * kva to defrag. If we are out of space we skip both EMPTY and * EMPTYKVA and dig right into the CLEAN queue. 
* * In this manner we avoid scanning unnecessary buffers. It is very * important for us to do this because the buffer cache is almost * constantly out of space or in need of defragmentation. */ if ((curproc->p_flag & P_BUFEXHAUST) == 0 && numfreebuffers < lofreebuffers) { nqindex = QUEUE_CLEAN; nbp = NULL; } else { nqindex = QUEUE_EMPTYKVA; nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]); if (nbp == NULL) { if (defrag <= 0) { nqindex = QUEUE_EMPTY; nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]); } } if (outofspace || nbp == NULL) { nqindex = QUEUE_CLEAN; nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]); } } /* * Run scan, possibly freeing data and/or kva mappings on the fly * depending. */ while ((bp = nbp) != NULL) { int qindex = nqindex; /* * Calculate next bp ( we can only use it if we do not block * or do other fancy things ). */ if ((nbp = TAILQ_NEXT(bp, b_freelist)) == NULL) { switch(qindex) { case QUEUE_EMPTY: nqindex = QUEUE_EMPTYKVA; if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]))) break; /* fall through */ case QUEUE_EMPTYKVA: nqindex = QUEUE_CLEAN; if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]))) break; /* fall through */ case QUEUE_CLEAN: /* * nbp is NULL. */ break; } } /* * Sanity Checks */ KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistant queue %d bp %p", qindex, bp)); /* * Note: we no longer distinguish between VMIO and non-VMIO * buffers. */ KASSERT((bp->b_flags & B_DELWRI) == 0, ("delwri buffer %p found in queue %d", bp, qindex)); /* * If we are defragging and the buffer isn't useful for fixing * that problem we continue. If we are out of space and the * buffer isn't useful for fixing that problem we continue. */ if (defrag > 0 && bp->b_kvasize == 0) continue; if (outofspace > 0 && bp->b_bufsize == 0) continue; /* * Start freeing the bp. This is somewhat involved. nbp * remains valid only for QUEUE_EMPTY[KVA] bp's. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) != 0) panic("getnewbuf: locked buf"); bremfree(bp); if (qindex == QUEUE_CLEAN) { if (bp->b_flags & B_VMIO) { bp->b_flags &= ~B_ASYNC; vfs_vmio_release(bp); } if (bp->b_vp) brelvp(bp); } /* * NOTE: nbp is now entirely invalid. We can only restart * the scan from this point on. * * Get the rest of the buffer freed up. b_kva* is still * valid after this operation. */ if (bp->b_rcred != NOCRED) { crfree(bp->b_rcred); bp->b_rcred = NOCRED; } if (bp->b_wcred != NOCRED) { crfree(bp->b_wcred); bp->b_wcred = NOCRED; } if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) (*bioops.io_deallocate)(bp); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); if (bp->b_bufsize) allocbuf(bp, 0); bp->b_flags = 0; bp->b_dev = NODEV; bp->b_vp = NULL; bp->b_blkno = bp->b_lblkno = 0; bp->b_offset = NOOFFSET; bp->b_iodone = 0; bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; bp->b_npages = 0; bp->b_dirtyoff = bp->b_dirtyend = 0; LIST_INIT(&bp->b_dep); /* * Ok, now that we have a free buffer, if we are defragging * we have to recover the kvaspace. If we are out of space * we have to free the buffer (which we just did), but we * do not have to recover kva space unless we hit a defrag * hicup. Being able to avoid freeing the kva space leads * to a significant reduction in overhead. */ if (defrag > 0) { defrag = -1; bp->b_flags |= B_INVAL; bfreekva(bp); brelse(bp); goto restart; } if (outofspace > 0) { outofspace = -1; bp->b_flags |= B_INVAL; if (defrag < 0) bfreekva(bp); brelse(bp); goto restart; } /* * We are done */ break; } /* * If we exhausted our list, sleep as appropriate. 
We may have to * wakeup various daemons and write out some dirty buffers. * * Generally we are sleeping due to insufficient buffer space. */ if (bp == NULL) { int flags; char *waitmsg; dosleep: if (defrag > 0) { flags = VFS_BIO_NEED_KVASPACE; waitmsg = "nbufkv"; } else if (outofspace > 0) { waitmsg = "nbufbs"; flags = VFS_BIO_NEED_BUFSPACE; } else { waitmsg = "newbuf"; flags = VFS_BIO_NEED_ANY; } /* XXX */ (void) speedup_syncer(); needsbuffer |= flags; while (needsbuffer & flags) { if (tsleep(&needsbuffer, (PRIBIO + 4) | slpflag, waitmsg, slptimeo)) return (NULL); } } else { /* * We finally have a valid bp. We aren't quite out of the * woods, we still have to reserve kva space. */ vm_offset_t addr = 0; maxsize = (maxsize + PAGE_MASK) & ~PAGE_MASK; if (maxsize != bp->b_kvasize) { bfreekva(bp); if (vm_map_findspace(buffer_map, vm_map_min(buffer_map), maxsize, &addr)) { /* * Uh oh. Buffer map is to fragmented. Try * to defragment. */ if (defrag <= 0) { defrag = 1; bp->b_flags |= B_INVAL; brelse(bp); goto restart; } /* * Uh oh. We couldn't seem to defragment */ bp = NULL; goto dosleep; } } if (addr) { vm_map_insert(buffer_map, NULL, 0, addr, addr + maxsize, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); bp->b_kvabase = (caddr_t) addr; bp->b_kvasize = maxsize; } bp->b_data = bp->b_kvabase; } return(bp); } /* * waitfreebuffers: * * Wait for sufficient free buffers. Only called from normal processes. */ static void waitfreebuffers(int slpflag, int slptimeo) { while (numfreebuffers < hifreebuffers) { if (numfreebuffers >= hifreebuffers) break; needsbuffer |= VFS_BIO_NEED_FREE; if (tsleep(&needsbuffer, (PRIBIO + 4)|slpflag, "biofre", slptimeo)) break; } } /* * buf_daemon: * * buffer flushing daemon. Buffers are normally flushed by the * update daemon but if it cannot keep up this process starts to * take the load in an attempt to prevent getnewbuf() from blocking. */ static struct proc *bufdaemonproc; static int bd_interval; static int bd_flushto; static struct kproc_desc buf_kp = { "bufdaemon", buf_daemon, &bufdaemonproc }; SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp) static void buf_daemon() { int s; /* * This process is allowed to take the buffer cache to the limit */ curproc->p_flag |= P_BUFEXHAUST; s = splbio(); bd_interval = 5 * hz; /* dynamically adjusted */ bd_flushto = hidirtybuffers; /* dynamically adjusted */ while (TRUE) { bd_request = 0; /* * Do the flush. Limit the number of buffers we flush in one * go. The failure condition occurs when processes are writing * buffers faster then we can dispose of them. In this case * we may be flushing so often that the previous set of flushes * have not had time to complete, causing us to run out of * physical buffers and block. */ { int runcount = maxbdrun; while (numdirtybuffers > bd_flushto && runcount) { --runcount; if (flushbufqueues() == 0) break; } } /* * If nobody is requesting anything we sleep */ if (bd_request == 0) tsleep(&bd_request, PVM, "psleep", bd_interval); /* * We calculate how much to add or subtract from bd_flushto * and bd_interval based on how far off we are from the * optimal number of dirty buffers, which is 20% below the * hidirtybuffers mark. We cannot use hidirtybuffers straight * because being right on the mark will cause getnewbuf() * to oscillate our wakeup. * * The larger the error in either direction, the more we adjust * bd_flushto and bd_interval. The time interval is adjusted * by 2 seconds per whole-buffer-range of error. 
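 * (Worked example, hypothetical tuning: with lodirtybuffers = 100 and
 * hidirtybuffers = 300, brange = 200 and the target
 * middb = hidirtybuffers - brange / 5 = 260; if numdirtybuffers is 160
 * the error deltabuf = 100, so bd_flushto rises by 100 / 20 = 5 and
 * bd_interval grows by 100 * 2 * hz / 200 = hz, i.e. one second.)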
This is an * exponential convergence algorithm, with large errors * producing large changes and small errors producing small * changes. */ { int brange = hidirtybuffers - lodirtybuffers; int middb = hidirtybuffers - brange / 5; int deltabuf = middb - numdirtybuffers; bd_flushto += deltabuf / 20; bd_interval += deltabuf * (2 * hz) / (brange * 1); } if (bd_flushto < lodirtybuffers) bd_flushto = lodirtybuffers; if (bd_flushto > hidirtybuffers) bd_flushto = hidirtybuffers; if (bd_interval < hz / 10) bd_interval = hz / 10; if (bd_interval > 5 * hz) bd_interval = 5 * hz; } } /* * flushbufqueues: * * Try to flush a buffer in the dirty queue. We must be careful to * free up B_INVAL buffers instead of write them, which NFS is * particularly sensitive to. */ static int flushbufqueues(void) { struct buf *bp; int r = 0; bp = TAILQ_FIRST(&bufqueues[QUEUE_DIRTY]); while (bp) { KASSERT((bp->b_flags & B_DELWRI), ("unexpected clean buffer %p", bp)); if ((bp->b_flags & B_DELWRI) != 0) { if (bp->b_flags & B_INVAL) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) != 0) panic("flushbufqueues: locked buf"); bremfree(bp); brelse(bp); ++r; break; } vfs_bio_awrite(bp); ++r; break; } bp = TAILQ_NEXT(bp, b_freelist); } return(r); } /* * Check to see if a block is currently memory resident. */ struct buf * incore(struct vnode * vp, daddr_t blkno) { struct buf *bp; int s = splbio(); bp = gbincore(vp, blkno); splx(s); return (bp); } /* * Returns true if no I/O is needed to access the * associated VM object. This is like incore except * it also hunts around in the VM system for the data. */ int inmem(struct vnode * vp, daddr_t blkno) { vm_object_t obj; vm_offset_t toff, tinc, size; vm_page_t m; vm_ooffset_t off; if (incore(vp, blkno)) return 1; if (vp->v_mount == NULL) return 0; if ((vp->v_object == NULL) || (vp->v_flag & VOBJBUF) == 0) return 0; obj = vp->v_object; size = PAGE_SIZE; if (size > vp->v_mount->mnt_stat.f_iosize) size = vp->v_mount->mnt_stat.f_iosize; off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize; for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { m = vm_page_lookup(obj, OFF_TO_IDX(off + toff)); if (!m) return 0; tinc = size; if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK)) tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK); if (vm_page_is_valid(m, (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0) return 0; } return 1; } /* * vfs_setdirty: * * Sets the dirty range for a buffer based on the status of the dirty * bits in the pages comprising the buffer. * * The range is limited to the size of the buffer. * * This routine is primarily used by NFS, but is generalized for the * B_VMIO case. */ static void vfs_setdirty(struct buf *bp) { int i; vm_object_t object; /* * Degenerate case - empty buffer */ if (bp->b_bufsize == 0) return; /* * We qualify the scan for modified pages on whether the * object has been flushed yet. The OBJ_WRITEABLE flag * is not cleared simply by protecting pages off. */ if ((bp->b_flags & B_VMIO) == 0) return; object = bp->b_pages[0]->object; if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY)) printf("Warning: object %p writeable but not mightbedirty\n", object); if (!(object->flags & OBJ_WRITEABLE) && (object->flags & OBJ_MIGHTBEDIRTY)) printf("Warning: object %p mightbedirty but not writeable\n", object); if (object->flags & (OBJ_MIGHTBEDIRTY|OBJ_CLEANING)) { vm_offset_t boffset; vm_offset_t eoffset; /* * test the pages to see if they have been modified directly * by users through the VM system. 
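 * (vm_page_test_dirty() consults the pmap-level modified bit and
 * transfers it into m->dirty, so stores made through an mmap()ed
 * mapping are noticed here as well.)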
*/ for (i = 0; i < bp->b_npages; i++) { vm_page_flag_clear(bp->b_pages[i], PG_ZERO); vm_page_test_dirty(bp->b_pages[i]); } /* * Calculate the encompassing dirty range, boffset and eoffset, * (eoffset - boffset) bytes. */ for (i = 0; i < bp->b_npages; i++) { if (bp->b_pages[i]->dirty) break; } boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK); for (i = bp->b_npages - 1; i >= 0; --i) { if (bp->b_pages[i]->dirty) { break; } } eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK); /* * Fit it to the buffer. */ if (eoffset > bp->b_bcount) eoffset = bp->b_bcount; /* * If we have a good dirty range, merge with the existing * dirty range. */ if (boffset < eoffset) { if (bp->b_dirtyoff > boffset) bp->b_dirtyoff = boffset; if (bp->b_dirtyend < eoffset) bp->b_dirtyend = eoffset; } } } /* * getblk: * * Get a block given a specified block and offset into a file/device. * The buffers B_DONE bit will be cleared on return, making it almost * ready for an I/O initiation. B_INVAL may or may not be set on * return. The caller should clear B_INVAL prior to initiating a * READ. * * For a non-VMIO buffer, B_CACHE is set to the opposite of B_INVAL for * an existing buffer. * * For a VMIO buffer, B_CACHE is modified according to the backing VM. * If getblk()ing a previously 0-sized invalid buffer, B_CACHE is set * and then cleared based on the backing VM. If the previous buffer is * non-0-sized but invalid, B_CACHE will be cleared. * * If getblk() must create a new buffer, the new buffer is returned with * both B_INVAL and B_CACHE clear unless it is a VMIO buffer, in which * case it is returned with B_INVAL clear and B_CACHE set based on the * backing VM. * * getblk() also forces a VOP_BWRITE() for any B_DELWRI buffer whos * B_CACHE bit is clear. * * What this means, basically, is that the caller should use B_CACHE to * determine whether the buffer is fully valid or not and should clear * B_INVAL prior to issuing a read. If the caller intends to validate * the buffer by loading its data area with something, the caller needs * to clear B_INVAL. If the caller does this without issuing an I/O, * the caller should set B_CACHE ( as an optimization ), else the caller * should issue the I/O and biodone() will set B_CACHE if the I/O was * a write attempt or if it was a successfull read. If the caller * intends to issue a READ, the caller must clear B_INVAL and B_ERROR * prior to issuing the READ. biodone() will *not* clear B_INVAL. */ struct buf * getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo) { struct buf *bp; int s; struct bufhashhdr *bh; #if !defined(MAX_PERF) if (size > MAXBSIZE) panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE); #endif s = splbio(); loop: /* * Block if we are low on buffers. Certain processes are allowed * to completely exhaust the buffer cache. */ if (curproc->p_flag & P_BUFEXHAUST) { if (numfreebuffers == 0) { needsbuffer |= VFS_BIO_NEED_ANY; tsleep(&needsbuffer, (PRIBIO + 4) | slpflag, "newbuf", slptimeo); } } else if (numfreebuffers < lofreebuffers) { waitfreebuffers(slpflag, slptimeo); } if ((bp = gbincore(vp, blkno))) { /* * Buffer is in-core. If the buffer is not busy, it must * be on a queue. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { if (BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL, "getblk", slpflag, slptimeo) == ENOLCK) goto loop; splx(s); return (struct buf *) NULL; } /* * The buffer is locked. B_CACHE is cleared if the buffer is * invalid. 
Ohterwise, for a non-VMIO buffer, B_CACHE is set * and for a VMIO buffer B_CACHE is adjusted according to the * backing VM cache. */ if (bp->b_flags & B_INVAL) bp->b_flags &= ~B_CACHE; else if ((bp->b_flags & (B_VMIO | B_INVAL)) == 0) bp->b_flags |= B_CACHE; bremfree(bp); /* * check for size inconsistancies for non-VMIO case. */ if (bp->b_bcount != size) { if ((bp->b_flags & B_VMIO) == 0 || (size > bp->b_kvasize)) { if (bp->b_flags & B_DELWRI) { bp->b_flags |= B_NOCACHE; VOP_BWRITE(bp->b_vp, bp); } else { if ((bp->b_flags & B_VMIO) && (LIST_FIRST(&bp->b_dep) == NULL)) { bp->b_flags |= B_RELBUF; brelse(bp); } else { bp->b_flags |= B_NOCACHE; VOP_BWRITE(bp->b_vp, bp); } } goto loop; } } /* * If the size is inconsistant in the VMIO case, we can resize * the buffer. This might lead to B_CACHE getting set or * cleared. If the size has not changed, B_CACHE remains * unchanged from its previous state. */ if (bp->b_bcount != size) allocbuf(bp, size); KASSERT(bp->b_offset != NOOFFSET, ("getblk: no buffer offset")); /* * A buffer with B_DELWRI set and B_CACHE clear must * be committed before we can return the buffer in * order to prevent the caller from issuing a read * ( due to B_CACHE not being set ) and overwriting * it. * * Most callers, including NFS and FFS, need this to * operate properly either because they assume they * can issue a read if B_CACHE is not set, or because * ( for example ) an uncached B_DELWRI might loop due * to softupdates re-dirtying the buffer. In the latter * case, B_CACHE is set after the first write completes, * preventing further loops. */ if ((bp->b_flags & (B_CACHE|B_DELWRI)) == B_DELWRI) { VOP_BWRITE(bp->b_vp, bp); goto loop; } splx(s); bp->b_flags &= ~B_DONE; } else { /* * Buffer is not in-core, create new buffer. The buffer * returned by getnewbuf() is locked. Note that the returned * buffer is also considered valid (not marked B_INVAL). */ int bsize, maxsize, vmio; off_t offset; if (vp->v_type == VBLK) bsize = DEV_BSIZE; else if (vp->v_mountedhere) bsize = vp->v_mountedhere->mnt_stat.f_iosize; else if (vp->v_mount) bsize = vp->v_mount->mnt_stat.f_iosize; else bsize = size; offset = (off_t)blkno * bsize; vmio = (vp->v_object != 0) && (vp->v_flag & VOBJBUF); maxsize = vmio ? size + (offset & PAGE_MASK) : size; maxsize = imax(maxsize, bsize); if ((bp = getnewbuf(slpflag, slptimeo, size, maxsize)) == NULL) { if (slpflag || slptimeo) { splx(s); return NULL; } goto loop; } /* * This code is used to make sure that a buffer is not * created while the getnewbuf routine is blocked. * This can be a problem whether the vnode is locked or not. * If the buffer is created out from under us, we have to * throw away the one we just created. There is now window * race because we are safely running at splbio() from the * point of the duplicate buffer creation through to here, * and we've locked the buffer. */ if (gbincore(vp, blkno)) { bp->b_flags |= B_INVAL; brelse(bp); goto loop; } /* * Insert the buffer into the hash, so that it can * be found by incore. */ bp->b_blkno = bp->b_lblkno = blkno; bp->b_offset = offset; bgetvp(vp, bp); LIST_REMOVE(bp, b_hash); bh = bufhash(vp, blkno); LIST_INSERT_HEAD(bh, bp, b_hash); /* * set B_VMIO bit. allocbuf() the buffer bigger. Since the * buffer size starts out as 0, B_CACHE will be set by * allocbuf() for the VMIO case prior to it testing the * backing store for validity. 
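 *
 * Sizing sketch (hypothetical numbers): for a 2048 byte block at
 * blkno 7 on a 2K filesystem, b_offset is 14336 and, assuming 4K pages,
 * offset & PAGE_MASK is 2048, so the vmio maxsize passed to getnewbuf()
 * above was 2048 + 2048 = 4096 and the KVA reservation covers the whole
 * page the data lands in.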
*/ if (vmio) { bp->b_flags |= B_VMIO; #if defined(VFS_BIO_DEBUG) if (vp->v_type != VREG && vp->v_type != VBLK) printf("getblk: vmioing file type %d???\n", vp->v_type); #endif } else { bp->b_flags &= ~B_VMIO; } allocbuf(bp, size); splx(s); bp->b_flags &= ~B_DONE; } return (bp); } /* * Get an empty, disassociated buffer of given size. The buffer is initially * set to B_INVAL. */ struct buf * geteblk(int size) { struct buf *bp; int s; s = splbio(); while ((bp = getnewbuf(0, 0, size, MAXBSIZE)) == 0); splx(s); allocbuf(bp, size); bp->b_flags |= B_INVAL; /* b_dep cleared by getnewbuf() */ return (bp); } /* * This code constitutes the buffer memory from either anonymous system * memory (in the case of non-VMIO operations) or from an associated * VM object (in the case of VMIO operations). This code is able to * resize a buffer up or down. * * Note that this code is tricky, and has many complications to resolve * deadlock or inconsistant data situations. Tread lightly!!! * There are B_CACHE and B_DELWRI interactions that must be dealt with by * the caller. Calling this code willy nilly can result in the loss of data. * * allocbuf() only adjusts B_CACHE for VMIO buffers. getblk() deals with * B_CACHE for the non-VMIO case. */ int allocbuf(struct buf *bp, int size) { int newbsize, mbsize; int i; #if !defined(MAX_PERF) if (BUF_REFCNT(bp) == 0) panic("allocbuf: buffer not busy"); if (bp->b_kvasize < size) panic("allocbuf: buffer too small"); #endif if ((bp->b_flags & B_VMIO) == 0) { caddr_t origbuf; int origbufsize; /* * Just get anonymous memory from the kernel. Don't * mess with B_CACHE. */ mbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); #if !defined(NO_B_MALLOC) if (bp->b_flags & B_MALLOC) newbsize = mbsize; else #endif newbsize = round_page(size); if (newbsize < bp->b_bufsize) { #if !defined(NO_B_MALLOC) /* * malloced buffers are not shrunk */ if (bp->b_flags & B_MALLOC) { if (newbsize) { bp->b_bcount = size; } else { free(bp->b_data, M_BIOBUF); bufspace -= bp->b_bufsize; bufmallocspace -= bp->b_bufsize; runningbufspace -= bp->b_bufsize; if (bp->b_bufsize) bufspacewakeup(); bp->b_data = bp->b_kvabase; bp->b_bufsize = 0; bp->b_bcount = 0; bp->b_flags &= ~B_MALLOC; } return 1; } #endif vm_hold_free_pages( bp, (vm_offset_t) bp->b_data + newbsize, (vm_offset_t) bp->b_data + bp->b_bufsize); } else if (newbsize > bp->b_bufsize) { #if !defined(NO_B_MALLOC) /* * We only use malloced memory on the first allocation. * and revert to page-allocated memory when the buffer * grows. */ if ( (bufmallocspace < maxbufmallocspace) && (bp->b_bufsize == 0) && (mbsize <= PAGE_SIZE/2)) { bp->b_data = malloc(mbsize, M_BIOBUF, M_WAITOK); bp->b_bufsize = mbsize; bp->b_bcount = size; bp->b_flags |= B_MALLOC; bufspace += mbsize; bufmallocspace += mbsize; runningbufspace += bp->b_bufsize; return 1; } #endif origbuf = NULL; origbufsize = 0; #if !defined(NO_B_MALLOC) /* * If the buffer is growing on its other-than-first allocation, * then we revert to the page-allocation scheme. 
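 * For example (hypothetical sizes): a first allocbuf(bp, 512) may be
 * satisfied from malloc() above (first allocation, mbsize at most
 * PAGE_SIZE / 2 and bufmallocspace below its limit); a later
 * allocbuf(bp, 3072) takes this path, copies the original 512 bytes
 * into the page-backed KVA via the bcopy() below, and clears B_MALLOC.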
*/ if (bp->b_flags & B_MALLOC) { origbuf = bp->b_data; origbufsize = bp->b_bufsize; bp->b_data = bp->b_kvabase; bufspace -= bp->b_bufsize; bufmallocspace -= bp->b_bufsize; runningbufspace -= bp->b_bufsize; if (bp->b_bufsize) bufspacewakeup(); bp->b_bufsize = 0; bp->b_flags &= ~B_MALLOC; newbsize = round_page(newbsize); } #endif vm_hold_load_pages( bp, (vm_offset_t) bp->b_data + bp->b_bufsize, (vm_offset_t) bp->b_data + newbsize); #if !defined(NO_B_MALLOC) if (origbuf) { bcopy(origbuf, bp->b_data, origbufsize); free(origbuf, M_BIOBUF); } #endif } } else { vm_page_t m; int desiredpages; newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); desiredpages = (size == 0) ? 0 : num_pages((bp->b_offset & PAGE_MASK) + newbsize); #if !defined(NO_B_MALLOC) if (bp->b_flags & B_MALLOC) panic("allocbuf: VMIO buffer can't be malloced"); #endif /* * Set B_CACHE initially if buffer is 0 length or will become * 0-length. */ if (size == 0 || bp->b_bufsize == 0) bp->b_flags |= B_CACHE; if (newbsize < bp->b_bufsize) { /* * DEV_BSIZE aligned new buffer size is less then the * DEV_BSIZE aligned existing buffer size. Figure out * if we have to remove any pages. */ if (desiredpages < bp->b_npages) { for (i = desiredpages; i < bp->b_npages; i++) { /* * the page is not freed here -- it * is the responsibility of * vnode_pager_setsize */ m = bp->b_pages[i]; KASSERT(m != bogus_page, ("allocbuf: bogus page found")); while (vm_page_sleep_busy(m, TRUE, "biodep")) ; bp->b_pages[i] = NULL; vm_page_unwire(m, 0); } pmap_qremove((vm_offset_t) trunc_page((vm_offset_t)bp->b_data) + (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); bp->b_npages = desiredpages; } } else if (size > bp->b_bcount) { /* * We are growing the buffer, possibly in a * byte-granular fashion. */ struct vnode *vp; vm_object_t obj; vm_offset_t toff; vm_offset_t tinc; /* * Step 1, bring in the VM pages from the object, * allocating them if necessary. We must clear * B_CACHE if these pages are not valid for the * range covered by the buffer. */ vp = bp->b_vp; obj = vp->v_object; while (bp->b_npages < desiredpages) { vm_page_t m; vm_pindex_t pi; pi = OFF_TO_IDX(bp->b_offset) + bp->b_npages; if ((m = vm_page_lookup(obj, pi)) == NULL) { m = vm_page_alloc(obj, pi, VM_ALLOC_NORMAL); if (m == NULL) { VM_WAIT; vm_pageout_deficit += desiredpages - bp->b_npages; } else { vm_page_wire(m); vm_page_wakeup(m); bp->b_flags &= ~B_CACHE; bp->b_pages[bp->b_npages] = m; ++bp->b_npages; } continue; } /* * We found a page. If we have to sleep on it, * retry because it might have gotten freed out * from under us. * * We can only test PG_BUSY here. Blocking on * m->busy might lead to a deadlock: * * vm_fault->getpages->cluster_read->allocbuf * */ if (vm_page_sleep_busy(m, FALSE, "pgtblk")) continue; /* * We have a good page. Should we wakeup the * page daemon? */ if ((curproc != pageproc) && ((m->queue - m->pc) == PQ_CACHE) && ((cnt.v_free_count + cnt.v_cache_count) < (cnt.v_free_min + cnt.v_cache_min))) { pagedaemon_wakeup(); } vm_page_flag_clear(m, PG_ZERO); vm_page_wire(m); bp->b_pages[bp->b_npages] = m; ++bp->b_npages; } /* * Step 2. We've loaded the pages into the buffer, * we have to figure out if we can still have B_CACHE * set. Note that B_CACHE is set according to the * byte-granular range ( bcount and size ), new the * aligned range ( newbsize ). * * The VM test is against m->valid, which is DEV_BSIZE * aligned. Needless to say, the validity of the data * needs to also be DEV_BSIZE aligned. 
Note that this * fails with NFS if the server or some other client * extends the file's EOF. If our buffer is resized, * B_CACHE may remain set! XXX */ toff = bp->b_bcount; tinc = PAGE_SIZE - ((bp->b_offset + toff) & PAGE_MASK); while ((bp->b_flags & B_CACHE) && toff < size) { vm_pindex_t pi; if (tinc > (size - toff)) tinc = size - toff; pi = ((bp->b_offset & PAGE_MASK) + toff) >> PAGE_SHIFT; vfs_buf_test_cache( bp, bp->b_offset, toff, tinc, bp->b_pages[pi] ); toff += tinc; tinc = PAGE_SIZE; } /* * Step 3, fixup the KVM pmap. Remember that * bp->b_data is relative to bp->b_offset, but * bp->b_offset may be offset into the first page. */ bp->b_data = (caddr_t) trunc_page((vm_offset_t)bp->b_data); pmap_qenter( (vm_offset_t)bp->b_data, bp->b_pages, bp->b_npages ); bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | (vm_offset_t)(bp->b_offset & PAGE_MASK)); } } if (bp->b_flags & B_VMIO) vmiospace += (newbsize - bp->b_bufsize); bufspace += (newbsize - bp->b_bufsize); runningbufspace += (newbsize - bp->b_bufsize); if (newbsize < bp->b_bufsize) bufspacewakeup(); bp->b_bufsize = newbsize; /* actual buffer allocation */ bp->b_bcount = size; /* requested buffer size */ return 1; } /* * biowait: * * Wait for buffer I/O completion, returning error status. The buffer * is left locked and B_DONE on return. B_EINTR is converted into a EINTR * error and cleared. */ int biowait(register struct buf * bp) { int s; s = splbio(); while ((bp->b_flags & B_DONE) == 0) { #if defined(NO_SCHEDULE_MODS) tsleep(bp, PRIBIO, "biowait", 0); #else if (bp->b_flags & B_READ) tsleep(bp, PRIBIO, "biord", 0); else tsleep(bp, PRIBIO, "biowr", 0); #endif } splx(s); if (bp->b_flags & B_EINTR) { bp->b_flags &= ~B_EINTR; return (EINTR); } if (bp->b_flags & B_ERROR) { return (bp->b_error ? bp->b_error : EIO); } else { return (0); } } /* * biodone: * * Finish I/O on a buffer, optionally calling a completion function. * This is usually called from an interrupt so process blocking is * not allowed. * * biodone is also responsible for setting B_CACHE in a B_VMIO bp. * In a non-VMIO bp, B_CACHE will be set on the next getblk() * assuming B_INVAL is clear. * * For the VMIO case, we set B_CACHE if the op was a read and no * read error occured, or if the op was a write. B_CACHE is never * set if the buffer is invalid or otherwise uncacheable. * * biodone does not mess with B_INVAL, allowing the I/O routine or the * initiator to leave B_INVAL set to brelse the buffer out of existance * in the biodone routine. 
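 *
 *	Typical completion from a driver interrupt (illustrative sketch;
 *	hard_error is a stand-in for the driver's own error state):
 *
 *		bp->b_resid = 0;
 *		if (hard_error) {
 *			bp->b_error = EIO;
 *			bp->b_flags |= B_ERROR;
 *		}
 *		biodone(bp);
 *
 *	If B_CALL is set, biodone() hands the buffer to bp->b_iodone()
 *	instead of releasing or waking it up itself; aio_physwakeup()
 *	earlier in this change is an example of such a completion hook.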
*/ void biodone(register struct buf * bp) { int s; s = splbio(); KASSERT(BUF_REFCNT(bp) > 0, ("biodone: bp %p not busy %d", bp, BUF_REFCNT(bp))); KASSERT(!(bp->b_flags & B_DONE), ("biodone: bp %p already done", bp)); bp->b_flags |= B_DONE; if (bp->b_flags & B_FREEBUF) { brelse(bp); splx(s); return; } if ((bp->b_flags & B_READ) == 0) { vwakeup(bp); } /* call optional completion function if requested */ if (bp->b_flags & B_CALL) { bp->b_flags &= ~B_CALL; (*bp->b_iodone) (bp); splx(s); return; } if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete) (*bioops.io_complete)(bp); if (bp->b_flags & B_VMIO) { int i, resid; vm_ooffset_t foff; vm_page_t m; vm_object_t obj; int iosize; struct vnode *vp = bp->b_vp; obj = vp->v_object; #if defined(VFS_BIO_DEBUG) if (vp->v_usecount == 0) { panic("biodone: zero vnode ref count"); } if (vp->v_object == NULL) { panic("biodone: missing VM object"); } if ((vp->v_flag & VOBJBUF) == 0) { panic("biodone: vnode is not setup for merged cache"); } #endif foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("biodone: no buffer offset")); #if !defined(MAX_PERF) if (!obj) { panic("biodone: no object"); } #endif #if defined(VFS_BIO_DEBUG) if (obj->paging_in_progress < bp->b_npages) { printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", obj->paging_in_progress, bp->b_npages); } #endif /* * Set B_CACHE if the op was a normal read and no error * occured. B_CACHE is set for writes in the b*write() * routines. */ iosize = bp->b_bcount; if ((bp->b_flags & (B_READ|B_FREEBUF|B_INVAL|B_NOCACHE|B_ERROR)) == B_READ) { bp->b_flags |= B_CACHE; } for (i = 0; i < bp->b_npages; i++) { int bogusflag = 0; m = bp->b_pages[i]; if (m == bogus_page) { bogusflag = 1; m = vm_page_lookup(obj, OFF_TO_IDX(foff)); if (!m) { #if defined(VFS_BIO_DEBUG) printf("biodone: page disappeared\n"); #endif vm_object_pip_subtract(obj, 1); bp->b_flags &= ~B_CACHE; continue; } bp->b_pages[i] = m; pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } #if defined(VFS_BIO_DEBUG) if (OFF_TO_IDX(foff) != m->pindex) { printf( "biodone: foff(%lu)/m->pindex(%d) mismatch\n", (unsigned long)foff, m->pindex); } #endif resid = IDX_TO_OFF(m->pindex + 1) - foff; if (resid > iosize) resid = iosize; /* * In the write case, the valid and clean bits are * already changed correctly ( see bdwrite() ), so we * only need to do this here in the read case. */ if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) { vfs_page_set_valid(bp, foff, i, m); } vm_page_flag_clear(m, PG_ZERO); /* * when debugging new filesystems or buffer I/O methods, this * is the most common error that pops up. if you see this, you * have not set the page busy flag correctly!!! */ if (m->busy == 0) { #if !defined(MAX_PERF) printf("biodone: page busy < 0, " "pindex: %d, foff: 0x(%x,%x), " "resid: %d, index: %d\n", (int) m->pindex, (int)(foff >> 32), (int) foff & 0xffffffff, resid, i); #endif if (vp->v_type != VBLK) #if !defined(MAX_PERF) printf(" iosize: %ld, lblkno: %d, flags: 0x%lx, npages: %d\n", bp->b_vp->v_mount->mnt_stat.f_iosize, (int) bp->b_lblkno, bp->b_flags, bp->b_npages); else printf(" VDEV, lblkno: %d, flags: 0x%lx, npages: %d\n", (int) bp->b_lblkno, bp->b_flags, bp->b_npages); printf(" valid: 0x%x, dirty: 0x%x, wired: %d\n", m->valid, m->dirty, m->wire_count); #endif panic("biodone: page busy < 0\n"); } vm_page_io_finish(m); vm_object_pip_subtract(obj, 1); foff += resid; iosize -= resid; } if (obj) vm_object_pip_wakeupn(obj, 0); } /* * For asynchronous completions, release the buffer now. 
The brelse
	 * will do a wakeup there if necessary - so no need to do a wakeup
	 * here in the async case.  The sync case always needs to do a wakeup.
	 */

	if (bp->b_flags & B_ASYNC) {
		if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR | B_RELBUF)) != 0)
			brelse(bp);
		else
			bqrelse(bp);
	} else {
		wakeup(bp);
	}
	splx(s);
}

/*
 * This routine is called in lieu of iodone in the case of
 * incomplete I/O.  This keeps the busy status for pages
 * consistent.
 */
void
vfs_unbusy_pages(struct buf * bp)
{
	int i;

	if (bp->b_flags & B_VMIO) {
		struct vnode *vp = bp->b_vp;
		vm_object_t obj = vp->v_object;

		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];

			if (m == bogus_page) {
				m = vm_page_lookup(obj, OFF_TO_IDX(bp->b_offset) + i);
#if !defined(MAX_PERF)
				if (!m) {
					panic("vfs_unbusy_pages: page missing\n");
				}
#endif
				bp->b_pages[i] = m;
				pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
			}
			vm_object_pip_subtract(obj, 1);
			vm_page_flag_clear(m, PG_ZERO);
			vm_page_io_finish(m);
		}
		vm_object_pip_wakeupn(obj, 0);
	}
}

/*
 *	vfs_page_set_valid:
 *
 *	Set the valid bits in a page based on the supplied offset.   The
 *	range is restricted to the buffer's size.
 *
 *	This routine is typically called after a read completes.
 */
static void
vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
{
	vm_ooffset_t soff, eoff;

	/*
	 * Start and end offsets in buffer.  eoff - soff may not cross a
	 * page boundary or cross the end of the buffer.  The end of the
	 * buffer, in this case, is our file EOF, not the allocation size
	 * of the buffer.
	 */
	soff = off;
	eoff = (off + PAGE_SIZE) & ~PAGE_MASK;
	if (eoff > bp->b_offset + bp->b_bcount)
		eoff = bp->b_offset + bp->b_bcount;

	/*
	 * Set valid range.  This is typically the entire buffer and thus the
	 * entire page.
	 */
	if (eoff > soff) {
		vm_page_set_validclean(
		    m,
		    (vm_offset_t) (soff & PAGE_MASK),
		    (vm_offset_t) (eoff - soff)
		);
	}
}

/*
 * This routine is called before a device strategy routine.
 * It is used to tell the VM system that paging I/O is in
 * progress, and treat the pages associated with the buffer
 * almost as being PG_BUSY.  Also the object paging_in_progress
 * flag is handled to make sure that the object doesn't become
 * inconsistent.
 *
 * Since I/O has not been initiated yet, certain buffer flags
 * such as B_ERROR or B_INVAL may be in an inconsistent state
 * and should be ignored.
 */
void
vfs_busy_pages(struct buf * bp, int clear_modify)
{
	int i, bogus;

	if (bp->b_flags & B_VMIO) {
		struct vnode *vp = bp->b_vp;
		vm_object_t obj = vp->v_object;
		vm_ooffset_t foff;

		foff = bp->b_offset;
		KASSERT(bp->b_offset != NOOFFSET,
		    ("vfs_busy_pages: no buffer offset"));
		vfs_setdirty(bp);

retry:
		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];
			if (vm_page_sleep_busy(m, FALSE, "vbpage"))
				goto retry;
		}

		bogus = 0;
		for (i = 0; i < bp->b_npages; i++) {
			vm_page_t m = bp->b_pages[i];

			vm_page_flag_clear(m, PG_ZERO);
			if ((bp->b_flags & B_CLUSTER) == 0) {
				vm_object_pip_add(obj, 1);
				vm_page_io_start(m);
			}

			/*
			 * When readying a buffer for a read ( i.e
			 * clear_modify == 0 ), it is important to do
			 * bogus_page replacement for valid pages in
			 * partially instantiated buffers.  Partially
			 * instantiated buffers can, in turn, occur when
			 * reconstituting a buffer from its VM backing store
			 * base.  We only have to do this if B_CACHE is
			 * clear ( which causes the I/O to occur in the
			 * first place ).  The replacement prevents the read
			 * I/O from overwriting potentially dirty VM-backed
			 * pages.  XXX bogus page replacement is, uh, bogus.
* It may not work properly with small-block devices. * We need to find a better way. */ vm_page_protect(m, VM_PROT_NONE); if (clear_modify) vfs_page_set_valid(bp, foff, i, m); else if (m->valid == VM_PAGE_BITS_ALL && (bp->b_flags & B_CACHE) == 0) { bp->b_pages[i] = bogus_page; bogus++; } foff = (foff + PAGE_SIZE) & ~PAGE_MASK; } if (bogus) pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages); } } /* * Tell the VM system that the pages associated with this buffer * are clean. This is used for delayed writes where the data is * going to go to disk eventually without additional VM intevention. * * Note that while we only really need to clean through to b_bcount, we * just go ahead and clean through to b_bufsize. */ static void vfs_clean_pages(struct buf * bp) { int i; if (bp->b_flags & B_VMIO) { vm_ooffset_t foff; foff = bp->b_offset; KASSERT(bp->b_offset != NOOFFSET, ("vfs_clean_pages: no buffer offset")); for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; vm_ooffset_t noff = (foff + PAGE_SIZE) & ~PAGE_MASK; vm_ooffset_t eoff = noff; if (eoff > bp->b_offset + bp->b_bufsize) eoff = bp->b_offset + bp->b_bufsize; vfs_page_set_valid(bp, foff, i, m); /* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */ foff = noff; } } } /* * vfs_bio_set_validclean: * * Set the range within the buffer to valid and clean. The range is * relative to the beginning of the buffer, b_offset. Note that b_offset * itself may be offset from the beginning of the first page. */ void vfs_bio_set_validclean(struct buf *bp, int base, int size) { if (bp->b_flags & B_VMIO) { int i; int n; /* * Fixup base to be relative to beginning of first page. * Set initial n to be the maximum number of bytes in the * first page that can be validated. */ base += (bp->b_offset & PAGE_MASK); n = PAGE_SIZE - (base & PAGE_MASK); for (i = base / PAGE_SIZE; size > 0 && i < bp->b_npages; ++i) { vm_page_t m = bp->b_pages[i]; if (n > size) n = size; vm_page_set_validclean(m, base & PAGE_MASK, n); base += n; size -= n; n = PAGE_SIZE; } } } /* * vfs_bio_clrbuf: * * clear a buffer. This routine essentially fakes an I/O, so we need * to clear B_ERROR and B_INVAL. * * Note that while we only theoretically need to clear through b_bcount, * we go ahead and clear through b_bufsize. 
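 *
 *	Worked example (illustrative): for a single-page buffer with
 *	b_bufsize = 2048 and DEV_BSIZE = 512, the mask computed below is
 *	(1 << (2048 / 512)) - 1 = 0x0f, i.e. only the first four 512-byte
 *	chunks of the page are cleared and marked valid.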
*/ void vfs_bio_clrbuf(struct buf *bp) { int i, mask = 0; caddr_t sa, ea; if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) { bp->b_flags &= ~(B_INVAL|B_ERROR); if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) && (bp->b_offset & PAGE_MASK) == 0) { mask = (1 << (bp->b_bufsize / DEV_BSIZE)) - 1; if (((bp->b_pages[0]->flags & PG_ZERO) == 0) && ((bp->b_pages[0]->valid & mask) != mask)) { bzero(bp->b_data, bp->b_bufsize); } bp->b_pages[0]->valid |= mask; bp->b_resid = 0; return; } ea = sa = bp->b_data; for(i=0;ib_npages;i++,sa=ea) { int j = ((u_long)sa & PAGE_MASK) / DEV_BSIZE; ea = (caddr_t)trunc_page((vm_offset_t)sa + PAGE_SIZE); ea = (caddr_t)ulmin((u_long)ea, (u_long)bp->b_data + bp->b_bufsize); mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j; if ((bp->b_pages[i]->valid & mask) == mask) continue; if ((bp->b_pages[i]->valid & mask) == 0) { if ((bp->b_pages[i]->flags & PG_ZERO) == 0) { bzero(sa, ea - sa); } } else { for (; sa < ea; sa += DEV_BSIZE, j++) { if (((bp->b_pages[i]->flags & PG_ZERO) == 0) && (bp->b_pages[i]->valid & (1<b_pages[i]->valid |= mask; vm_page_flag_clear(bp->b_pages[i], PG_ZERO); } bp->b_resid = 0; } else { clrbuf(bp); } } /* * vm_hold_load_pages and vm_hold_unload pages get pages into * a buffers address space. The pages are anonymous and are * not associated with a file object. */ void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index; to = round_page(to); from = round_page(from); index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; for (pg = from; pg < to; pg += PAGE_SIZE, index++) { tryagain: p = vm_page_alloc(kernel_object, ((pg - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT), VM_ALLOC_NORMAL); if (!p) { vm_pageout_deficit += (to - from) >> PAGE_SHIFT; VM_WAIT; goto tryagain; } vm_page_wire(p); p->valid = VM_PAGE_BITS_ALL; vm_page_flag_clear(p, PG_ZERO); pmap_kenter(pg, VM_PAGE_TO_PHYS(p)); bp->b_pages[index] = p; vm_page_wakeup(p); } bp->b_npages = index; } void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index, newnpages; from = round_page(from); to = round_page(to); newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT; for (pg = from; pg < to; pg += PAGE_SIZE, index++) { p = bp->b_pages[index]; if (p && (index < bp->b_npages)) { #if !defined(MAX_PERF) if (p->busy) { printf("vm_hold_free_pages: blkno: %d, lblkno: %d\n", bp->b_blkno, bp->b_lblkno); } #endif bp->b_pages[index] = NULL; pmap_kremove(pg); vm_page_busy(p); vm_page_unwire(p, 0); vm_page_free(p); } } bp->b_npages = newnpages; } #include "opt_ddb.h" #ifdef DDB #include DB_SHOW_COMMAND(buffer, db_show_buffer) { /* get args */ struct buf *bp = (struct buf *)addr; if (!have_addr) { db_printf("usage: show buffer \n"); return; } db_printf("b_flags = 0x%b\n", (u_int)bp->b_flags, PRINT_BUF_FLAGS); db_printf("b_error = %d, b_bufsize = %ld, b_bcount = %ld, " "b_resid = %ld\nb_dev = (%d,%d), b_data = %p, " "b_blkno = %d, b_pblkno = %d\n", bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid, major(bp->b_dev), minor(bp->b_dev), bp->b_data, bp->b_blkno, bp->b_pblkno); if (bp->b_npages) { int i; db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages); for (i = 0; i < bp->b_npages; i++) { vm_page_t m; m = bp->b_pages[i]; db_printf("(%p, 0x%lx, 0x%lx)", (void *)m->object, (u_long)m->pindex, (u_long)VM_PAGE_TO_PHYS(m)); if ((i + 1) < bp->b_npages) db_printf(","); } db_printf("\n"); } } #endif /* DDB */ Index: head/sys/kern/vfs_export.c 
=================================================================== --- head/sys/kern/vfs_export.c (revision 49534) +++ head/sys/kern/vfs_export.c (revision 49535) @@ -1,2978 +1,2976 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 - * $Id: vfs_subr.c,v 1.213 1999/07/20 09:47:44 phk Exp $ + * $Id: vfs_subr.c,v 1.214 1999/07/26 06:25:17 alc Exp $ */ /* * External virtual filesystem routines */ #include "opt_ddb.h" #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); static struct vnode *checkalias2 __P((struct vnode *nvp, dev_t dev, struct mount *mp)); static void insmntque __P((struct vnode *vp, struct mount *mp)); static void vclean __P((struct vnode *vp, int flags, struct proc *p)); static void vfree __P((struct vnode *)); static void vgonel __P((struct vnode *vp, struct proc *p)); static unsigned long numvnodes; SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ struct tobefreelist vnode_tobefree_list; /* vnode free list */ static u_long wantfreevnodes = 25; SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); static u_long freevnodes = 0; SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); static int reassignbufcalls; SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, ""); static int reassignbufloops; SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, ""); static int reassignbufsortgood; SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, ""); static int reassignbufsortbad; SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, ""); static int reassignbufmethod = 1; SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, ""); #ifdef ENABLE_VFS_IOOPT int vfs_ioopt = 0; SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); #endif struct mntlist mountlist; /* mounted filesystem list */ struct simplelock mountlist_slock; struct simplelock mntvnode_slock; int nfs_mount_type = -1; #ifndef NULL_SIMPLELOCKS static struct simplelock mntid_slock; static struct simplelock vnode_free_list_slock; static struct simplelock spechash_slock; #endif struct nfs_public nfs_pub; /* publicly exported FS */ static vm_zone_t vnode_zone; /* * The workitem queue. 
*/ #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ time_t syncdelay = 30; /* max time to delay syncing data */ time_t filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, ""); time_t dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, ""); time_t metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, ""); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); static int syncer_delayno = 0; static long syncer_mask; LIST_HEAD(synclist, vnode); static struct synclist *syncer_workitem_pending; int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "Maximum number of vnodes"); static void vfs_free_addrlist __P((struct netexport *nep)); static int vfs_free_netcred __P((struct radix_node *rn, void *w)); static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, struct export_args *argp)); /* * Initialize the vnode management data structures. */ void vntblinit() { desiredvnodes = maxproc + cnt.v_page_count / 4; simple_lock_init(&mntvnode_slock); simple_lock_init(&mntid_slock); simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); TAILQ_INIT(&vnode_tobefree_list); simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; } /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Interlock is not released on failure. */ int vfs_busy(mp, flags, interlkp, p) struct mount *mp; int flags; struct simplelock *interlkp; struct proc *p; { int lkflags; if (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & LK_NOWAIT) return (ENOENT); mp->mnt_kern_flag |= MNTK_MWAIT; if (interlkp) { simple_unlock(interlkp); } /* * Since all busy locks are shared except the exclusive * lock granted when unmounting, the only place that a * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. */ tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); if (interlkp) { simple_lock(interlkp); } return (ENOENT); } lkflags = LK_SHARED | LK_NOPAUSE; if (interlkp) lkflags |= LK_INTERLOCK; if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) panic("vfs_busy: unexpected lock failure"); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(mp, p) struct mount *mp; struct proc *p; { lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* * Lookup a filesystem type, and if found allocate and initialize * a mount structure for it. * * Devname is usually updated by mount(8) after booting. 
*/ int vfs_rootmountalloc(fstypename, devname, mpp) char *fstypename; char *devname; struct mount **mpp; { struct proc *p = curproc; /* XXX */ struct vfsconf *vfsp; struct mount *mp; if (fstypename == NULL) return (ENODEV); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) return (ENODEV); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); LIST_INIT(&mp->mnt_vnodelist); mp->mnt_vfc = vfsp; mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_flag = MNT_RDONLY; mp->mnt_vnodecovered = NULLVP; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); mp->mnt_stat.f_mntonname[0] = '/'; mp->mnt_stat.f_mntonname[1] = 0; (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); *mpp = mp; return (0); } /* * Find an appropriate filesystem to use for the root. If a filesystem * has not been preselected, walk through the list of known filesystems * trying those that have mountroot routines, and try them until one * works or we have tried them all. */ #ifdef notdef /* XXX JH */ int lite2_vfs_mountroot() { struct vfsconf *vfsp; extern int (*lite2_mountroot) __P((void)); int error; if (lite2_mountroot != NULL) return ((*lite2_mountroot)()); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { if (vfsp->vfc_mountroot == NULL) continue; if ((error = (*vfsp->vfc_mountroot)()) == 0) return (0); printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); } return (ENODEV); } #endif /* * Lookup a mount point by filesystem identifier. */ struct mount * vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { simple_unlock(&mountlist_slock); return (mp); } } simple_unlock(&mountlist_slock); return ((struct mount *) 0); } /* * Get a new unique fsid */ void vfs_getnewfsid(mp) struct mount *mp; { static u_short xxxfs_mntid; fsid_t tfsid; int mtype; simple_lock(&mntid_slock); mtype = mp->mnt_vfc->vfc_typenum; mp->mnt_stat.f_fsid.val[0] = makeudev(255, mtype); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) ++xxxfs_mntid; tfsid.val[0] = makeudev(255, mtype + (xxxfs_mntid << 16)); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { while (vfs_getvfs(&tfsid)) { xxxfs_mntid++; tfsid.val[0] = makeudev(255, mtype + (xxxfs_mntid << 16)); } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; simple_unlock(&mntid_slock); } /* * Set vnode attributes to VNOVAL */ void vattr_null(vap) register struct vattr *vap; { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. 
*/ extern vop_t **dead_vnodeop_p; /* * Return the next vnode from the free list. */ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; vop_t **vops; struct vnode **vpp; { int s; struct proc *p = curproc; /* XXX */ struct vnode *vp, *tvp, *nvp; vm_object_t object; TAILQ_HEAD(freelst, vnode) vnode_tmp_list; /* * We take the least recently used vnode from the freelist * if we can get it and it has no cached pages, and no * namecache entries are relative to it. * Otherwise we allocate a new vnode */ s = splbio(); simple_lock(&vnode_free_list_slock); TAILQ_INIT(&vnode_tmp_list); for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } vp->v_flag &= ~(VTBFREE|VAGE); vp->v_flag |= VFREE; if (vp->v_usecount) panic("tobe free vnode isn't"); freevnodes++; } if (wantfreevnodes && freevnodes < wantfreevnodes) { vp = NULL; } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { /* * XXX: this is only here to be backwards compatible */ vp = NULL; } else { for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); if (!simple_lock_try(&vp->v_interlock)) continue; if (vp->v_usecount) panic("free vnode isn't"); object = vp->v_object; if (object && (object->resident_page_count || object->ref_count)) { printf("object inconsistant state: RPC: %d, RC: %d\n", object->resident_page_count, object->ref_count); /* Don't recycle if it's caching some pages */ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); continue; } else if (LIST_FIRST(&vp->v_cache_src)) { /* Don't recycle if active in the namecache */ simple_unlock(&vp->v_interlock); continue; } else { break; } } } for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { nvp = TAILQ_NEXT(tvp, v_freelist); TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); simple_unlock(&tvp->v_interlock); } if (vp) { vp->v_flag |= VDOOMED; TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; simple_unlock(&vnode_free_list_slock); cache_purge(vp); vp->v_lease = NULL; if (vp->v_type != VBAD) { vgonel(vp, p); } else { simple_unlock(&vp->v_interlock); } #ifdef INVARIANTS { int s; if (vp->v_data) panic("cleaned vnode isn't"); s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); } #endif vp->v_flag = 0; vp->v_lastr = 0; vp->v_lastw = 0; vp->v_lasta = 0; vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ vp->v_maxio = 0; } else { simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) zalloc(vnode_zone); bzero((char *) vp, sizeof *vp); simple_lock_init(&vp->v_interlock); vp->v_dd = vp; cache_purge(vp); LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); numvnodes++; } TAILQ_INIT(&vp->v_cleanblkhd); TAILQ_INIT(&vp->v_dirtyblkhd); vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; splx(s); vfs_object_create(vp, p, p->p_ucred); return (0); } /* * Move a vnode from one mount queue to another. */ static void insmntque(vp, mp) register struct vnode *vp; register struct mount *mp; { simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. 
*/ if (vp->v_mount != NULL) LIST_REMOVE(vp, v_mntvnodes); /* * Insert into list of vnodes for the new mount point, if available. */ if ((vp->v_mount = mp) == NULL) { simple_unlock(&mntvnode_slock); return; } LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); simple_unlock(&mntvnode_slock); } /* * Update outstanding I/O count and do wakeup if requested. */ void vwakeup(bp) register struct buf *bp; { register struct vnode *vp; bp->b_flags &= ~B_WRITEINPROG; if ((vp = bp->b_vp)) { vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { vp->v_flag &= ~VBWAIT; wakeup((caddr_t) &vp->v_numoutput); } } } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int slpflag, slptimeo; { register struct buf *bp; struct buf *nbp, *blist; int s, error; vm_object_t object; if (flags & V_SAVE) { s = splbio(); while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo); if (error) { splx(s); return (error); } } if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { splx(s); if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) return (error); s = splbio(); if (vp->v_numoutput > 0 || !TAILQ_EMPTY(&vp->v_dirtyblkhd)) panic("vinvalbuf: dirty bufs"); } splx(s); } s = splbio(); for (;;) { blist = TAILQ_FIRST(&vp->v_cleanblkhd); if (!blist) blist = TAILQ_FIRST(&vp->v_dirtyblkhd); if (!blist) break; for (bp = blist; bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL, "vinvalbuf", slpflag, slptimeo); if (error == ENOLCK) break; splx(s); return (error); } /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. Note that vfs_bio_awrite expects * buffers to reside on a queue, while VOP_BWRITE and * brelse do not. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { if (bp->b_vp == vp) { if (bp->b_flags & B_CLUSTEROK) { BUF_UNLOCK(bp); vfs_bio_awrite(bp); } else { bremfree(bp); bp->b_flags |= B_ASYNC; VOP_BWRITE(bp->b_vp, bp); } } else { bremfree(bp); (void) VOP_BWRITE(bp->b_vp, bp); } break; } bremfree(bp); bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); } splx(s); /* * Destroy the copy in the VM cache, too. */ simple_lock(&vp->v_interlock); object = vp->v_object; if (object != NULL) { vm_object_page_remove(object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); } simple_unlock(&vp->v_interlock); if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)) panic("vinvalbuf: flush failed"); return (0); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(vp, cred, p, length, blksize) register struct vnode *vp; struct ucred *cred; struct proc *p; off_t length; int blksize; { register struct buf *bp; struct buf *nbp; int s, anyfreed; int trunclbn; /* * Round up to the *next* lbn. 
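	 * e.g. with blksize 8192, any length in 1..8192 yields trunclbn 1,
	 * so the buffer holding the new EOF (lbn 0) is kept and buffers at
	 * lbn 1 and above are invalidated below.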
*/ trunclbn = (length + blksize - 1) / blksize; s = splbio(); restart: anyfreed = 1; for (;anyfreed;) { anyfreed = 0; for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (bp->b_lblkno >= trunclbn) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; } if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)|| (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI))) { goto restart; } } } for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (bp->b_lblkno >= trunclbn) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; } if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)|| (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) { goto restart; } } } } if (length > 0) { restartsync: for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); if (bp->b_vp == vp) { bp->b_flags |= B_ASYNC; } else { bp->b_flags &= ~B_ASYNC; } VOP_BWRITE(bp->b_vp, bp); } goto restartsync; } } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); } splx(s); vnode_pager_setsize(vp, length); return (0); } /* * Associate a buffer with a vnode. */ void bgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { int s; KASSERT(bp->b_vp == NULL, ("bgetvp: not free")); vhold(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; /* * Insert onto list for new vnode. */ s = splbio(); bp->b_xflags |= B_VNCLEAN; bp->b_xflags &= ~B_VNDIRTY; TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); splx(s); } /* * Disassociate a buffer from a vnode. */ void brelvp(bp) register struct buf *bp; { struct vnode *vp; struct buflists *listheadp; int s; KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; s = splbio(); if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { if (bp->b_xflags & B_VNDIRTY) listheadp = &vp->v_dirtyblkhd; else listheadp = &vp->v_cleanblkhd; TAILQ_REMOVE(listheadp, bp, b_vnbufs); bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); } if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) { vp->v_flag &= ~VONWORKLST; LIST_REMOVE(vp, v_synclist); } splx(s); bp->b_vp = (struct vnode *) 0; vdrop(vp); } /* * The workitem queue. * * It is useful to delay writes of file data and filesystem metadata * for tens of seconds so that quickly created and deleted files need * not waste disk bandwidth being created and removed. To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted on block devices * are delayed only about a half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). 
The * syncer_delayno variable indicates the next queue that is to be processed. * Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ /* * Add an item to the syncer work queue. */ static void vn_syncer_add_to_worklist(struct vnode *vp, int delay) { int s, slot; s = splbio(); if (vp->v_flag & VONWORKLST) { LIST_REMOVE(vp, v_synclist); } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); vp->v_flag |= VONWORKLST; splx(s); } struct proc *updateproc; static void sched_sync __P((void)); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) /* * System filesystem synchronizer daemon. */ void sched_sync(void) { struct synclist *slp; struct vnode *vp; long starttime; int s; struct proc *p = updateproc; p->p_flag |= P_BUFEXHAUST; for (;;) { starttime = time_second; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. */ s = splbio(); slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; splx(s); while ((vp = LIST_FIRST(slp)) != NULL) { if (VOP_ISLOCKED(vp) == 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); VOP_UNLOCK(vp, 0, p); } s = splbio(); if (LIST_FIRST(slp) == vp) { /* * Note: v_tag VT_VFS vps can remain on the * worklist too with no dirty blocks, but * since sync_fsync() moves it to a different * slot we are safe. */ if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && vp->v_type != VBLK) panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag); /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ vn_syncer_add_to_worklist(vp, syncdelay); } splx(s); } /* * Do soft update processing. */ if (bioops.io_sync) (*bioops.io_sync)(NULL); /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. */ if (time_second == starttime) tsleep(&lbolt, PPAUSE, "syncer", 0); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer() { int s; s = splhigh(); if (updateproc->p_wchan == &lbolt) setrunnable(updateproc); splx(s); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; return (1); } return(0); } /* * Associate a p-buffer with a vnode. 
* * Also sets B_PAGING flag to indicate that vnode is not fully associated * with the buffer. i.e. the bp has not been linked into the vnode or * ref-counted. */ void pbgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { KASSERT(bp->b_vp == NULL, ("pbgetvp: not free")); bp->b_vp = vp; bp->b_flags |= B_PAGING; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; } /* * Disassociate a p-buffer from a vnode. */ void pbrelvp(bp) register struct buf *bp; { KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL")); #if !defined(MAX_PERF) /* XXX REMOVE ME */ if (bp->b_vnbufs.tqe_next != NULL) { panic( "relpbuf(): b_vp was probably reassignbuf()d %p %x", bp, (int)bp->b_flags ); } #endif bp->b_vp = (struct vnode *) 0; bp->b_flags &= ~B_PAGING; } void pbreassignbuf(bp, newvp) struct buf *bp; struct vnode *newvp; { #if !defined(MAX_PERF) if ((bp->b_flags & B_PAGING) == 0) { panic( "pbreassignbuf() on non phys bp %p", bp ); } #endif bp->b_vp = newvp; } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { struct buflists *listheadp; int delay; int s; if (newvp == NULL) { printf("reassignbuf: NULL"); return; } ++reassignbufcalls; #if !defined(MAX_PERF) /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); #endif s = splbio(); /* * Delete from old vnode list, if on one. */ if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { if (bp->b_xflags & B_VNDIRTY) listheadp = &bp->b_vp->v_dirtyblkhd; else listheadp = &bp->b_vp->v_cleanblkhd; TAILQ_REMOVE(listheadp, bp, b_vnbufs); bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); if (bp->b_vp != newvp) { vdrop(bp->b_vp); bp->b_vp = NULL; /* for clarification */ } } /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { struct buf *tbp; listheadp = &newvp->v_dirtyblkhd; if ((newvp->v_flag & VONWORKLST) == 0) { switch (newvp->v_type) { case VDIR: delay = dirdelay; break; case VBLK: if (newvp->v_specmountpoint != NULL) { delay = metadelay; break; } /* fall through */ default: delay = filedelay; } vn_syncer_add_to_worklist(newvp, delay); } bp->b_xflags |= B_VNDIRTY; tbp = TAILQ_FIRST(listheadp); if (tbp == NULL || bp->b_lblkno == 0 || (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) { TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); ++reassignbufsortgood; } else if (bp->b_lblkno < 0) { TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); ++reassignbufsortgood; } else if (reassignbufmethod == 1) { /* * New sorting algorithm, only handle sequential case, * otherwise guess. 
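			 * i.e. if the logically preceding buffer
			 * (b_lblkno - 1) is already incore and on the dirty
			 * list, insert after it; otherwise guess: insert at
			 * the head and count it as a bad sort.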
*/ if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL && (tbp->b_xflags & B_VNDIRTY)) { TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); ++reassignbufsortgood; } else { TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); ++reassignbufsortbad; } } else { /* * Old sorting algorithm, scan queue and insert */ struct buf *ttbp; while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && (ttbp->b_lblkno < bp->b_lblkno)) { ++reassignbufloops; tbp = ttbp; } TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); } } else { bp->b_xflags |= B_VNCLEAN; TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); if ((newvp->v_flag & VONWORKLST) && TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { newvp->v_flag &= ~VONWORKLST; LIST_REMOVE(newvp, v_synclist); } } if (bp->b_vp != newvp) { bp->b_vp = newvp; vhold(bp->b_vp); } splx(s); } /* * Create a vnode for a block device. * Used for mounting the root file system. */ int bdevvp(dev, vpp) dev_t dev; struct vnode **vpp; { register struct vnode *vp; struct vnode *nvp; int error; if (dev == NODEV) { *vpp = NULLVP; return (ENXIO); } error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); if (error) { *vpp = NULLVP; return (error); } vp = nvp; /* dev2udev() results in a CDEV, so we need to cheat here. */ vp->v_type = VBLK; if ((nvp = checkalias2(vp, dev, (struct mount *)0)) != NULL) { vput(vp); vp = nvp; } *vpp = vp; return (0); } /* * Check to see if the new vnode represents a special device * for which we already have a vnode (either because of * bdevvp() or because of a different vnode representing * the same block device). If such an alias exists, deallocate * the existing contents and return the aliased vnode. The * caller is responsible for filling it with its new contents. */ struct vnode * checkalias(nvp, nvp_rdev, mp) register struct vnode *nvp; udev_t nvp_rdev; struct mount *mp; { dev_t dev; if (nvp->v_type != VBLK && nvp->v_type != VCHR) return (NULLVP); dev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0); return (checkalias2(nvp, dev, mp)); } static struct vnode * checkalias2(nvp, dev, mp) register struct vnode *nvp; dev_t dev; struct mount *mp; { struct proc *p = curproc; /* XXX */ struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) return (NULLVP); vpp = &dev->si_hlist; loop: simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. * Only alias active device nodes. * Not sure why we don't re-use this like we do below. */ simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { simple_unlock(&spechash_slock); vgonel(vp, p); goto loop; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { /* * It dissappeared, and we may have slept. * Restart from the beginning */ simple_unlock(&spechash_slock); goto loop; } break; } /* * It would be a lot clearer what is going on here if * this had been expressed as: * if ( vp && (vp->v_tag == VT_NULL)) * and the clauses had been swapped. */ if (vp == NULL || vp->v_tag != VT_NON) { struct specinfo *sinfo; /* * Put the new vnode into the hash chain. * and if there was an alias, connect them. */ nvp->v_specnext = *vpp; *vpp = nvp; nvp->v_specinfo = sinfo = dev; simple_unlock(&spechash_slock); if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } /* * if ( vp && (vp->v_tag == VT_NULL)) * We have a vnode alias, but it is a trashed. * Make it look like it's newly allocated. (by getnewvnode()) * The caller should use this instead. 
*/ simple_unlock(&spechash_slock); VOP_UNLOCK(vp, 0, p); simple_lock(&vp->v_interlock); vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; insmntque(vp, mp); return (vp); } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set if the * vnode is being eliminated in vgone. The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new file system type). */ int vget(vp, flags, p) register struct vnode *vp; int flags; struct proc *p; { int error; /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then * return failure. Cleaning is determined by checking that * the VXLOCK flag is set. */ if ((flags & LK_INTERLOCK) == 0) { simple_lock(&vp->v_interlock); } if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); return (ENOENT); } vp->v_usecount++; if (VSHOULDBUSY(vp)) vbusy(vp); if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { /* * must expand vrele here because we do not want * to call VOP_INACTIVE if the reference count * drops back to zero since it was never really * active. We must remove it from the free list * before sleeping so that multiple processes do * not try to recycle it. */ simple_lock(&vp->v_interlock); vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } return (error); } simple_unlock(&vp->v_interlock); return (0); } void vref(struct vnode *vp) { simple_lock(&vp->v_interlock); vp->v_usecount++; simple_unlock(&vp->v_interlock); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vrele: null vp")); simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; simple_unlock(&vp->v_interlock); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { VOP_INACTIVE(vp, p); } } else { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); simple_unlock(&vp->v_interlock); #endif panic("vrele: negative ref cnt"); } } void vput(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vput: null vp")); simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; VOP_UNLOCK(vp, LK_INTERLOCK, p); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); } else { #ifdef DIAGNOSTIC vprint("vput: negative ref count", vp); #endif panic("vput: negative ref cnt"); } } /* * Somebody doesn't want the vnode recycled. */ void vhold(vp) register struct vnode *vp; { int s; s = splbio(); vp->v_holdcnt++; if (VSHOULDBUSY(vp)) vbusy(vp); splx(s); } /* * One less who cares about this vnode. 
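 * Each vhold() is expected to be balanced by a vdrop(); once the hold
 * count returns to zero and the vnode is otherwise unreferenced it is
 * handed back to the free list via vfree() below.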
*/ void vdrop(vp) register struct vnode *vp; { int s; s = splbio(); if (vp->v_holdcnt <= 0) panic("vdrop: holdcnt"); vp->v_holdcnt--; if (VSHOULDFREE(vp)) vfree(vp); splx(s); } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If MNT_FORCE is specified, detach any active vnodes * that are found. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush(mp, skipvp, flags) struct mount *mp; struct vnode *skipvp; int flags; { struct proc *p = curproc; /* XXX */ struct vnode *vp, *nvp; int busy = 0; simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ if (vp->v_mount != mp) goto loop; nvp = vp->v_mntvnodes.le_next; /* * Skip over a selected vnode. */ if (vp == skipvp) continue; simple_lock(&vp->v_interlock); /* * Skip over a vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { simple_unlock(&vp->v_interlock); continue; } /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) { simple_unlock(&vp->v_interlock); continue; } /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. */ if (vp->v_usecount == 0) { simple_unlock(&mntvnode_slock); vgonel(vp, p); simple_lock(&mntvnode_slock); continue; } /* * If FORCECLOSE is set, forcibly close the vnode. For block * or character devices, revert to an anonymous device. For * all other files, just kill them. */ if (flags & FORCECLOSE) { simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { vgonel(vp, p); } else { vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } simple_lock(&mntvnode_slock); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif simple_unlock(&vp->v_interlock); busy++; } simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); } /* * Disassociate the underlying file system from a vnode. */ static void vclean(vp, flags, p) struct vnode *vp; int flags; struct proc *p; { int active; vm_object_t obj; /* * Check to see if the vnode is in use. If so we have to reference it * before we clean it out so that its count cannot fall to zero and * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) vp->v_usecount++; /* * Prevent the vnode from being recycled or brought into use while we * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. The VOP_LOCK * ensures that the VOP_INACTIVE routine is done with its work. * For active vnodes, it ensures that no other activity can * occur while the underlying object is being cleaned out. */ VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* * Clean out any buffers associated with the vnode. */ vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); if ((obj = vp->v_object) != NULL) { if (obj->ref_count == 0) { /* * This is a normal way of shutting down the object/vnode * association. 
*/ vm_object_terminate(obj); } else { /* * Woe to the process that tries to page now :-). */ vm_pager_deallocate(obj); } } /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. Note that the * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); VOP_INACTIVE(vp, p); } else { /* * Any other processes trying to obtain this lock must first * wait for VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp, 0, p); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) vrele(vp); cache_purge(vp); if (vp->v_vnlock) { #if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */ #ifdef DIAGNOSTIC if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) vprint("vclean: lock not drained", vp); #endif #endif FREE(vp->v_vnlock, M_VNODE); vp->v_vnlock = NULL; } if (VSHOULDFREE(vp)) vfree(vp); /* * Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vn_pollgone(vp); vp->v_tag = VT_NON; vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup((caddr_t) vp); } } /* * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ int vop_revoke(ap) struct vop_revoke_args /* { struct vnode *a_vp; int a_flags; } */ *ap; { struct vnode *vp, *vq; struct proc *p = curproc; /* XXX */ KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke")); vp = ap->a_vp; simple_lock(&vp->v_interlock); if (vp->v_flag & VALIASED) { /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); return (0); } /* * Ensure that vp will not be vgone'd while we * are eliminating its aliases. */ vp->v_flag |= VXLOCK; simple_unlock(&vp->v_interlock); while (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_type != vp->v_type || vp == vq) continue; simple_unlock(&spechash_slock); vgone(vq); break; } if (vq == NULLVP) { simple_unlock(&spechash_slock); } } /* * Remove the lock so that vgone below will * really eliminate the vnode after which time * vgone will awaken any sleepers. */ simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup(vp); } } vgonel(vp, p); return (0); } /* * Recycle an unused vnode to the front of the free list. * Release the passed interlock if the vnode will be recycled. */ int vrecycle(vp, inter_lkp, p) struct vnode *vp; struct simplelock *inter_lkp; struct proc *p; { simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { if (inter_lkp) { simple_unlock(inter_lkp); } vgonel(vp, p); return (1); } simple_unlock(&vp->v_interlock); return (0); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(vp) register struct vnode *vp; { struct proc *p = curproc; /* XXX */ simple_lock(&vp->v_interlock); vgonel(vp, p); } /* * vgone, with the vp interlock held. */ static void vgonel(vp, p) struct vnode *vp; struct proc *p; { int s; struct vnode *vq; struct vnode *vx; /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. 
*/ vclean(vp, DOCLOSE, p); simple_lock(&vp->v_interlock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) insmntque(vp, (struct mount *)0); /* * If special device, remove it from special device alias list * if it is on one. */ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { simple_lock(&spechash_slock); if (vp->v_hashchain == vp) { vp->v_hashchain = vp->v_specnext; } else { for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_specnext != vp) continue; vq->v_specnext = vp->v_specnext; break; } if (vq == NULL) panic("missing bdev"); } if (vp->v_flag & VALIASED) { vx = NULL; for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_type != vp->v_type) continue; if (vx) break; vx = vq; } if (vx == NULL) panic("missing alias"); if (vq == NULL) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } simple_unlock(&spechash_slock); vp->v_specinfo = NULL; } /* * If it is on the freelist and not already at the head, * move it to the head of the list. The test of the back * pointer and the reference count of zero is because * it will be removed from the free list by getnewvnode, * but will not have its reference count incremented until * after calling vgone. If the reference count were * incremented first, vgone would (incorrectly) try to * close the previous instance of the underlying object. */ if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VFREE) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); } else if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; freevnodes++; } else freevnodes++; vp->v_flag |= VFREE; TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); splx(s); } vp->v_type = VBAD; simple_unlock(&vp->v_interlock); } /* * Lookup a vnode by device number. */ int vfinddev(dev, type, vpp) dev_t dev; enum vtype type; struct vnode **vpp; { register struct vnode *vp; int rc = 0; simple_lock(&spechash_slock); for (vp = dev->si_hlist; vp; vp = vp->v_specnext) { if (type != vp->v_type) continue; *vpp = vp; rc = 1; break; } simple_unlock(&spechash_slock); return (rc); } /* * Calculate the total number of references to a special device. */ int vcount(vp) register struct vnode *vp; { struct vnode *vq, *vnext; int count; loop: if ((vp->v_flag & VALIASED) == 0) return (vp->v_usecount); simple_lock(&spechash_slock); for (count = 0, vq = vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ if (vq->v_usecount == 0 && vq != vp) { simple_unlock(&spechash_slock); vgone(vq); goto loop; } count += vq->v_usecount; } simple_unlock(&spechash_slock); return (count); } /* * Print out a description of a vnode. 
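 * Used from diagnostic paths: vflush() prints busy vnodes through
 * vprint("vflush: busy vnode", vp) when the busyprt knob is set, and
 * the DDB "show lockedvnodes" command below uses it as well.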
*/ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) char *label; register struct vnode *vp; { char buf[96]; if (label != NULL) printf("%s: %p: ", label, (void *)vp); else printf("%p: ", (void *)vp); printf("type %s, usecount %d, writecount %d, refcount %d,", typename[vp->v_type], vp->v_usecount, vp->v_writecount, vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); if (vp->v_flag & VTEXT) strcat(buf, "|VTEXT"); if (vp->v_flag & VSYSTEM) strcat(buf, "|VSYSTEM"); if (vp->v_flag & VXLOCK) strcat(buf, "|VXLOCK"); if (vp->v_flag & VXWANT) strcat(buf, "|VXWANT"); if (vp->v_flag & VBWAIT) strcat(buf, "|VBWAIT"); if (vp->v_flag & VALIASED) strcat(buf, "|VALIASED"); if (vp->v_flag & VDOOMED) strcat(buf, "|VDOOMED"); if (vp->v_flag & VFREE) strcat(buf, "|VFREE"); if (vp->v_flag & VOBJBUF) strcat(buf, "|VOBJBUF"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { printf("\n"); } else { printf("\n\t"); VOP_PRINT(vp); } } #ifdef DDB #include /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *vp; printf("Locked vnodes\n"); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) { if (VOP_ISLOCKED(vp)) vprint((char *)0, vp); } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); } #endif /* * Top level filesystem related information gathering. */ static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); static int vfs_sysctl SYSCTL_HANDLER_ARGS { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. 
*/ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif #ifdef notyet /* all sysctl names at this level are at least name and field */ if (namelen < 2) return (ENOTDIR); /* overloaded */ if (name[0] != VFS_GENERIC) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[0]) break; if (vfsp == NULL) return (EOPNOTSUPP); return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, oldp, oldlenp, newp, newlen, p)); } #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) return (EOPNOTSUPP); return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); } return (EOPNOTSUPP); } SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int sysctl_ovfs_conf SYSCTL_HANDLER_ARGS { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error) return error; } return 0; } #endif /* 1 || COMPAT_PRELITE2 */ #if 0 #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). * Copyout address of vnode followed by vnode. */ /* ARGSUSED */ static int sysctl_vnode SYSCTL_HANDLER_ARGS { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *nvp, *vp; int error; #define VPTRSZ sizeof (struct vnode *) #define VNODESZ sizeof (struct vnode) req->lock = 0; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } again: simple_lock(&mntvnode_slock); for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. */ if (vp->v_mount != mp) { simple_unlock(&mntvnode_slock); goto again; } nvp = vp->v_mntvnodes.le_next; simple_unlock(&mntvnode_slock); if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) return (error); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); return (0); } #endif /* * XXX * Exporting the vnode list on large systems causes them to crash. * Exporting the vnode list on medium systems causes sysctl to coredump. */ #if 0 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,vnode", ""); #endif /* * Check to see if a filesystem is mounted on a block device. 
*/ int vfs_mountedon(vp) struct vnode *vp; { struct vnode *vq; int error = 0; if (vp->v_specmountpoint != NULL) return (EBUSY); if (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_type != vp->v_type) continue; if (vq->v_specmountpoint != NULL) { error = EBUSY; break; } } simple_unlock(&spechash_slock); } return (error); } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall() { struct mount *mp, *nmp; struct proc *p; int error; if (curproc != NULL) p = curproc; else p = initproc; /* XXX XXX should this be proc0? */ /* * Since this only runs when rebooting, it is not interlocked. */ for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_prev; error = dounmount(mp, MNT_FORCE, p); if (error) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. */ static int vfs_hang_addrlist(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { register struct netcred *np; register struct radix_node_head *rnh; register int i; struct radix_node *rn; struct sockaddr *saddr, *smask = 0; struct domain *dom; int error; if (argp->ex_addrlen == 0) { if (mp->mnt_flag & MNT_DEFEXPORTED) return (EPERM); np = &nep->ne_defexported; np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; mp->mnt_flag |= MNT_DEFEXPORTED; return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); bzero((caddr_t) np, i); saddr = (struct sockaddr *) (np + 1); if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) smask->sa_len = argp->ex_masklen; } i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* * Seems silly to initialize every AF when most are not used, * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { dom->dom_rtattach((void **) &nep->ne_rtable[i], dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { error = ENOBUFS; goto out; } } rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, np->netc_rnodes); if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; return (0); out: free(np, M_NETADDR); return (error); } /* ARGSUSED */ static int vfs_free_netcred(rn, w) struct radix_node *rn; void *w; { register struct radix_node_head *rnh = (struct radix_node_head *) w; (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); free((caddr_t) rn, M_NETADDR); return (0); } /* * Free the net address hash lists that are hanging off the mount points. 
*/ static void vfs_free_addrlist(nep) struct netexport *nep; { register int i; register struct radix_node_head *rnh; for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { (*rnh->rnh_walktree) (rnh, vfs_free_netcred, (caddr_t) rnh); free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } int vfs_export(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; if (argp->ex_flags & MNT_DELEXPORT) { if (mp->mnt_flag & MNT_EXPUBLIC) { vfs_setpublicfs(NULL, NULL, NULL); mp->mnt_flag &= ~MNT_EXPUBLIC; } vfs_free_addrlist(nep); mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); } if (argp->ex_flags & MNT_EXPORTED) { if (argp->ex_flags & MNT_EXPUBLIC) { if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) return (error); mp->mnt_flag |= MNT_EXPUBLIC; } if ((error = vfs_hang_addrlist(mp, nep, argp))) return (error); mp->mnt_flag |= MNT_EXPORTED; } return (0); } /* * Set the publicly exported filesystem (WebNFS). Currently, only * one public filesystem is possible in the spec (RFC 2054 and 2055) */ int vfs_setpublicfs(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; struct vnode *rvp; char *cp; /* * mp == NULL -> invalidate the current info, the FS is * no longer exported. May be called from either vfs_export * or unmount, so check if it hasn't already been done. */ if (mp == NULL) { if (nfs_pub.np_valid) { nfs_pub.np_valid = 0; if (nfs_pub.np_index != NULL) { FREE(nfs_pub.np_index, M_TEMP); nfs_pub.np_index = NULL; } } return (0); } /* * Only one allowed at a time. */ if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) return (EBUSY); /* * Get real filehandle for root of exported FS. */ bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; if ((error = VFS_ROOT(mp, &rvp))) return (error); if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) return (error); vput(rvp); /* * If an indexfile was specified, pull it in. */ if (argp->ex_indexfile != NULL) { MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, M_WAITOK); error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, MAXNAMLEN, (size_t *)0); if (!error) { /* * Check for illegal filenames. */ for (cp = nfs_pub.np_index; *cp; cp++) { if (*cp == '/') { error = EINVAL; break; } } } if (error) { FREE(nfs_pub.np_index, M_TEMP); return (error); } } nfs_pub.np_mount = mp; nfs_pub.np_valid = 1; return (0); } struct netcred * vfs_export_lookup(mp, nep, nam) register struct mount *mp; struct netexport *nep; struct sockaddr *nam; { register struct netcred *np; register struct radix_node_head *rnh; struct sockaddr *saddr; np = NULL; if (mp->mnt_flag & MNT_EXPORTED) { /* * Lookup in the export list first. */ if (nam != NULL) { saddr = nam; rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } } /* * If no address match, use the default if it exists. */ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) np = &nep->ne_defexported; } return (np); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; struct vm_object *obj; int anyio, tries; tries = 5; loop: anyio = 0; for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { nvp = vp->v_mntvnodes.le_next; if (vp->v_mount != mp) { goto loop; } if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? 
*/ continue; if (flags != MNT_WAIT) { obj = vp->v_object; if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) continue; if (VOP_ISLOCKED(vp)) continue; } simple_lock(&vp->v_interlock); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { if (!vget(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0); anyio = 1; } vput(vp); } } else { simple_unlock(&vp->v_interlock); } } if (anyio && (--tries > 0)) goto loop; } /* * Create the VM object needed for VMIO and mmap support. This * is done for all VREG files in the system. Some filesystems might * afford the additional metadata buffering capability of the * VMIO code by making the device node be VMIO mode also. * * vp must be locked when vfs_object_create is called. */ int vfs_object_create(vp, p, cred) struct vnode *vp; struct proc *p; struct ucred *cred; { struct vattr vat; vm_object_t object; int error = 0; if (vp->v_type != VBLK && vn_canvmio(vp) == FALSE) return 0; retry: if ((object = vp->v_object) == NULL) { if (vp->v_type == VREG || vp->v_type == VDIR) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) goto retn; object = vnode_pager_alloc(vp, vat.va_size, 0, 0); } else if (bdevsw(vp->v_rdev) != NULL) { /* * This simply allocates the biggest object possible * for a VBLK vnode. This should be fixed, but doesn't * cause any problems (yet). */ object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); } else { goto retn; } /* * Dereference the reference we just created. This assumes * that the object is associated with the vp. */ object->ref_count--; vp->v_usecount--; } else { if (object->flags & OBJ_DEAD) { VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } } KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object")); vp->v_flag |= VOBJBUF; retn: return error; } static void vfree(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~VAGE; vp->v_flag |= VFREE; splx(s); } void vbusy(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } else { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; } simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~(VFREE|VAGE); splx(s); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) */ int vn_pollrecord(vp, p, events) struct vnode *vp; struct proc *p; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * presumably had requested them * (otherwise they would never have been * recorded). 
*/ events &= vp->v_pollinfo.vpi_revents; vp->v_pollinfo.vpi_revents &= ~events; simple_unlock(&vp->v_pollinfo.vpi_lock); return events; } vp->v_pollinfo.vpi_events |= events; selrecord(p, &vp->v_pollinfo.vpi_selinfo); simple_unlock(&vp->v_pollinfo.vpi_lock); return 0; } /* * Note the occurrence of an event. If the VN_POLLEVENT macro is used, * it is possible for us to miss an event due to race conditions, but * that condition is expected to be rare, so for the moment it is the * preferred interface. */ void vn_pollevent(vp, events) struct vnode *vp; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events & events) { /* * We clear vpi_events so that we don't * call selwakeup() twice if two events are * posted before the polling process(es) is * awakened. This also ensures that we take at * most one selwakeup() if the polling process * is no longer interested. However, it does * mean that only one event can be noticed at * a time. (Perhaps we should only clear those * event bits which we note?) XXX */ vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ vp->v_pollinfo.vpi_revents |= events; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); } /* * Wake up anyone polling on vp because it is being revoked. * This depends on dead_poll() returning POLLHUP for correct * behavior. */ void vn_pollgone(vp) struct vnode *vp; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events) { vp->v_pollinfo.vpi_events = 0; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); } /* * Routine to create and manage a filesystem syncer vnode. */ #define sync_close ((int (*) __P((struct vop_close_args *)))nullop) static int sync_fsync __P((struct vop_fsync_args *)); static int sync_inactive __P((struct vop_inactive_args *)); static int sync_reclaim __P((struct vop_reclaim_args *)); #define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) #define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) static int sync_print __P((struct vop_print_args *)); #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) static vop_t **sync_vnodeop_p; static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_eopnotsupp }, { &vop_close_desc, (vop_t *) sync_close }, /* close */ { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ { &vop_print_desc, (vop_t *) sync_print }, /* print */ { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ { NULL, NULL } }; static struct vnodeopv_desc sync_vnodeop_opv_desc = { &sync_vnodeop_p, sync_vnodeop_entries }; VNODEOP_SET(sync_vnodeop_opv_desc); /* * Create a new filesystem syncer vnode for the specified mount point. */ int vfs_allocate_syncvnode(mp) struct mount *mp; { struct vnode *vp; static long start, incr, next; int error; /* Allocate a new vnode */ if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { mp->mnt_syncer = NULL; return (error); } vp->v_type = VNON; /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. 
*/ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); mp->mnt_syncer = vp; return (0); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; struct proc *p = ap->a_p; int asyncflag; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ vn_syncer_add_to_worklist(syncvp, syncdelay); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ simple_lock(&mountlist_slock); if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) { simple_unlock(&mountlist_slock); return (0); } asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; vfs_unbusy(mp, p); return (0); } /* * The syncer vnode is no referenced. */ static int sync_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected at splbio(). */ static int sync_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; int s; s = splbio(); vp->v_mount->mnt_syncer = NULL; if (vp->v_flag & VONWORKLST) { LIST_REMOVE(vp, v_synclist); vp->v_flag &= ~VONWORKLST; } splx(s); return (0); } /* * Print out a syncer vnode. */ static int sync_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; printf("syncer vnode"); if (vp->v_vnlock != NULL) lockmgr_printinfo(vp->v_vnlock); printf("\n"); return (0); } /* * extract the dev_t from a VBLK or VCHR */ dev_t vn_todev(vp) struct vnode *vp; { if (vp->v_type != VBLK && vp->v_type != VCHR) return (NODEV); return (vp->v_rdev); } Index: head/sys/kern/vfs_subr.c =================================================================== --- head/sys/kern/vfs_subr.c (revision 49534) +++ head/sys/kern/vfs_subr.c (revision 49535) @@ -1,2978 +1,2976 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 - * $Id: vfs_subr.c,v 1.213 1999/07/20 09:47:44 phk Exp $ + * $Id: vfs_subr.c,v 1.214 1999/07/26 06:25:17 alc Exp $ */ /* * External virtual filesystem routines */ #include "opt_ddb.h" #include #include -#include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); static struct vnode *checkalias2 __P((struct vnode *nvp, dev_t dev, struct mount *mp)); static void insmntque __P((struct vnode *vp, struct mount *mp)); static void vclean __P((struct vnode *vp, int flags, struct proc *p)); static void vfree __P((struct vnode *)); static void vgonel __P((struct vnode *vp, struct proc *p)); static unsigned long numvnodes; SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ struct tobefreelist vnode_tobefree_list; /* vnode free list */ static u_long wantfreevnodes = 25; SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); static u_long freevnodes = 0; SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); static int reassignbufcalls; SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, ""); static int reassignbufloops; SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, ""); static int reassignbufsortgood; SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, ""); static int reassignbufsortbad; SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, ""); static int reassignbufmethod = 1; SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, ""); #ifdef ENABLE_VFS_IOOPT int vfs_ioopt = 0; SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); #endif struct mntlist mountlist; /* mounted filesystem list */ struct 
simplelock mountlist_slock; struct simplelock mntvnode_slock; int nfs_mount_type = -1; #ifndef NULL_SIMPLELOCKS static struct simplelock mntid_slock; static struct simplelock vnode_free_list_slock; static struct simplelock spechash_slock; #endif struct nfs_public nfs_pub; /* publicly exported FS */ static vm_zone_t vnode_zone; /* * The workitem queue. */ #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ time_t syncdelay = 30; /* max time to delay syncing data */ time_t filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, ""); time_t dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, ""); time_t metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, ""); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); static int syncer_delayno = 0; static long syncer_mask; LIST_HEAD(synclist, vnode); static struct synclist *syncer_workitem_pending; int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "Maximum number of vnodes"); static void vfs_free_addrlist __P((struct netexport *nep)); static int vfs_free_netcred __P((struct radix_node *rn, void *w)); static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, struct export_args *argp)); /* * Initialize the vnode management data structures. */ void vntblinit() { desiredvnodes = maxproc + cnt.v_page_count / 4; simple_lock_init(&mntvnode_slock); simple_lock_init(&mntid_slock); simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); TAILQ_INIT(&vnode_tobefree_list); simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; } /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Interlock is not released on failure. */ int vfs_busy(mp, flags, interlkp, p) struct mount *mp; int flags; struct simplelock *interlkp; struct proc *p; { int lkflags; if (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & LK_NOWAIT) return (ENOENT); mp->mnt_kern_flag |= MNTK_MWAIT; if (interlkp) { simple_unlock(interlkp); } /* * Since all busy locks are shared except the exclusive * lock granted when unmounting, the only place that a * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. */ tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); if (interlkp) { simple_lock(interlkp); } return (ENOENT); } lkflags = LK_SHARED | LK_NOPAUSE; if (interlkp) lkflags |= LK_INTERLOCK; if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) panic("vfs_busy: unexpected lock failure"); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(mp, p) struct mount *mp; struct proc *p; { lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* * Lookup a filesystem type, and if found allocate and initialize * a mount structure for it. * * Devname is usually updated by mount(8) after booting. 
*/ int vfs_rootmountalloc(fstypename, devname, mpp) char *fstypename; char *devname; struct mount **mpp; { struct proc *p = curproc; /* XXX */ struct vfsconf *vfsp; struct mount *mp; if (fstypename == NULL) return (ENODEV); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) return (ENODEV); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); LIST_INIT(&mp->mnt_vnodelist); mp->mnt_vfc = vfsp; mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_flag = MNT_RDONLY; mp->mnt_vnodecovered = NULLVP; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); mp->mnt_stat.f_mntonname[0] = '/'; mp->mnt_stat.f_mntonname[1] = 0; (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); *mpp = mp; return (0); } /* * Find an appropriate filesystem to use for the root. If a filesystem * has not been preselected, walk through the list of known filesystems * trying those that have mountroot routines, and try them until one * works or we have tried them all. */ #ifdef notdef /* XXX JH */ int lite2_vfs_mountroot() { struct vfsconf *vfsp; extern int (*lite2_mountroot) __P((void)); int error; if (lite2_mountroot != NULL) return ((*lite2_mountroot)()); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { if (vfsp->vfc_mountroot == NULL) continue; if ((error = (*vfsp->vfc_mountroot)()) == 0) return (0); printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); } return (ENODEV); } #endif /* * Lookup a mount point by filesystem identifier. */ struct mount * vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { simple_unlock(&mountlist_slock); return (mp); } } simple_unlock(&mountlist_slock); return ((struct mount *) 0); } /* * Get a new unique fsid */ void vfs_getnewfsid(mp) struct mount *mp; { static u_short xxxfs_mntid; fsid_t tfsid; int mtype; simple_lock(&mntid_slock); mtype = mp->mnt_vfc->vfc_typenum; mp->mnt_stat.f_fsid.val[0] = makeudev(255, mtype); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) ++xxxfs_mntid; tfsid.val[0] = makeudev(255, mtype + (xxxfs_mntid << 16)); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { while (vfs_getvfs(&tfsid)) { xxxfs_mntid++; tfsid.val[0] = makeudev(255, mtype + (xxxfs_mntid << 16)); } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; simple_unlock(&mntid_slock); } /* * Set vnode attributes to VNOVAL */ void vattr_null(vap) register struct vattr *vap; { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. 
*/ extern vop_t **dead_vnodeop_p; /* * Return the next vnode from the free list. */ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; vop_t **vops; struct vnode **vpp; { int s; struct proc *p = curproc; /* XXX */ struct vnode *vp, *tvp, *nvp; vm_object_t object; TAILQ_HEAD(freelst, vnode) vnode_tmp_list; /* * We take the least recently used vnode from the freelist * if we can get it and it has no cached pages, and no * namecache entries are relative to it. * Otherwise we allocate a new vnode */ s = splbio(); simple_lock(&vnode_free_list_slock); TAILQ_INIT(&vnode_tmp_list); for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } vp->v_flag &= ~(VTBFREE|VAGE); vp->v_flag |= VFREE; if (vp->v_usecount) panic("tobe free vnode isn't"); freevnodes++; } if (wantfreevnodes && freevnodes < wantfreevnodes) { vp = NULL; } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { /* * XXX: this is only here to be backwards compatible */ vp = NULL; } else { for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); if (!simple_lock_try(&vp->v_interlock)) continue; if (vp->v_usecount) panic("free vnode isn't"); object = vp->v_object; if (object && (object->resident_page_count || object->ref_count)) { printf("object inconsistant state: RPC: %d, RC: %d\n", object->resident_page_count, object->ref_count); /* Don't recycle if it's caching some pages */ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); continue; } else if (LIST_FIRST(&vp->v_cache_src)) { /* Don't recycle if active in the namecache */ simple_unlock(&vp->v_interlock); continue; } else { break; } } } for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { nvp = TAILQ_NEXT(tvp, v_freelist); TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); simple_unlock(&tvp->v_interlock); } if (vp) { vp->v_flag |= VDOOMED; TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; simple_unlock(&vnode_free_list_slock); cache_purge(vp); vp->v_lease = NULL; if (vp->v_type != VBAD) { vgonel(vp, p); } else { simple_unlock(&vp->v_interlock); } #ifdef INVARIANTS { int s; if (vp->v_data) panic("cleaned vnode isn't"); s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); } #endif vp->v_flag = 0; vp->v_lastr = 0; vp->v_lastw = 0; vp->v_lasta = 0; vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ vp->v_maxio = 0; } else { simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) zalloc(vnode_zone); bzero((char *) vp, sizeof *vp); simple_lock_init(&vp->v_interlock); vp->v_dd = vp; cache_purge(vp); LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); numvnodes++; } TAILQ_INIT(&vp->v_cleanblkhd); TAILQ_INIT(&vp->v_dirtyblkhd); vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; splx(s); vfs_object_create(vp, p, p->p_ucred); return (0); } /* * Move a vnode from one mount queue to another. */ static void insmntque(vp, mp) register struct vnode *vp; register struct mount *mp; { simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. 
*/ if (vp->v_mount != NULL) LIST_REMOVE(vp, v_mntvnodes); /* * Insert into list of vnodes for the new mount point, if available. */ if ((vp->v_mount = mp) == NULL) { simple_unlock(&mntvnode_slock); return; } LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); simple_unlock(&mntvnode_slock); } /* * Update outstanding I/O count and do wakeup if requested. */ void vwakeup(bp) register struct buf *bp; { register struct vnode *vp; bp->b_flags &= ~B_WRITEINPROG; if ((vp = bp->b_vp)) { vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { vp->v_flag &= ~VBWAIT; wakeup((caddr_t) &vp->v_numoutput); } } } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int slpflag, slptimeo; { register struct buf *bp; struct buf *nbp, *blist; int s, error; vm_object_t object; if (flags & V_SAVE) { s = splbio(); while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo); if (error) { splx(s); return (error); } } if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { splx(s); if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0) return (error); s = splbio(); if (vp->v_numoutput > 0 || !TAILQ_EMPTY(&vp->v_dirtyblkhd)) panic("vinvalbuf: dirty bufs"); } splx(s); } s = splbio(); for (;;) { blist = TAILQ_FIRST(&vp->v_cleanblkhd); if (!blist) blist = TAILQ_FIRST(&vp->v_dirtyblkhd); if (!blist) break; for (bp = blist; bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL, "vinvalbuf", slpflag, slptimeo); if (error == ENOLCK) break; splx(s); return (error); } /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. Note that vfs_bio_awrite expects * buffers to reside on a queue, while VOP_BWRITE and * brelse do not. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { if (bp->b_vp == vp) { if (bp->b_flags & B_CLUSTEROK) { BUF_UNLOCK(bp); vfs_bio_awrite(bp); } else { bremfree(bp); bp->b_flags |= B_ASYNC; VOP_BWRITE(bp->b_vp, bp); } } else { bremfree(bp); (void) VOP_BWRITE(bp->b_vp, bp); } break; } bremfree(bp); bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); } splx(s); /* * Destroy the copy in the VM cache, too. */ simple_lock(&vp->v_interlock); object = vp->v_object; if (object != NULL) { vm_object_page_remove(object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); } simple_unlock(&vp->v_interlock); if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd)) panic("vinvalbuf: flush failed"); return (0); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(vp, cred, p, length, blksize) register struct vnode *vp; struct ucred *cred; struct proc *p; off_t length; int blksize; { register struct buf *bp; struct buf *nbp; int s, anyfreed; int trunclbn; /* * Round up to the *next* lbn. 
*/ trunclbn = (length + blksize - 1) / blksize; s = splbio(); restart: anyfreed = 1; for (;anyfreed;) { anyfreed = 0; for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (bp->b_lblkno >= trunclbn) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; } if (nbp && (((nbp->b_xflags & B_VNCLEAN) == 0)|| (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI))) { goto restart; } } } for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (bp->b_lblkno >= trunclbn) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; } if (nbp && (((nbp->b_xflags & B_VNDIRTY) == 0)|| (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) { goto restart; } } } } if (length > 0) { restartsync: for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL); goto restart; } else { bremfree(bp); if (bp->b_vp == vp) { bp->b_flags |= B_ASYNC; } else { bp->b_flags &= ~B_ASYNC; } VOP_BWRITE(bp->b_vp, bp); } goto restartsync; } } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0); } splx(s); vnode_pager_setsize(vp, length); return (0); } /* * Associate a buffer with a vnode. */ void bgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { int s; KASSERT(bp->b_vp == NULL, ("bgetvp: not free")); vhold(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; /* * Insert onto list for new vnode. */ s = splbio(); bp->b_xflags |= B_VNCLEAN; bp->b_xflags &= ~B_VNDIRTY; TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs); splx(s); } /* * Disassociate a buffer from a vnode. */ void brelvp(bp) register struct buf *bp; { struct vnode *vp; struct buflists *listheadp; int s; KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; s = splbio(); if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { if (bp->b_xflags & B_VNDIRTY) listheadp = &vp->v_dirtyblkhd; else listheadp = &vp->v_cleanblkhd; TAILQ_REMOVE(listheadp, bp, b_vnbufs); bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); } if ((vp->v_flag & VONWORKLST) && TAILQ_EMPTY(&vp->v_dirtyblkhd)) { vp->v_flag &= ~VONWORKLST; LIST_REMOVE(vp, v_synclist); } splx(s); bp->b_vp = (struct vnode *) 0; vdrop(vp); } /* * The workitem queue. * * It is useful to delay writes of file data and filesystem metadata * for tens of seconds so that quickly created and deleted files need * not waste disk bandwidth being created and removed. To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted on block devices * are delayed only about a half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). 
The * syncer_delayno variable indicates the next queue that is to be processed. * Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ /* * Add an item to the syncer work queue. */ static void vn_syncer_add_to_worklist(struct vnode *vp, int delay) { int s, slot; s = splbio(); if (vp->v_flag & VONWORKLST) { LIST_REMOVE(vp, v_synclist); } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); vp->v_flag |= VONWORKLST; splx(s); } struct proc *updateproc; static void sched_sync __P((void)); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) /* * System filesystem synchronizer daemon. */ void sched_sync(void) { struct synclist *slp; struct vnode *vp; long starttime; int s; struct proc *p = updateproc; p->p_flag |= P_BUFEXHAUST; for (;;) { starttime = time_second; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. */ s = splbio(); slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; splx(s); while ((vp = LIST_FIRST(slp)) != NULL) { if (VOP_ISLOCKED(vp) == 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); VOP_UNLOCK(vp, 0, p); } s = splbio(); if (LIST_FIRST(slp) == vp) { /* * Note: v_tag VT_VFS vps can remain on the * worklist too with no dirty blocks, but * since sync_fsync() moves it to a different * slot we are safe. */ if (TAILQ_EMPTY(&vp->v_dirtyblkhd) && vp->v_type != VBLK) panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag); /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ vn_syncer_add_to_worklist(vp, syncdelay); } splx(s); } /* * Do soft update processing. */ if (bioops.io_sync) (*bioops.io_sync)(NULL); /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. */ if (rushjob > 0) { rushjob -= 1; continue; } /* * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. */ if (time_second == starttime) tsleep(&lbolt, PPAUSE, "syncer", 0); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer() { int s; s = splhigh(); if (updateproc->p_wchan == &lbolt) setrunnable(updateproc); splx(s); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; return (1); } return(0); } /* * Associate a p-buffer with a vnode. 
* * Also sets B_PAGING flag to indicate that vnode is not fully associated * with the buffer. i.e. the bp has not been linked into the vnode or * ref-counted. */ void pbgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { KASSERT(bp->b_vp == NULL, ("pbgetvp: not free")); bp->b_vp = vp; bp->b_flags |= B_PAGING; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; } /* * Disassociate a p-buffer from a vnode. */ void pbrelvp(bp) register struct buf *bp; { KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL")); #if !defined(MAX_PERF) /* XXX REMOVE ME */ if (bp->b_vnbufs.tqe_next != NULL) { panic( "relpbuf(): b_vp was probably reassignbuf()d %p %x", bp, (int)bp->b_flags ); } #endif bp->b_vp = (struct vnode *) 0; bp->b_flags &= ~B_PAGING; } void pbreassignbuf(bp, newvp) struct buf *bp; struct vnode *newvp; { #if !defined(MAX_PERF) if ((bp->b_flags & B_PAGING) == 0) { panic( "pbreassignbuf() on non phys bp %p", bp ); } #endif bp->b_vp = newvp; } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { struct buflists *listheadp; int delay; int s; if (newvp == NULL) { printf("reassignbuf: NULL"); return; } ++reassignbufcalls; #if !defined(MAX_PERF) /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); #endif s = splbio(); /* * Delete from old vnode list, if on one. */ if (bp->b_xflags & (B_VNDIRTY|B_VNCLEAN)) { if (bp->b_xflags & B_VNDIRTY) listheadp = &bp->b_vp->v_dirtyblkhd; else listheadp = &bp->b_vp->v_cleanblkhd; TAILQ_REMOVE(listheadp, bp, b_vnbufs); bp->b_xflags &= ~(B_VNDIRTY|B_VNCLEAN); if (bp->b_vp != newvp) { vdrop(bp->b_vp); bp->b_vp = NULL; /* for clarification */ } } /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { struct buf *tbp; listheadp = &newvp->v_dirtyblkhd; if ((newvp->v_flag & VONWORKLST) == 0) { switch (newvp->v_type) { case VDIR: delay = dirdelay; break; case VBLK: if (newvp->v_specmountpoint != NULL) { delay = metadelay; break; } /* fall through */ default: delay = filedelay; } vn_syncer_add_to_worklist(newvp, delay); } bp->b_xflags |= B_VNDIRTY; tbp = TAILQ_FIRST(listheadp); if (tbp == NULL || bp->b_lblkno == 0 || (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) { TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); ++reassignbufsortgood; } else if (bp->b_lblkno < 0) { TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs); ++reassignbufsortgood; } else if (reassignbufmethod == 1) { /* * New sorting algorithm, only handle sequential case, * otherwise guess. 
*/ if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL && (tbp->b_xflags & B_VNDIRTY)) { TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); ++reassignbufsortgood; } else { TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs); ++reassignbufsortbad; } } else { /* * Old sorting algorithm, scan queue and insert */ struct buf *ttbp; while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) && (ttbp->b_lblkno < bp->b_lblkno)) { ++reassignbufloops; tbp = ttbp; } TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs); } } else { bp->b_xflags |= B_VNCLEAN; TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs); if ((newvp->v_flag & VONWORKLST) && TAILQ_EMPTY(&newvp->v_dirtyblkhd)) { newvp->v_flag &= ~VONWORKLST; LIST_REMOVE(newvp, v_synclist); } } if (bp->b_vp != newvp) { bp->b_vp = newvp; vhold(bp->b_vp); } splx(s); } /* * Create a vnode for a block device. * Used for mounting the root file system. */ int bdevvp(dev, vpp) dev_t dev; struct vnode **vpp; { register struct vnode *vp; struct vnode *nvp; int error; if (dev == NODEV) { *vpp = NULLVP; return (ENXIO); } error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); if (error) { *vpp = NULLVP; return (error); } vp = nvp; /* dev2udev() results in a CDEV, so we need to cheat here. */ vp->v_type = VBLK; if ((nvp = checkalias2(vp, dev, (struct mount *)0)) != NULL) { vput(vp); vp = nvp; } *vpp = vp; return (0); } /* * Check to see if the new vnode represents a special device * for which we already have a vnode (either because of * bdevvp() or because of a different vnode representing * the same block device). If such an alias exists, deallocate * the existing contents and return the aliased vnode. The * caller is responsible for filling it with its new contents. */ struct vnode * checkalias(nvp, nvp_rdev, mp) register struct vnode *nvp; udev_t nvp_rdev; struct mount *mp; { dev_t dev; if (nvp->v_type != VBLK && nvp->v_type != VCHR) return (NULLVP); dev = udev2dev(nvp_rdev, nvp->v_type == VBLK ? 1 : 0); return (checkalias2(nvp, dev, mp)); } static struct vnode * checkalias2(nvp, dev, mp) register struct vnode *nvp; dev_t dev; struct mount *mp; { struct proc *p = curproc; /* XXX */ struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) return (NULLVP); vpp = &dev->si_hlist; loop: simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. * Only alias active device nodes. * Not sure why we don't re-use this like we do below. */ simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { simple_unlock(&spechash_slock); vgonel(vp, p); goto loop; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { /* * It disappeared, and we may have slept. * Restart from the beginning */ simple_unlock(&spechash_slock); goto loop; } break; } /* * It would be a lot clearer what is going on here if * this had been expressed as: * if ( vp && (vp->v_tag == VT_NULL)) * and the clauses had been swapped. */ if (vp == NULL || vp->v_tag != VT_NON) { struct specinfo *sinfo; /* * Put the new vnode into the hash chain. * and if there was an alias, connect them. */ nvp->v_specnext = *vpp; *vpp = nvp; nvp->v_specinfo = sinfo = dev; simple_unlock(&spechash_slock); if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } /* * if ( vp && (vp->v_tag == VT_NULL)) * We have a vnode alias, but it is trashed. * Make it look like it's newly allocated. (by getnewvnode()) * The caller should use this instead. 
*/ simple_unlock(&spechash_slock); VOP_UNLOCK(vp, 0, p); simple_lock(&vp->v_interlock); vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; insmntque(vp, mp); return (vp); } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set if the * vnode is being eliminated in vgone. The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new file system type). */ int vget(vp, flags, p) register struct vnode *vp; int flags; struct proc *p; { int error; /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then * return failure. Cleaning is determined by checking that * the VXLOCK flag is set. */ if ((flags & LK_INTERLOCK) == 0) { simple_lock(&vp->v_interlock); } if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); return (ENOENT); } vp->v_usecount++; if (VSHOULDBUSY(vp)) vbusy(vp); if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { /* * must expand vrele here because we do not want * to call VOP_INACTIVE if the reference count * drops back to zero since it was never really * active. We must remove it from the free list * before sleeping so that multiple processes do * not try to recycle it. */ simple_lock(&vp->v_interlock); vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } return (error); } simple_unlock(&vp->v_interlock); return (0); } void vref(struct vnode *vp) { simple_lock(&vp->v_interlock); vp->v_usecount++; simple_unlock(&vp->v_interlock); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vrele: null vp")); simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; simple_unlock(&vp->v_interlock); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { VOP_INACTIVE(vp, p); } } else { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); simple_unlock(&vp->v_interlock); #endif panic("vrele: negative ref cnt"); } } void vput(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ KASSERT(vp != NULL, ("vput: null vp")); simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; VOP_UNLOCK(vp, LK_INTERLOCK, p); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); } else { #ifdef DIAGNOSTIC vprint("vput: negative ref count", vp); #endif panic("vput: negative ref cnt"); } } /* * Somebody doesn't want the vnode recycled. */ void vhold(vp) register struct vnode *vp; { int s; s = splbio(); vp->v_holdcnt++; if (VSHOULDBUSY(vp)) vbusy(vp); splx(s); } /* * One less who cares about this vnode. 
*/ void vdrop(vp) register struct vnode *vp; { int s; s = splbio(); if (vp->v_holdcnt <= 0) panic("vdrop: holdcnt"); vp->v_holdcnt--; if (VSHOULDFREE(vp)) vfree(vp); splx(s); } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If MNT_FORCE is specified, detach any active vnodes * that are found. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush(mp, skipvp, flags) struct mount *mp; struct vnode *skipvp; int flags; { struct proc *p = curproc; /* XXX */ struct vnode *vp, *nvp; int busy = 0; simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ if (vp->v_mount != mp) goto loop; nvp = vp->v_mntvnodes.le_next; /* * Skip over a selected vnode. */ if (vp == skipvp) continue; simple_lock(&vp->v_interlock); /* * Skip over vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { simple_unlock(&vp->v_interlock); continue; } /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) { simple_unlock(&vp->v_interlock); continue; } /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. */ if (vp->v_usecount == 0) { simple_unlock(&mntvnode_slock); vgonel(vp, p); simple_lock(&mntvnode_slock); continue; } /* * If FORCECLOSE is set, forcibly close the vnode. For block * or character devices, revert to an anonymous device. For * all other files, just kill them. */ if (flags & FORCECLOSE) { simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { vgonel(vp, p); } else { vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } simple_lock(&mntvnode_slock); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif simple_unlock(&vp->v_interlock); busy++; } simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); } /* * Disassociate the underlying file system from a vnode. */ static void vclean(vp, flags, p) struct vnode *vp; int flags; struct proc *p; { int active; vm_object_t obj; /* * Check to see if the vnode is in use. If so we have to reference it * before we clean it out so that its count cannot fall to zero and * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) vp->v_usecount++; /* * Prevent the vnode from being recycled or brought into use while we * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. The VOP_LOCK * ensures that the VOP_INACTIVE routine is done with its work. * For active vnodes, it ensures that no other activity can * occur while the underlying object is being cleaned out. */ VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* * Clean out any buffers associated with the vnode. */ vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); if ((obj = vp->v_object) != NULL) { if (obj->ref_count == 0) { /* * This is a normal way of shutting down the object/vnode * association. 
*/ vm_object_terminate(obj); } else { /* * Woe to the process that tries to page now :-). */ vm_pager_deallocate(obj); } } /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. Note that the * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) VOP_CLOSE(vp, FNONBLOCK, NOCRED, p); VOP_INACTIVE(vp, p); } else { /* * Any other processes trying to obtain this lock must first * wait for VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp, 0, p); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) vrele(vp); cache_purge(vp); if (vp->v_vnlock) { #if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */ #ifdef DIAGNOSTIC if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) vprint("vclean: lock not drained", vp); #endif #endif FREE(vp->v_vnlock, M_VNODE); vp->v_vnlock = NULL; } if (VSHOULDFREE(vp)) vfree(vp); /* * Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vn_pollgone(vp); vp->v_tag = VT_NON; vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup((caddr_t) vp); } } /* * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ int vop_revoke(ap) struct vop_revoke_args /* { struct vnode *a_vp; int a_flags; } */ *ap; { struct vnode *vp, *vq; struct proc *p = curproc; /* XXX */ KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke")); vp = ap->a_vp; simple_lock(&vp->v_interlock); if (vp->v_flag & VALIASED) { /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); return (0); } /* * Ensure that vp will not be vgone'd while we * are eliminating its aliases. */ vp->v_flag |= VXLOCK; simple_unlock(&vp->v_interlock); while (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_type != vp->v_type || vp == vq) continue; simple_unlock(&spechash_slock); vgone(vq); break; } if (vq == NULLVP) { simple_unlock(&spechash_slock); } } /* * Remove the lock so that vgone below will * really eliminate the vnode after which time * vgone will awaken any sleepers. */ simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup(vp); } } vgonel(vp, p); return (0); } /* * Recycle an unused vnode to the front of the free list. * Release the passed interlock if the vnode will be recycled. */ int vrecycle(vp, inter_lkp, p) struct vnode *vp; struct simplelock *inter_lkp; struct proc *p; { simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { if (inter_lkp) { simple_unlock(inter_lkp); } vgonel(vp, p); return (1); } simple_unlock(&vp->v_interlock); return (0); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(vp) register struct vnode *vp; { struct proc *p = curproc; /* XXX */ simple_lock(&vp->v_interlock); vgonel(vp, p); } /* * vgone, with the vp interlock held. */ static void vgonel(vp, p) struct vnode *vp; struct proc *p; { int s; struct vnode *vq; struct vnode *vx; /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. 
*/ vclean(vp, DOCLOSE, p); simple_lock(&vp->v_interlock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) insmntque(vp, (struct mount *)0); /* * If special device, remove it from special device alias list * if it is on one. */ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { simple_lock(&spechash_slock); if (vp->v_hashchain == vp) { vp->v_hashchain = vp->v_specnext; } else { for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_specnext != vp) continue; vq->v_specnext = vp->v_specnext; break; } if (vq == NULL) panic("missing bdev"); } if (vp->v_flag & VALIASED) { vx = NULL; for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_type != vp->v_type) continue; if (vx) break; vx = vq; } if (vx == NULL) panic("missing alias"); if (vq == NULL) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } simple_unlock(&spechash_slock); vp->v_specinfo = NULL; } /* * If it is on the freelist and not already at the head, * move it to the head of the list. The test of the back * pointer and the reference count of zero is because * it will be removed from the free list by getnewvnode, * but will not have its reference count incremented until * after calling vgone. If the reference count were * incremented first, vgone would (incorrectly) try to * close the previous instance of the underlying object. */ if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VFREE) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); } else if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; freevnodes++; } else freevnodes++; vp->v_flag |= VFREE; TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); splx(s); } vp->v_type = VBAD; simple_unlock(&vp->v_interlock); } /* * Lookup a vnode by device number. */ int vfinddev(dev, type, vpp) dev_t dev; enum vtype type; struct vnode **vpp; { register struct vnode *vp; int rc = 0; simple_lock(&spechash_slock); for (vp = dev->si_hlist; vp; vp = vp->v_specnext) { if (type != vp->v_type) continue; *vpp = vp; rc = 1; break; } simple_unlock(&spechash_slock); return (rc); } /* * Calculate the total number of references to a special device. */ int vcount(vp) register struct vnode *vp; { struct vnode *vq, *vnext; int count; loop: if ((vp->v_flag & VALIASED) == 0) return (vp->v_usecount); simple_lock(&spechash_slock); for (count = 0, vq = vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ if (vq->v_usecount == 0 && vq != vp) { simple_unlock(&spechash_slock); vgone(vq); goto loop; } count += vq->v_usecount; } simple_unlock(&spechash_slock); return (count); } /* * Print out a description of a vnode. 
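 *
 * As a minimal usage note (no new code, just for orientation): the
 * DIAGNOSTIC branch of vflush() above calls this as
 *
 *    vprint("vflush: busy vnode", vp);
 *
 * and the DDB "show lockedvnodes" command below passes a nil label to get
 * the bare description.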
*/ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) char *label; register struct vnode *vp; { char buf[96]; if (label != NULL) printf("%s: %p: ", label, (void *)vp); else printf("%p: ", (void *)vp); printf("type %s, usecount %d, writecount %d, refcount %d,", typename[vp->v_type], vp->v_usecount, vp->v_writecount, vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); if (vp->v_flag & VTEXT) strcat(buf, "|VTEXT"); if (vp->v_flag & VSYSTEM) strcat(buf, "|VSYSTEM"); if (vp->v_flag & VXLOCK) strcat(buf, "|VXLOCK"); if (vp->v_flag & VXWANT) strcat(buf, "|VXWANT"); if (vp->v_flag & VBWAIT) strcat(buf, "|VBWAIT"); if (vp->v_flag & VALIASED) strcat(buf, "|VALIASED"); if (vp->v_flag & VDOOMED) strcat(buf, "|VDOOMED"); if (vp->v_flag & VFREE) strcat(buf, "|VFREE"); if (vp->v_flag & VOBJBUF) strcat(buf, "|VOBJBUF"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { printf("\n"); } else { printf("\n\t"); VOP_PRINT(vp); } } #ifdef DDB #include /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnodes, lockedvnodes) { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *vp; printf("Locked vnodes\n"); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) { if (VOP_ISLOCKED(vp)) vprint((char *)0, vp); } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); } #endif /* * Top level filesystem related information gathering. */ static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); static int vfs_sysctl SYSCTL_HANDLER_ARGS { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. 
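 *
 * As a rough sketch (userland side, not part of this change), the handler
 * below is normally reached through a MIB of the form
 *
 *    int mib[3], maxtypenum;
 *    size_t len = sizeof(maxtypenum);
 *
 *    mib[0] = CTL_VFS;
 *    mib[1] = VFS_GENERIC;
 *    mib[2] = VFS_MAXTYPENUM;
 *    sysctl(mib, 3, &maxtypenum, &len, NULL, 0);
 *
 * so that name[1] in the switch below is VFS_MAXTYPENUM, or VFS_CONF with
 * one further element selecting the filesystem type number.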
*/ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif #ifdef notyet /* all sysctl names at this level are at least name and field */ if (namelen < 2) return (ENOTDIR); /* overloaded */ if (name[0] != VFS_GENERIC) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[0]) break; if (vfsp == NULL) return (EOPNOTSUPP); return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, oldp, oldlenp, newp, newlen, p)); } #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) return (EOPNOTSUPP); return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); } return (EOPNOTSUPP); } SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int sysctl_ovfs_conf SYSCTL_HANDLER_ARGS { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error) return error; } return 0; } #endif /* 1 || COMPAT_PRELITE2 */ #if 0 #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). * Copyout address of vnode followed by vnode. */ /* ARGSUSED */ static int sysctl_vnode SYSCTL_HANDLER_ARGS { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *nvp, *vp; int error; #define VPTRSZ sizeof (struct vnode *) #define VNODESZ sizeof (struct vnode) req->lock = 0; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } again: simple_lock(&mntvnode_slock); for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. */ if (vp->v_mount != mp) { simple_unlock(&mntvnode_slock); goto again; } nvp = vp->v_mntvnodes.le_next; simple_unlock(&mntvnode_slock); if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) return (error); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); return (0); } #endif /* * XXX * Exporting the vnode list on large systems causes them to crash. * Exporting the vnode list on medium systems causes sysctl to coredump. */ #if 0 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,vnode", ""); #endif /* * Check to see if a filesystem is mounted on a block device. 
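 *
 * As an illustrative sketch (hypothetical caller, not part of this change),
 * a disk filesystem's mount routine would typically reject a device that is
 * already in use with something like
 *
 *    if ((error = vfs_mountedon(devvp)) != 0) {
 *            vrele(devvp);
 *            return (error);
 *    }
 *
 * relying on the EBUSY returned below when v_specmountpoint is already set
 * on the device vnode or one of its aliases.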
*/ int vfs_mountedon(vp) struct vnode *vp; { struct vnode *vq; int error = 0; if (vp->v_specmountpoint != NULL) return (EBUSY); if (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_type != vp->v_type) continue; if (vq->v_specmountpoint != NULL) { error = EBUSY; break; } } simple_unlock(&spechash_slock); } return (error); } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall() { struct mount *mp, *nmp; struct proc *p; int error; if (curproc != NULL) p = curproc; else p = initproc; /* XXX XXX should this be proc0? */ /* * Since this only runs when rebooting, it is not interlocked. */ for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_prev; error = dounmount(mp, MNT_FORCE, p); if (error) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. */ static int vfs_hang_addrlist(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { register struct netcred *np; register struct radix_node_head *rnh; register int i; struct radix_node *rn; struct sockaddr *saddr, *smask = 0; struct domain *dom; int error; if (argp->ex_addrlen == 0) { if (mp->mnt_flag & MNT_DEFEXPORTED) return (EPERM); np = &nep->ne_defexported; np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; mp->mnt_flag |= MNT_DEFEXPORTED; return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); bzero((caddr_t) np, i); saddr = (struct sockaddr *) (np + 1); if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) smask->sa_len = argp->ex_masklen; } i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* * Seems silly to initialize every AF when most are not used, * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { dom->dom_rtattach((void **) &nep->ne_rtable[i], dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { error = ENOBUFS; goto out; } } rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, np->netc_rnodes); if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; return (0); out: free(np, M_NETADDR); return (error); } /* ARGSUSED */ static int vfs_free_netcred(rn, w) struct radix_node *rn; void *w; { register struct radix_node_head *rnh = (struct radix_node_head *) w; (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); free((caddr_t) rn, M_NETADDR); return (0); } /* * Free the net address hash lists that are hanging off the mount points. 
*/ static void vfs_free_addrlist(nep) struct netexport *nep; { register int i; register struct radix_node_head *rnh; for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { (*rnh->rnh_walktree) (rnh, vfs_free_netcred, (caddr_t) rnh); free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } int vfs_export(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; if (argp->ex_flags & MNT_DELEXPORT) { if (mp->mnt_flag & MNT_EXPUBLIC) { vfs_setpublicfs(NULL, NULL, NULL); mp->mnt_flag &= ~MNT_EXPUBLIC; } vfs_free_addrlist(nep); mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); } if (argp->ex_flags & MNT_EXPORTED) { if (argp->ex_flags & MNT_EXPUBLIC) { if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) return (error); mp->mnt_flag |= MNT_EXPUBLIC; } if ((error = vfs_hang_addrlist(mp, nep, argp))) return (error); mp->mnt_flag |= MNT_EXPORTED; } return (0); } /* * Set the publicly exported filesystem (WebNFS). Currently, only * one public filesystem is possible in the spec (RFC 2054 and 2055) */ int vfs_setpublicfs(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; struct vnode *rvp; char *cp; /* * mp == NULL -> invalidate the current info, the FS is * no longer exported. May be called from either vfs_export * or unmount, so check if it hasn't already been done. */ if (mp == NULL) { if (nfs_pub.np_valid) { nfs_pub.np_valid = 0; if (nfs_pub.np_index != NULL) { FREE(nfs_pub.np_index, M_TEMP); nfs_pub.np_index = NULL; } } return (0); } /* * Only one allowed at a time. */ if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) return (EBUSY); /* * Get real filehandle for root of exported FS. */ bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; if ((error = VFS_ROOT(mp, &rvp))) return (error); if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) return (error); vput(rvp); /* * If an indexfile was specified, pull it in. */ if (argp->ex_indexfile != NULL) { MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, M_WAITOK); error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, MAXNAMLEN, (size_t *)0); if (!error) { /* * Check for illegal filenames. */ for (cp = nfs_pub.np_index; *cp; cp++) { if (*cp == '/') { error = EINVAL; break; } } } if (error) { FREE(nfs_pub.np_index, M_TEMP); return (error); } } nfs_pub.np_mount = mp; nfs_pub.np_valid = 1; return (0); } struct netcred * vfs_export_lookup(mp, nep, nam) register struct mount *mp; struct netexport *nep; struct sockaddr *nam; { register struct netcred *np; register struct radix_node_head *rnh; struct sockaddr *saddr; np = NULL; if (mp->mnt_flag & MNT_EXPORTED) { /* * Lookup in the export list first. */ if (nam != NULL) { saddr = nam; rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } } /* * If no address match, use the default if it exists. */ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) np = &nep->ne_defexported; } return (np); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; struct vm_object *obj; int anyio, tries; tries = 5; loop: anyio = 0; for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { nvp = vp->v_mntvnodes.le_next; if (vp->v_mount != mp) { goto loop; } if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? 
*/ continue; if (flags != MNT_WAIT) { obj = vp->v_object; if (obj == NULL || (obj->flags & OBJ_MIGHTBEDIRTY) == 0) continue; if (VOP_ISLOCKED(vp)) continue; } simple_lock(&vp->v_interlock); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { if (!vget(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : 0); anyio = 1; } vput(vp); } } else { simple_unlock(&vp->v_interlock); } } if (anyio && (--tries > 0)) goto loop; } /* * Create the VM object needed for VMIO and mmap support. This * is done for all VREG files in the system. Some filesystems might * afford the additional metadata buffering capability of the * VMIO code by making the device node be VMIO mode also. * * vp must be locked when vfs_object_create is called. */ int vfs_object_create(vp, p, cred) struct vnode *vp; struct proc *p; struct ucred *cred; { struct vattr vat; vm_object_t object; int error = 0; if (vp->v_type != VBLK && vn_canvmio(vp) == FALSE) return 0; retry: if ((object = vp->v_object) == NULL) { if (vp->v_type == VREG || vp->v_type == VDIR) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) goto retn; object = vnode_pager_alloc(vp, vat.va_size, 0, 0); } else if (bdevsw(vp->v_rdev) != NULL) { /* * This simply allocates the biggest object possible * for a VBLK vnode. This should be fixed, but doesn't * cause any problems (yet). */ object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0); } else { goto retn; } /* * Dereference the reference we just created. This assumes * that the object is associated with the vp. */ object->ref_count--; vp->v_usecount--; } else { if (object->flags & OBJ_DEAD) { VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } } KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object")); vp->v_flag |= VOBJBUF; retn: return error; } static void vfree(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~VAGE; vp->v_flag |= VFREE; splx(s); } void vbusy(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } else { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; } simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~(VFREE|VAGE); splx(s); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) */ int vn_pollrecord(vp, p, events) struct vnode *vp; struct proc *p; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * which presumably had requested them * (otherwise they would never have been * recorded). 
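 *
 * As a general usage sketch (hypothetical filesystem code, not part of this
 * change): a VOP_POLL implementation that has nothing pending typically ends
 * with
 *
 *    return (vn_pollrecord(vp, p, events));
 *
 * while the side that later produces activity calls
 *
 *    vn_pollevent(vp, POLLIN | POLLRDNORM);
 *
 * (or uses the VN_POLLEVENT macro discussed below) to wake the recorded
 * waiters.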
*/ events &= vp->v_pollinfo.vpi_revents; vp->v_pollinfo.vpi_revents &= ~events; simple_unlock(&vp->v_pollinfo.vpi_lock); return events; } vp->v_pollinfo.vpi_events |= events; selrecord(p, &vp->v_pollinfo.vpi_selinfo); simple_unlock(&vp->v_pollinfo.vpi_lock); return 0; } /* * Note the occurrence of an event. If the VN_POLLEVENT macro is used, * it is possible for us to miss an event due to race conditions, but * that condition is expected to be rare, so for the moment it is the * preferred interface. */ void vn_pollevent(vp, events) struct vnode *vp; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events & events) { /* * We clear vpi_events so that we don't * call selwakeup() twice if two events are * posted before the polling process(es) is * awakened. This also ensures that we take at * most one selwakeup() if the polling process * is no longer interested. However, it does * mean that only one event can be noticed at * a time. (Perhaps we should only clear those * event bits which we note?) XXX */ vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ vp->v_pollinfo.vpi_revents |= events; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); } /* * Wake up anyone polling on vp because it is being revoked. * This depends on dead_poll() returning POLLHUP for correct * behavior. */ void vn_pollgone(vp) struct vnode *vp; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events) { vp->v_pollinfo.vpi_events = 0; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); } /* * Routine to create and manage a filesystem syncer vnode. */ #define sync_close ((int (*) __P((struct vop_close_args *)))nullop) static int sync_fsync __P((struct vop_fsync_args *)); static int sync_inactive __P((struct vop_inactive_args *)); static int sync_reclaim __P((struct vop_reclaim_args *)); #define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) #define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) static int sync_print __P((struct vop_print_args *)); #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) static vop_t **sync_vnodeop_p; static struct vnodeopv_entry_desc sync_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_eopnotsupp }, { &vop_close_desc, (vop_t *) sync_close }, /* close */ { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ { &vop_print_desc, (vop_t *) sync_print }, /* print */ { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ { NULL, NULL } }; static struct vnodeopv_desc sync_vnodeop_opv_desc = { &sync_vnodeop_p, sync_vnodeop_entries }; VNODEOP_SET(sync_vnodeop_opv_desc); /* * Create a new filesystem syncer vnode for the specified mount point. */ int vfs_allocate_syncvnode(mp) struct mount *mp; { struct vnode *vp; static long start, incr, next; int error; /* Allocate a new vnode */ if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { mp->mnt_syncer = NULL; return (error); } vp->v_type = VNON; /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. 
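 *
 * As a worked example (assuming syncer_maxdelay is 32 and syncdelay is at
 * least that large), successive calls below hand out the slots
 *
 *    16, 8, 24, 4, 12, 20, 28, 2, ...
 *
 * i.e. the start/incr arithmetic repeatedly bisects the delay range, so
 * syncer vnodes stay roughly evenly spread even when many filesystems are
 * mounted back to back.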
*/ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); mp->mnt_syncer = vp; return (0); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; struct proc *p = ap->a_p; int asyncflag; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ vn_syncer_add_to_worklist(syncvp, syncdelay); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. */ simple_lock(&mountlist_slock); if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) { simple_unlock(&mountlist_slock); return (0); } asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); if (asyncflag) mp->mnt_flag |= MNT_ASYNC; vfs_unbusy(mp, p); return (0); } /* * The syncer vnode is no referenced. */ static int sync_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected at splbio(). */ static int sync_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; int s; s = splbio(); vp->v_mount->mnt_syncer = NULL; if (vp->v_flag & VONWORKLST) { LIST_REMOVE(vp, v_synclist); vp->v_flag &= ~VONWORKLST; } splx(s); return (0); } /* * Print out a syncer vnode. */ static int sync_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; printf("syncer vnode"); if (vp->v_vnlock != NULL) lockmgr_printinfo(vp->v_vnlock); printf("\n"); return (0); } /* * extract the dev_t from a VBLK or VCHR */ dev_t vn_todev(vp) struct vnode *vp; { if (vp->v_type != VBLK && vp->v_type != VCHR) return (NODEV); return (vp->v_rdev); } Index: head/sys/miscfs/devfs/devfs_vnops.c =================================================================== --- head/sys/miscfs/devfs/devfs_vnops.c (revision 49534) +++ head/sys/miscfs/devfs/devfs_vnops.c (revision 49535) @@ -1,2135 +1,2134 @@ /* * Copyright 1997,1998 Julian Elischer. All rights reserved. * julian@freebsd.org * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL THE HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: devfs_vnops.c,v 1.74 1999/05/11 19:54:35 phk Exp $ + * $Id: devfs_vnops.c,v 1.75 1999/06/26 02:46:17 mckusick Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include -#include /* definitions of spec functions we use */ #include #include #include #include #include #include #include #include #include #include /* * Insert description here */ /* * Convert a component of a pathname into a pointer to a locked node. * This is a very central and rather complicated routine. * If the file system is not maintained in a strict tree hierarchy, * this can result in a deadlock situation (see comments in code below). * * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on * whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it and the target of the pathname * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may * be "."., but the caller must check to ensure it does an vrele and DNUNLOCK * instead of two DNUNLOCKs. * * Overall outline of devfs_lookup: * * check accessibility of directory * null terminate the component (lookup leaves the whole string alone) * look for name in cache, if found, then if at end of path * and deleting or creating, drop it, else return name * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * node and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache * On return to lookup, remove the null termination we put in at the start. * * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent node unlocked. 
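 *
 * As a rough illustration (hypothetical caller, not part of this change),
 * the flag combinations discussed above normally arrive via namei(), e.g.
 *
 *    NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, path, p);
 *    error = namei(&nd);
 *
 * which is what drives the DELETE/LOCKPARENT handling below into returning
 * both the locked parent directory and the target vnode.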
*/ static int devfs_lookup(struct vop_lookup_args *ap) /*struct vop_lookup_args { struct vnode * a_dvp; directory vnode ptr struct vnode ** a_vpp; where to put the result struct componentname * a_cnp; the name we want };*/ { struct componentname *cnp = ap->a_cnp; struct vnode *dir_vnode = ap->a_dvp; struct vnode **result_vnode = ap->a_vpp; dn_p dir_node; /* the directory we are searching */ dn_p new_node; /* the node we are searching for */ devnm_p new_nodename; int flags = cnp->cn_flags; int op = cnp->cn_nameiop; /* LOOKUP, CREATE, RENAME, or DELETE */ int lockparent = flags & LOCKPARENT; int wantparent = flags & (LOCKPARENT|WANTPARENT); int error = 0; struct proc *p = cnp->cn_proc; char heldchar; /* the char at the end of the name componet */ *result_vnode = NULL; /* safe not sorry */ /*XXX*/ DBPRINT(("lookup\n")); if (dir_vnode->v_usecount == 0) printf("dir had no refs "); if (devfs_vntodn(dir_vnode,&dir_node)) { printf("vnode has changed?\n"); vprint("=",dir_vnode); return(EINVAL); } /* * Check accessiblity of directory. */ if (dir_node->type != DEV_DIR) /* XXX or symlink? */ { return (ENOTDIR); } if ((error = VOP_ACCESS(dir_vnode, VEXEC, cnp->cn_cred, p)) != 0) { return (error); } /* * We now have a segment name to search for, and a directory to search. * */ /***********************************************************************\ * SEARCH FOR NAME * * while making sure the component is null terminated for the strcmp * \***********************************************************************/ heldchar = cnp->cn_nameptr[cnp->cn_namelen]; cnp->cn_nameptr[cnp->cn_namelen] = '\0'; new_nodename = dev_findname(dir_node,cnp->cn_nameptr); cnp->cn_nameptr[cnp->cn_namelen] = heldchar; if(!new_nodename) { /*******************************************************\ * Failed to find it.. (That may be good) * \*******************************************************/ new_node = NULL; /* to be safe */ /* * If creating, and at end of pathname * then can consider * allowing file to be created. */ if (!(flags & ISLASTCN) || !(op == CREATE || op == RENAME)) { return ENOENT; } /* * Access for write is interpreted as allowing * creation of files in the directory. */ if ((error = VOP_ACCESS(dir_vnode, VWRITE, cnp->cn_cred, p)) != 0) { DBPRINT(("MKACCESS ")); return (error); } /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to add a new entry. * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory vnode in namei_data->ni_dvp. * The pathname buffer is saved so that the name * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. */ cnp->cn_flags |= SAVENAME; /*XXX why? */ if (!lockparent) VOP_UNLOCK(dir_vnode, 0, p); return (EJUSTRETURN); } /***************************************************************\ * Found it.. this is not always a good thing.. * \***************************************************************/ new_node = new_nodename->dnp; new_node->last_lookup = new_nodename; /* for unlink */ /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. * If the wantparent flag isn't set, we return only * the directory (in namei_data->ni_dvp), otherwise we go * on and lock the node, being careful with ".". */ if (op == DELETE && (flags & ISLASTCN)) { /* * Write access to directory required to delete files. 
*/ if ((error = VOP_ACCESS(dir_vnode, VWRITE, cnp->cn_cred, p)) != 0) return (error); /* * we are trying to delete '.'. What does this mean? XXX */ if (dir_node == new_node) { VREF(dir_vnode); *result_vnode = dir_vnode; return (0); } /* * If directory is "sticky", then user must own * the directory, or the file in it, else she * may not delete it (unless she's root). This * implements append-only directories. */ devfs_dntovn(new_node,result_vnode); #ifdef NOTYET if ((dir_node->mode & ISVTX) && cnp->cn_cred->cr_uid != 0 && cnp->cn_cred->cr_uid != dir_node->uid && cnp->cn_cred->cr_uid != new_node->uid) { VOP_UNLOCK(*result_vnode, 0, p); return (EPERM); } #endif if (!lockparent) VOP_UNLOCK(dir_vnode, 0, p); return (0); } /* * If rewriting (RENAME), return the vnode and the * information required to rewrite the present directory * Must get node of directory entry to verify it's a * regular file, or empty directory. */ if (op == RENAME && wantparent && (flags & ISLASTCN)) { /* * Are we allowed to change the holding directory? */ if ((error = VOP_ACCESS(dir_vnode, VWRITE, cnp->cn_cred, p)) != 0) return (error); /* * Careful about locking second node. * This can only occur if the target is ".". */ if (dir_node == new_node) return (EISDIR); devfs_dntovn(new_node,result_vnode); /* hmm save the 'from' name (we need to delete it) */ cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(dir_vnode, 0, p); return (0); } /* * Step through the translation in the name. We do not unlock the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "saved_dir_node" XXX. We must get the target * node before unlocking * the directory to insure that the node will not be removed * before we get it. We prevent deadlock by always fetching * nodes from the root, moving down the directory tree. Thus * when following backward pointers ".." we must unlock the * parent directory before getting the requested directory. * There is a potential race condition here if both the current * and parent directories are removed before the lock for the * node associated with ".." returns. We hope that this occurs * infrequently since we cannot avoid this race condition without * implementing a sophisticated deadlock detection algorithm. * Note also that this simple deadlock detection scheme will not * work if the file system has any hard links other than ".." * that point backwards in the directory structure. */ if (flags & ISDOTDOT) { VOP_UNLOCK(dir_vnode, 0, p); /* race to get the node */ devfs_dntovn(new_node,result_vnode); if (lockparent && (flags & ISLASTCN)) vn_lock(dir_vnode, LK_EXCLUSIVE | LK_RETRY, p); } else if (dir_node == new_node) { VREF(dir_vnode); /* we want ourself, ie "." */ *result_vnode = dir_vnode; } else { devfs_dntovn(new_node,result_vnode); if (!lockparent || (flags & ISLASTCN)) VOP_UNLOCK(dir_vnode, 0, p); } DBPRINT(("GOT\n")); return (0); } /* */ static int devfs_access(struct vop_access_args *ap) /*struct vop_access_args { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ { /* * mode is filled with a combination of VREAD, VWRITE, * and/or VEXEC bits turned on. In an octal number these * are the Y in 0Y00. 
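 *
 * As a concrete example (assuming the usual VREAD = 0400, VWRITE = 0200,
 * VEXEC = 0100 encoding): a request for VREAD|VEXEC arrives here as
 * mode = 0500.  If the caller is not the owner, the code below shifts it
 * to 0050 to test the group bits and, failing that, to 0005 to test the
 * "other" bits before comparing against file_node->mode.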
*/ struct vnode *vp = ap->a_vp; int mode = ap->a_mode; struct ucred *cred = ap->a_cred; dn_p file_node; int error; gid_t *gp; int i; DBPRINT(("access\n")); if ((error = devfs_vntodn(vp,&file_node)) != 0) { printf("devfs_vntodn returned %d ",error); return error; } /* * if we are not running as a process, we are in the * kernel and we DO have permission */ if (ap->a_p == NULL) return 0; /* * Access check is based on only one of owner, group, public. * If not owner, then check group. If not a member of the * group, then check public access. */ if (cred->cr_uid != file_node->uid) { /* failing that.. try groups */ mode >>= 3; gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) { if (file_node->gid == *gp) { goto found; } } /* failing that.. try general access */ mode >>= 3; found: ; } if ((file_node->mode & mode) == mode) return (0); /* * Root gets to do anything. * but only use suser_xxx prives as a last resort * (Use of super powers is recorded in ap->a_p->p_acflag) */ if( suser_xxx(cred, ap->a_p, 0) == 0) /* XXX what if no proc? */ return 0; return (EACCES); } static int devfs_getattr(struct vop_getattr_args *ap) /*struct vop_getattr_args { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; dn_p file_node; int error; DBPRINT(("getattr\n")); if ((error = devfs_vntodn(vp,&file_node)) != 0) { printf("devfs_vntodn returned %d ",error); return error; } vap->va_rdev = 0;/* default value only */ vap->va_mode = file_node->mode; switch (file_node->type) { case DEV_DIR: vap->va_rdev = (udev_t)file_node->dvm; vap->va_mode |= (S_IFDIR); break; case DEV_CDEV: vap->va_rdev = dev2udev(file_node->by.Cdev.dev); vap->va_mode |= (S_IFCHR); break; case DEV_BDEV: vap->va_rdev = dev2udev(file_node->by.Bdev.dev); vap->va_mode |= (S_IFBLK); break; case DEV_SLNK: break; } vap->va_type = vp->v_type; vap->va_nlink = file_node->links; vap->va_uid = file_node->uid; vap->va_gid = file_node->gid; vap->va_fsid = (intptr_t)(void *)file_node->dvm; vap->va_fileid = (intptr_t)(void *)file_node; vap->va_size = file_node->len; /* now a u_quad_t */ vap->va_blocksize = 512; /* * XXX If the node times are in Jan 1, 1970, then * update them to the boot time. * When we made the node, the date/time was not yet known. 
*/ if(file_node->ctime.tv_sec < (24 * 3600)) { TIMEVAL_TO_TIMESPEC(&boottime,&(file_node->ctime)); TIMEVAL_TO_TIMESPEC(&boottime,&(file_node->mtime)); TIMEVAL_TO_TIMESPEC(&boottime,&(file_node->atime)); } if (file_node->flags & IN_ACCESS) { nanotime(&file_node->atime); file_node->flags &= ~IN_ACCESS; } vap->va_ctime = file_node->ctime; vap->va_mtime = file_node->mtime; vap->va_atime = file_node->atime; vap->va_gen = 0; vap->va_flags = 0; vap->va_bytes = file_node->len; /* u_quad_t */ vap->va_filerev = 0; /* XXX */ /* u_quad_t */ vap->va_vaflags = 0; /* XXX */ return 0; } static int devfs_setattr(struct vop_setattr_args *ap) /*struct vop_setattr_args { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; int error = 0; gid_t *gp; int i; dn_p file_node; if (vap->va_flags != VNOVAL) /* XXX needs to be implemented */ return (EOPNOTSUPP); if ((error = devfs_vntodn(vp,&file_node)) != 0) { printf("devfs_vntodn returned %d ",error); return error; } DBPRINT(("setattr\n")); if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL )) { return EINVAL; } /* * Anyone can touch the files in such a way that the times are set * to NOW (e.g. run 'touch') if they have write permissions * however only the owner or root can set "un-natural times. * They also don't need write permissions. */ if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { #if 0 /* * This next test is pointless under devfs for now.. * as there is only one devfs hiding under potentially many * mountpoints and actual device node are really 'mounted' under * a FAKE mountpoint inside the kernel only, no matter where it * APPEARS they are mounted to the outside world.. * A readonly devfs doesn't exist anyway. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #endif if (((vap->va_vaflags & VA_UTIMES_NULL) == 0) && (cred->cr_uid != file_node->uid) && suser_xxx(cred, p, 0)) return (EPERM); if(VOP_ACCESS(vp, VWRITE, cred, p)) return (EACCES); file_node->atime = vap->va_atime; file_node->mtime = vap->va_mtime; nanotime(&file_node->ctime); return (0); } /* * Change the permissions.. must be root or owner to do this. */ if (vap->va_mode != (u_short)VNOVAL) { if ((cred->cr_uid != file_node->uid) && suser_xxx(cred, p, 0)) return (EPERM); /* set drwxwxrwx stuff */ file_node->mode &= ~07777; file_node->mode |= vap->va_mode & 07777; } /* * Change the owner.. must be root to do this. */ if (vap->va_uid != (uid_t)VNOVAL) { if (suser_xxx(cred, p, 0)) return (EPERM); file_node->uid = vap->va_uid; } /* * Change the group.. must be root or owner to do this. * If we are the owner, we must be in the target group too. * don't use suser_xxx() unless you have to as it reports * whether you needed suser_xxx powers or not. */ if (vap->va_gid != (gid_t)VNOVAL) { if (cred->cr_uid == file_node->uid){ gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) { if (vap->va_gid == *gp) goto cando; } } /* * we can't do it with normal privs, * do we have an ace up our sleeve? 
*/ if( suser_xxx(cred, p, 0)) return (EPERM); cando: file_node->gid = vap->va_gid; } #if 0 /* * Copied from somewhere else * but only kept as a marker and reminder of the fact that * flags should be handled some day */ if (vap->va_flags != VNOVAL) { if (error = suser_xxx(cred, p, 0)) return error; if (cred->cr_uid == 0) ; else { } } #endif return error; } static int devfs_xread(struct vop_read_args *ap) /*struct vop_read_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ { int error = 0; dn_p file_node; DBPRINT(("read\n")); if ((error = devfs_vntodn(ap->a_vp,&file_node)) != 0) { printf("devfs_vntodn returned %d ",error); return error; } switch (ap->a_vp->v_type) { case VREG: return(EINVAL); case VDIR: return VOP_READDIR(ap->a_vp,ap->a_uio,ap->a_cred, NULL,NULL,NULL); case VCHR: case VBLK: panic("devfs: vnode methods"); default: panic("devfs_read(): bad file type"); break; } } /* * Write data to a file or directory. */ static int devfs_xwrite(struct vop_write_args *ap) /*struct vop_write_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ { switch (ap->a_vp->v_type) { case VREG: return(EINVAL); case VDIR: return(EISDIR); case VCHR: case VBLK: panic("devfs: vnode methods"); default: panic("devfs_write(): bad file type"); } } static int devfs_remove(struct vop_remove_args *ap) /*struct vop_remove_args { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; dn_p tp, tdp; devnm_p tnp; int doingdirectory = 0; int error = 0; uid_t ouruid = cnp->cn_cred->cr_uid; DBPRINT(("remove\n")); /* * Lock our directories and get our name pointers * assume that the names are null terminated as they * are the end of the path. Get pointers to all our * devfs structures. */ if ((error = devfs_vntodn(dvp, &tdp)) != 0) { abortit: VOP_ABORTOP(dvp, cnp); return (error); } if ((error = devfs_vntodn(vp, &tp)) != 0) goto abortit; /* * Assuming we are atomic, dev_lookup left this for us */ tnp = tp->last_lookup; /* * Check we are doing legal things WRT the new flags */ if ((tp->flags & (IMMUTABLE | APPEND)) || (tdp->flags & APPEND) /*XXX eh?*/ ) { error = EPERM; goto abortit; } /* * Make sure that we don't try do something stupid */ if ((tp->type) == DEV_DIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ( (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') || (cnp->cn_flags&ISDOTDOT) ) { error = EINVAL; goto abortit; } doingdirectory++; } /*********************************** * Start actually doing things.... * ***********************************/ getnanotime(&(tdp->mtime)); /* * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. * XXX shoudn't this be in generic code? */ if ((tdp->mode & S_ISTXT) && ouruid != 0 && ouruid != tdp->uid && ouruid != tp->uid ) { error = EPERM; goto abortit; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). 
*/ if (( doingdirectory) && (tp->links > 2)) { printf("nlink = %d\n",tp->links); /*XXX*/ error = ENOTEMPTY; goto abortit; } dev_free_name(tnp); tp = NULL; return (error); } /* */ static int devfs_link(struct vop_link_args *ap) /*struct vop_link_args { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; dn_p fp, tdp; devnm_p tnp; int error = 0; DBPRINT(("link\n")); /* * First catch an arbitrary restriction for this FS */ if(cnp->cn_namelen > DEVMAXNAMESIZE) { error = ENAMETOOLONG; goto abortit; } /* * Lock our directories and get our name pointers * assume that the names are null terminated as they * are the end of the path. Get pointers to all our * devfs structures. */ if ((error = devfs_vntodn(tdvp,&tdp)) != 0) goto abortit; if ((error = devfs_vntodn(vp,&fp)) != 0) goto abortit; /* * trying to move it out of devfs? (v_tag == VT_DEVFS) */ if ( (vp->v_tag != VT_DEVFS) || (vp->v_tag != tdvp->v_tag) ) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, cnp); goto out; } /* * Check we are doing legal things WRT the new flags */ if (fp->flags & (IMMUTABLE | APPEND)) { error = EPERM; goto abortit; } /*********************************** * Start actually doing things.... * ***********************************/ getnanotime(&(tdp->atime)); error = dev_add_name(cnp->cn_nameptr, tdp, NULL, fp, &tnp); out: return (error); } /* * Rename system call. Seems overly complicated to me... * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. * * When the target exists, both the directory * and target vnodes are locked. * the source and source-parent vnodes are referenced * * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensure the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to node if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ static int devfs_rename(struct vop_rename_args *ap) /*struct vop_rename_args { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ { struct vnode *tvp = ap->a_tvp; struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; dn_p fp, fdp, tp, tdp; devnm_p fnp,tnp; int doingdirectory = 0; int error = 0; /* * First catch an arbitrary restriction for this FS */ if(tcnp->cn_namelen > DEVMAXNAMESIZE) { error = ENAMETOOLONG; goto abortit; } /* * Lock our directories and get our name pointers * assume that the names are null terminated as they * are the end of the path. Get pointers to all our * devfs structures. */ if ((error = devfs_vntodn(tdvp,&tdp)) != 0) goto abortit; if ((error = devfs_vntodn(fdvp,&fdp)) != 0) goto abortit; if ((error = devfs_vntodn(fvp,&fp)) != 0) goto abortit; fnp = fp->last_lookup; if (tvp) { if ((error = devfs_vntodn(tvp,&tp)) != 0) goto abortit; tnp = tp->last_lookup; } else { tp = NULL; tnp = NULL; } /* * trying to move it out of devfs? 
(v_tag == VT_DEVFS) * if we move a dir across mnt points. we need to fix all * the mountpoint pointers! XXX * so for now keep dirs within the same mount */ if ( (fvp->v_tag != VT_DEVFS) || (fvp->v_tag != tdvp->v_tag) || (tvp && (fvp->v_tag != tvp->v_tag)) || ((fp->type == DEV_DIR) && (fp->dvm != tdp->dvm ))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); if (tdvp == tvp) /* eh? */ vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ vrele(fdvp); vrele(fvp); return (error); } /* * Check we are doing legal things WRT the new flags */ if ((tp && (tp->flags & (IMMUTABLE | APPEND))) || (fp->flags & (IMMUTABLE | APPEND)) || (fdp->flags & APPEND)) { error = EPERM; goto abortit; } /* * Make sure that we don't try do something stupid */ if ((fp->type) == DEV_DIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || (fcnp->cn_flags&ISDOTDOT) || (tcnp->cn_namelen == 1 && tcnp->cn_nameptr[0] == '.') || (tcnp->cn_flags&ISDOTDOT) || (tdp == fp )) { error = EINVAL; goto abortit; } doingdirectory++; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". */ if (doingdirectory && (tdp != fdp)) { dn_p tmp,ntmp; error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); tmp = tdp; do { if(tmp == fp) { /* XXX unlock stuff here probably */ error = EINVAL; goto out; } ntmp = tmp; } while ((tmp = tmp->by.Dir.parent) != ntmp); } /*********************************** * Start actually doing things.... * ***********************************/ getnanotime(&(fp->atime)); /* * Check if just deleting a link name. */ if (fvp == tvp) { if (fvp->v_type == VDIR) { error = EINVAL; goto abortit; } /* Release destination completely. */ VOP_ABORTOP(tdvp, tcnp); vput(tdvp); vput(tvp); /* Delete source. */ VOP_ABORTOP(fdvp, fcnp); /*XXX*/ vrele(fdvp); vrele(fvp); dev_free_name(fnp); return 0; } /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, too bad :) */ fp->links++; /* * If the target exists zap it (unless it's a non-empty directory) * We could do that as well but won't */ if (tp) { int ouruid = tcnp->cn_cred->cr_uid; /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. * XXX shoudn't this be in generic code? */ if ((tdp->mode & S_ISTXT) && ouruid != 0 && ouruid != tdp->uid && ouruid != tp->uid ) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if (( doingdirectory) && (tp->links > 2)) { printf("nlink = %d\n",tp->links); /*XXX*/ error = ENOTEMPTY; goto bad; } dev_free_name(tnp); tp = NULL; } dev_add_name(tcnp->cn_nameptr,tdp,fnp->as.front.realthing,fp,&tnp); fnp->dnp = NULL; fp->links--; /* one less link to it.. 
*/ dev_free_name(fnp); fp->links--; /* we added one earlier*/ if (tdp) vput(tdvp); if (tp) vput(fvp); vrele(ap->a_fvp); return (error); bad: if (tp) vput(tvp); vput(tdvp); out: if (vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p) == 0) { fp->links--; /* we added one earlier*/ vput(fvp); } else vrele(fvp); return (error); } static int devfs_symlink(struct vop_symlink_args *ap) /*struct vop_symlink_args { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ { struct vnode *vp; int error; dn_p dnp; union typeinfo by; devnm_p nm_p; DBPRINT(("symlink\n")); if((error = devfs_vntodn(ap->a_dvp, &dnp)) != 0) { return (error); } by.Slnk.name = ap->a_target; by.Slnk.namelen = strlen(ap->a_target); dev_add_entry(ap->a_cnp->cn_nameptr, dnp, DEV_SLNK, &by, NULL, NULL, &nm_p); if((error = devfs_dntovn(nm_p->dnp, &vp)) != 0) { return (error); } VOP_SETATTR(vp, ap->a_vap, ap->a_cnp->cn_cred, ap->a_cnp->cn_proc); *ap->a_vpp = NULL; vput(vp); return 0; } /* * Vnode op for readdir */ static int devfs_readdir(struct vop_readdir_args *ap) /*struct vop_readdir_args { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *eofflag; int *ncookies; u_int **cookies; } */ { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; struct dirent dirent; dn_p dir_node; devnm_p name_node; char *name; int error = 0; int reclen; int nodenumber; int startpos,pos; DBPRINT(("readdir\n")); /* set up refs to dir */ if ((error = devfs_vntodn(vp,&dir_node)) != 0) return error; if(dir_node->type != DEV_DIR) return(ENOTDIR); pos = 0; startpos = uio->uio_offset; name_node = dir_node->by.Dir.dirlist; nodenumber = 0; getnanotime(&(dir_node->atime)); while ((name_node || (nodenumber < 2)) && (uio->uio_resid > 0)) { switch(nodenumber) { case 0: dirent.d_fileno = (uintptr_t)(void *)dir_node; name = "."; dirent.d_namlen = 1; dirent.d_type = DT_DIR; break; case 1: if(dir_node->by.Dir.parent) dirent.d_fileno = (uintptr_t)(void *)dir_node->by.Dir.parent; else dirent.d_fileno = (uintptr_t)(void *)dir_node; name = ".."; dirent.d_namlen = 2; dirent.d_type = DT_DIR; break; default: dirent.d_fileno = (uintptr_t)(void *)name_node->dnp; dirent.d_namlen = strlen(name_node->name); name = name_node->name; switch(name_node->dnp->type) { case DEV_BDEV: dirent.d_type = DT_BLK; break; case DEV_CDEV: dirent.d_type = DT_CHR; break; case DEV_DDEV: dirent.d_type = DT_SOCK; /*XXX*/ break; case DEV_DIR: dirent.d_type = DT_DIR; break; case DEV_SLNK: dirent.d_type = DT_LNK; break; default: dirent.d_type = DT_UNKNOWN; } } reclen = dirent.d_reclen = GENERIC_DIRSIZ(&dirent); if(pos >= startpos) /* made it to the offset yet? */ { if (uio->uio_resid < reclen) /* will it fit? 
*/ break; strcpy( dirent.d_name,name); if ((error = uiomove ((caddr_t)&dirent, dirent.d_reclen, uio)) != 0) break; } pos += reclen; if((nodenumber >1) && name_node) name_node = name_node->next; nodenumber++; } uio->uio_offset = pos; return (error); } /* */ static int devfs_readlink(struct vop_readlink_args *ap) /*struct vop_readlink_args { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; dn_p lnk_node; int error = 0; DBPRINT(("readlink\n")); /* set up refs to dir */ if ((error = devfs_vntodn(vp,&lnk_node)) != 0) return error; if(lnk_node->type != DEV_SLNK) return(EINVAL); if ((error = VOP_ACCESS(vp, VREAD, ap->a_cred, NULL)) != 0) { /* XXX */ return error; } error = uiomove(lnk_node->by.Slnk.name, lnk_node->by.Slnk.namelen, uio); return error; } #ifdef notyet static int devfs_abortop(struct vop_abortop_args *ap) /*struct vop_abortop_args { struct vnode *a_dvp; struct componentname *a_cnp; } */ { DBPRINT(("abortop\n")); if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return 0; } #endif /* notyet */ static int devfs_reclaim(struct vop_reclaim_args *ap) /*struct vop_reclaim_args { struct vnode *a_vp; } */ { dn_p file_node = NULL; int error; DBPRINT(("reclaim\n")); if ((error = devfs_vntodn(ap->a_vp,&file_node)) != 0) { printf("devfs_vntodn returned %d ",error); return error; } ap->a_vp->v_data = NULL; if (file_node) { file_node->vn = 0; file_node->vn_id = 0; } return(0); } /* * Print out the contents of a /devfs vnode. */ static int devfs_print(struct vop_print_args *ap) /*struct vop_print_args { struct vnode *a_vp; } */ { printf("tag VT_DEVFS, devfs vnode\n"); return (0); } /**************************************************************************\ * pseudo ops * \**************************************************************************/ /*proto*/ void devfs_dropvnode(dn_p dnp) { struct vnode *vn_p; #ifdef PARANOID if(!dnp) { printf("devfs: dn count dropped too early\n"); } #endif vn_p = dnp->vn; /* * check if we have a vnode....... */ if((vn_p) && ( dnp->vn_id == vn_p->v_id) && (dnp == (dn_p)vn_p->v_data)) { VOP_REVOKE(vn_p, REVOKEALL); } dnp->vn = NULL; /* be pedantic about this */ } /* struct vnode *speclisth[SPECHSZ];*/ /* till specfs goes away */ /* * Open a special file. 
struct vop_open_args { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } *ap; */ /* ARGSUSED */ static int devfs_open( struct vop_open_args *ap) { struct proc *p = ap->a_p; struct vnode *vp = ap->a_vp; int error; dn_p dnp; if ((error = devfs_vntodn(vp,&dnp)) != 0) return error; switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*dnp->by.Cdev.cdevsw->d_open)( dnp->by.Cdev.dev, ap->a_mode, S_IFCHR, p); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); /* NOT REACHED */ case VBLK: error = (*dnp->by.Bdev.bdevsw->d_open)( dnp->by.Bdev.dev, ap->a_mode, S_IFBLK, p); break; default: break; } return (error); } /* * Vnode op for read struct vop_read_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ /* ARGSUSED */ static int devfs_read( struct vop_read_args *ap) { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn, nextbn; long bsize, bscale; struct partinfo dpart; int n, on; d_ioctl_t *ioctl; int error = 0; dev_t dev; dn_p dnp; if ((error = devfs_vntodn(vp,&dnp)) != 0) return error; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("devfs_read mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("devfs_read proc"); #endif if (uio->uio_resid == 0) return (0); switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*dnp->by.Cdev.cdevsw->d_read) (dnp->by.Cdev.dev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); break; case VBLK: if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; dev = dnp->by.Bdev.dev; /* * This is a hack! */ if ( (ioctl = dnp->by.Bdev.bdevsw->d_ioctl) != NULL && (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; bscale = btodb(bsize); /* * Get buffers with this data from the buffer cache. * If it's not there the strategy() entrypoint will be called. * We may do this in several chunks. 
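 *
 * As a worked example (assuming bsize stays at its BLKDEV_IOSIZE default,
 * here taken to be 2048 bytes, so bscale = btodb(2048) = 4): a 1000 byte
 * read at byte offset 3000 computes
 *
 *    bn = btodb(3000) & ~(4 - 1) = 4     (the buffer starting at byte 2048)
 *    on = 3000 % 2048 = 952              (offset of the data in that buffer)
 *    n  = min(2048 - 952, 1000) = 1000
 *
 * so the whole request is satisfied by a single bread() of that buffer and
 * one uiomove() of 1000 bytes starting at offset 952.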
*/ do { bn = btodb(uio->uio_offset) & ~(bscale - 1); on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (vp->v_lastr + bscale == bn) { nextbn = bn + bscale; error = breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else error = bread(vp, bn, (int)bsize, NOCRED, &bp); vp->v_lastr = bn; n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } /* * Copy it to the user's space */ error = uiomove((char *)bp->b_data + on, n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); break; default: panic("devfs_read type"); } if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) dnp->flags |= IN_ACCESS; return (error); } /* * Vnode op for write struct vop_write_args { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ /* ARGSUSED */ static int devfs_write( struct vop_write_args *ap) { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn; int bsize, blkmask; struct partinfo dpart; register int n, on; int error = 0; dn_p dnp; if ((error = devfs_vntodn(vp,&dnp)) != 0) return error; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("devfs_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("devfs_write proc"); #endif switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*dnp->by.Cdev.cdevsw->d_write) (dnp->by.Cdev.dev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; if ((dnp->by.Bdev.bdevsw->d_ioctl != NULL) && ((*dnp->by.Bdev.bdevsw->d_ioctl)(dnp->by.Bdev.dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) && (dpart.part->p_fstype == FS_BSDFFS) && (dpart.part->p_frag != 0) && (dpart.part->p_fsize != 0)) { bsize = dpart.part->p_frag * dpart.part->p_fsize; } blkmask = btodb(bsize) - 1; do { bn = btodb(uio->uio_offset) & ~blkmask; on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (n == bsize) bp = getblk(vp, bn, bsize, 0, 0); else error = bread(vp, bn, bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } n = min(n, bsize - bp->b_resid); error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) bawrite(bp); else bdwrite(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("devfs_write type"); } /* NOTREACHED */ } /* * Device ioctl operation. 
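 * Ioctls are passed straight through to the driver: d_ioctl from the
 * cdevsw for VCHR nodes and from the bdevsw for VBLK nodes.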
struct vop_ioctl_args { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ /* ARGSUSED */ static int devfs_ioctl(struct vop_ioctl_args *ap) { dn_p dnp; int error; if ((error = devfs_vntodn(ap->a_vp,&dnp)) != 0) return error; switch (ap->a_vp->v_type) { case VCHR: return ((*dnp->by.Cdev.cdevsw->d_ioctl)(dnp->by.Cdev.dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); case VBLK: return ((*dnp->by.Bdev.bdevsw->d_ioctl)(dnp->by.Bdev.dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); default: panic("devfs_ioctl"); /* NOTREACHED */ } } /* struct vop_poll_args { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct proc *a_p; } *ap; */ /* ARGSUSED */ static int devfs_poll(struct vop_poll_args *ap) { dn_p dnp; int error; if ((error = devfs_vntodn(ap->a_vp,&dnp)) != 0) return error; switch (ap->a_vp->v_type) { case VCHR: return (*dnp->by.Cdev.cdevsw->d_poll)(dnp->by.Cdev.dev, ap->a_events, ap->a_p); default: return (vop_defaultop((struct vop_generic_args *)ap)); } } /* * Synch buffers associated with a block device struct vop_fsync_args { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ /* ARGSUSED */ static int devfs_fsync(struct vop_fsync_args *ap) { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct buf *nbp; int s; dn_p dnp; int error; if ((error = devfs_vntodn(vp,&dnp)) != 0) return error; if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. */ loop: s = splbio(); for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("devfs_fsync: not dirty"); if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { BUF_UNLOCK(bp); vfs_bio_awrite(bp); splx(s); } else { bremfree(bp); splx(s); bawrite(bp); } goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0); } #ifdef DIAGNOSTIC if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { vprint("devfs_fsync: dirty", vp); splx(s); goto loop; } #endif } splx(s); return (0); } /* * * struct vop_inactive_args { * struct vnode *a_vp; * struct proc *a_p; * } */ static int devfs_inactive(struct vop_inactive_args *ap) { VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } /* * Just call the device strategy routine struct vop_strategy_args { struct vnode *a_vp; struct buf *a_bp; } */ static int devfs_strategy(struct vop_strategy_args *ap) { struct buf *bp = ap->a_bp; dn_p dnp; int error; if ((ap->a_vp->v_type != VCHR) && (ap->a_vp->v_type != VBLK)) panic ("devfs_strat:badvnode type"); if ((error = devfs_vntodn(ap->a_vp,&dnp)) != 0) return error; if (((bp->b_flags & B_READ) == 0) && (LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) (*bioops.io_start)(bp); switch (ap->a_vp->v_type) { case VCHR: (*dnp->by.Cdev.cdevsw->d_strategy)(bp); break; case VBLK: (*dnp->by.Bdev.bdevsw->d_strategy)(bp); break; default: /* XXX set error code? */ break; } return (0); } /* * This is a noop, simply returning what one has been given. 
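 * For a device vnode the logical and physical block numbers are the
 * same, so the requested block is handed back unchanged and the
 * read-ahead/read-behind run lengths are reported as zero.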
struct vop_bmap_args { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ static int devfs_bmap(struct vop_bmap_args *ap) { if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Device close routine struct vop_close_args { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ /* ARGSUSED */ static int devfs_close(struct vop_close_args *ap) { register struct vnode *vp = ap->a_vp; int error; dn_p dnp; if ((error = devfs_vntodn(vp,&dnp)) != 0) return error; switch (vp->v_type) { case VCHR: /* * Hack: a tty device that is a controlling terminal * has a reference from the session structure. * We cannot easily tell that a character device is * a controlling terminal, unless it is the closing * process' controlling terminal. In that case, * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ if (vcount(vp) == 2 && ap->a_p && (vp->v_flag & VXLOCK) == 0 && vp == ap->a_p->p_session->s_ttyvp) { vrele(vp); ap->a_p->p_session->s_ttyvp = NULL; } /* * If the vnode is locked, then we are in the midst * of forcably closing the device, otherwise we only * close on last reference. */ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) return (0); return ((*dnp->by.Cdev.cdevsw->d_close)(dnp->by.Cdev.dev, ap->a_fflag, S_IFCHR, ap->a_p)); /* NOT REACHED */ case VBLK: /* * On last close of a block device (that isn't mounted) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); VOP_UNLOCK(vp, 0, ap->a_p); if (error) return (error); /* * We do not want to really close the device if it * is still in use unless we are trying to close it * forcibly. Since every use (buffer, vnode, swap, cmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0) return (0); return ((*dnp->by.Bdev.bdevsw->d_close)(dnp->by.Bdev.dev, ap->a_fflag, S_IFBLK, ap->a_p)); /* NOT REACHED */ default: panic("devfs_close: not special"); } } /* * Print out the contents of a special device vnode. struct vop_print_args { struct vnode *a_vp; } */ /* * Special device advisory byte-level locks. struct vop_advlock_args { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ /* ARGSUSED */ static int devfs_advlock(struct vop_advlock_args *ap) { return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); } /* * Special device bad operation */ static int devfs_badop(void) { panic("devfs_badop called"); /* NOTREACHED */ } static void devfs_getpages_iodone(struct buf *bp) { bp->b_flags |= B_DONE; wakeup(bp); } static int devfs_getpages(struct vop_getpages_args *ap) { vm_offset_t kva; int error; int i, pcount, size, s; daddr_t blkno; struct buf *bp; vm_page_t m; vm_ooffset_t offset; int toff, nextoff, nread; struct vnode *vp = ap->a_vp; int blksiz; int gotreqpage; error = 0; pcount = round_page(ap->a_count) / PAGE_SIZE; /* * Calculate the offset of the transfer. */ offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset; /* XXX sanity check before we go into details. */ /* XXX limits should be defined elsewhere. 
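 * With DADDR_T_BIT hard-wired to 32 below and the usual DEV_BSHIFT of 9
 * (512-byte DEV_BSIZE), OFFSET_MAX works out to 2^41 - 1 bytes, i.e. a
 * 2TB cap on the transfer offset.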
*/ #define DADDR_T_BIT 32 #define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1) if (offset < 0 || offset > OFFSET_MAX) { /* XXX still no %q in kernel. */ printf("devfs_getpages: preposterous offset 0x%x%08x\n", (u_int)((u_quad_t)offset >> 32), (u_int)(offset & 0xffffffff)); return (VM_PAGER_ERROR); } blkno = btodb(offset); /* * Round up physical size for real devices, use the * fundamental blocksize of the fs if possible. */ if (vp && vp->v_mount) { if (vp->v_type != VBLK) { vprint("Non VBLK", vp); } blksiz = vp->v_mount->mnt_stat.f_bsize; if (blksiz < DEV_BSIZE) { blksiz = DEV_BSIZE; } } else blksiz = DEV_BSIZE; size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); bp = getpbuf(NULL); kva = (vm_offset_t)bp->b_data; /* * Map the pages to be read into the kva. */ pmap_qenter(kva, ap->a_m, pcount); /* Build a minimal buffer header. */ bp->b_flags = B_READ | B_CALL; bp->b_iodone = devfs_getpages_iodone; /* B_PHYS is not set, but it is nice to fill this in. */ bp->b_rcred = bp->b_wcred = curproc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_blkno = blkno; bp->b_lblkno = blkno; pbgetvp(ap->a_vp, bp); bp->b_bcount = size; bp->b_bufsize = size; bp->b_resid = 0; cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; /* Do the input. */ VOP_STRATEGY(bp->b_vp, bp); s = splbio(); /* We definitely need to be at splbio here. */ while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PVM, "spread", 0); splx(s); if ((bp->b_flags & B_ERROR) != 0) { if (bp->b_error) error = bp->b_error; else error = EIO; } nread = size - bp->b_resid; if (nread < ap->a_count) { bzero((caddr_t)kva + nread, ap->a_count - nread); } pmap_qremove(kva, pcount); gotreqpage = 0; for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= nread) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else if (toff < nread) { int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); vm_page_set_validclean(m, 0, nvalid); } else { m->valid = 0; m->dirty = 0; } if (i != ap->a_reqpage) { /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error || (m->valid == VM_PAGE_BITS_ALL)) { if (m->valid) { if (m->flags & PG_WANTED) { vm_page_activate(m); } else { vm_page_deactivate(m); } vm_page_wakeup(m); } else { vm_page_free(m); } } else { vm_page_free(m); } } else if (m->valid) { gotreqpage = 1; } } if (!gotreqpage) { m = ap->a_m[ap->a_reqpage]; #ifndef MAX_PERF printf("devfs_getpages: I/O read failure: (error code=%d)\n", error); printf(" size: %d, resid: %ld, a_count: %d, valid: 0x%x\n", size, bp->b_resid, ap->a_count, m->valid); printf(" nread: %d, reqpage: %d, pindex: %d, pcount: %d\n", nread, ap->a_reqpage, m->pindex, pcount); #endif /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp, NULL); return VM_PAGER_ERROR; } /* * Free the buffer header back to the swap buffer pool. 
*/ relpbuf(bp, NULL); return VM_PAGER_OK; } /* These are the operations used by directories etc in a devfs */ vop_t **devfs_vnodeop_p; static struct vnodeopv_entry_desc devfs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_access_desc, (vop_t *) devfs_access }, { &vop_bmap_desc, (vop_t *) devfs_badop }, { &vop_getattr_desc, (vop_t *) devfs_getattr }, { &vop_inactive_desc, (vop_t *) devfs_inactive }, { &vop_link_desc, (vop_t *) devfs_link }, { &vop_lookup_desc, (vop_t *) devfs_lookup }, { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, { &vop_print_desc, (vop_t *) devfs_print }, { &vop_read_desc, (vop_t *) devfs_xread }, { &vop_readdir_desc, (vop_t *) devfs_readdir }, { &vop_readlink_desc, (vop_t *) devfs_readlink }, { &vop_reclaim_desc, (vop_t *) devfs_reclaim }, { &vop_remove_desc, (vop_t *) devfs_remove }, { &vop_rename_desc, (vop_t *) devfs_rename }, { &vop_setattr_desc, (vop_t *) devfs_setattr }, { &vop_symlink_desc, (vop_t *) devfs_symlink }, { &vop_write_desc, (vop_t *) devfs_xwrite }, { NULL, NULL } }; static struct vnodeopv_desc devfs_vnodeop_opv_desc = { &devfs_vnodeop_p, devfs_vnodeop_entries }; VNODEOP_SET(devfs_vnodeop_opv_desc); vop_t **devfs_spec_vnodeop_p; static struct vnodeopv_entry_desc devfs_spec_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_access_desc, (vop_t *) devfs_access }, { &vop_advlock_desc, (vop_t *) devfs_advlock }, { &vop_bmap_desc, (vop_t *) devfs_bmap }, { &vop_close_desc, (vop_t *) devfs_close }, { &vop_create_desc, (vop_t *) devfs_badop }, { &vop_fsync_desc, (vop_t *) devfs_fsync }, { &vop_getattr_desc, (vop_t *) devfs_getattr }, { &vop_getpages_desc, (vop_t *) devfs_getpages }, { &vop_inactive_desc, (vop_t *) devfs_inactive }, { &vop_ioctl_desc, (vop_t *) devfs_ioctl }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) devfs_badop }, { &vop_lookup_desc, (vop_t *) devfs_lookup }, { &vop_mkdir_desc, (vop_t *) devfs_badop }, { &vop_mknod_desc, (vop_t *) devfs_badop }, { &vop_open_desc, (vop_t *) devfs_open }, { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, { &vop_poll_desc, (vop_t *) devfs_poll }, { &vop_print_desc, (vop_t *) devfs_print }, { &vop_read_desc, (vop_t *) devfs_read }, { &vop_readdir_desc, (vop_t *) devfs_badop }, { &vop_readlink_desc, (vop_t *) devfs_badop }, { &vop_reallocblks_desc, (vop_t *) devfs_badop }, { &vop_reclaim_desc, (vop_t *) devfs_reclaim }, { &vop_remove_desc, (vop_t *) devfs_badop }, { &vop_rename_desc, (vop_t *) devfs_badop }, { &vop_rmdir_desc, (vop_t *) devfs_badop }, { &vop_setattr_desc, (vop_t *) devfs_setattr }, { &vop_strategy_desc, (vop_t *) devfs_strategy }, { &vop_symlink_desc, (vop_t *) devfs_symlink }, { &vop_write_desc, (vop_t *) devfs_write }, { NULL, NULL } }; static struct vnodeopv_desc devfs_spec_vnodeop_opv_desc = { &devfs_spec_vnodeop_p, devfs_spec_vnodeop_entries }; VNODEOP_SET(devfs_spec_vnodeop_opv_desc); Index: head/sys/miscfs/specfs/specdev.h =================================================================== --- head/sys/miscfs/specfs/specdev.h (revision 49534) +++ head/sys/miscfs/specfs/specdev.h (nonexistent) @@ -1,76 +0,0 @@ -/* - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. 
- * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * @(#)specdev.h 8.6 (Berkeley) 5/21/95 - * $Id: specdev.h,v 1.17 1999/05/11 19:54:39 phk Exp $ - */ - -/* - * This structure defines the information maintained about - * special devices. It is allocated in checkalias and freed - * in vgone. - */ -struct specinfo { - struct mount *si_mountpoint; - int si_bsize_phys; /* minimum physical block size */ - int si_bsize_best; /* optimal block size / VBLK */ - int si_bsize_max; /* maximum block size */ - - udev_t si_udev; - SLIST_ENTRY(specinfo) si_hash; - struct vnode *si_hlist; -}; -/* - * Exported shorthand - */ -#define v_hashchain v_specinfo->si_hlist -#define v_specmountpoint v_specinfo->si_mountpoint - -/* - * Special device management - */ -#define SPECHSZ 64 -#define SPECHASH(rdev) (((unsigned)(minor(rdev)))%SPECHSZ) - - -/* - * Prototypes for special file operations on vnodes. - */ -extern vop_t **spec_vnodeop_p; -struct nameidata; -struct componentname; -struct ucred; -struct flock; -struct buf; -struct uio; - -int spec_vnoperate __P((struct vop_generic_args *)); Property changes on: head/sys/miscfs/specfs/specdev.h ___________________________________________________________________ Deleted: svn:keywords ## -1 +0,0 ## -FreeBSD=%H \ No newline at end of property Index: head/sys/miscfs/specfs/spec_vnops.c =================================================================== --- head/sys/miscfs/specfs/spec_vnops.c (revision 49534) +++ head/sys/miscfs/specfs/spec_vnops.c (revision 49535) @@ -1,963 +1,961 @@ /* * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 - * $Id: spec_vnops.c,v 1.89 1999/06/26 02:46:21 mckusick Exp $ + * $Id: spec_vnops.c,v 1.90 1999/07/20 09:47:45 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include static int spec_advlock __P((struct vop_advlock_args *)); static int spec_badop __P((void)); static int spec_bmap __P((struct vop_bmap_args *)); static int spec_close __P((struct vop_close_args *)); static int spec_freeblks __P((struct vop_freeblks_args *)); static int spec_fsync __P((struct vop_fsync_args *)); static int spec_getattr __P((struct vop_getattr_args *)); static int spec_getpages __P((struct vop_getpages_args *)); static int spec_inactive __P((struct vop_inactive_args *)); static int spec_ioctl __P((struct vop_ioctl_args *)); static int spec_lookup __P((struct vop_lookup_args *)); static int spec_open __P((struct vop_open_args *)); static int spec_poll __P((struct vop_poll_args *)); static int spec_print __P((struct vop_print_args *)); static int spec_read __P((struct vop_read_args *)); static int spec_strategy __P((struct vop_strategy_args *)); static int spec_write __P((struct vop_write_args *)); vop_t **spec_vnodeop_p; static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_access_desc, (vop_t *) vop_ebadf }, { &vop_advlock_desc, (vop_t *) spec_advlock }, { &vop_bmap_desc, (vop_t *) spec_bmap }, { &vop_close_desc, (vop_t *) spec_close }, { &vop_create_desc, (vop_t *) spec_badop }, { &vop_freeblks_desc, (vop_t *) spec_freeblks }, { &vop_fsync_desc, (vop_t *) spec_fsync }, { &vop_getattr_desc, (vop_t *) spec_getattr }, { &vop_getpages_desc, (vop_t *) spec_getpages }, { &vop_inactive_desc, (vop_t *) spec_inactive }, { &vop_ioctl_desc, (vop_t *) spec_ioctl }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) spec_badop }, { &vop_lookup_desc, (vop_t *) spec_lookup }, { &vop_mkdir_desc, (vop_t *) spec_badop }, { &vop_mknod_desc, (vop_t *) spec_badop }, { &vop_open_desc, (vop_t *) spec_open }, { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, { &vop_poll_desc, (vop_t *) spec_poll }, { &vop_print_desc, (vop_t *) spec_print }, { &vop_read_desc, (vop_t *) spec_read }, { &vop_readdir_desc, (vop_t *) 
spec_badop }, { &vop_readlink_desc, (vop_t *) spec_badop }, { &vop_reallocblks_desc, (vop_t *) spec_badop }, { &vop_reclaim_desc, (vop_t *) vop_null }, { &vop_remove_desc, (vop_t *) spec_badop }, { &vop_rename_desc, (vop_t *) spec_badop }, { &vop_rmdir_desc, (vop_t *) spec_badop }, { &vop_setattr_desc, (vop_t *) vop_ebadf }, { &vop_strategy_desc, (vop_t *) spec_strategy }, { &vop_symlink_desc, (vop_t *) spec_badop }, { &vop_write_desc, (vop_t *) spec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; VNODEOP_SET(spec_vnodeop_opv_desc); int spec_vnoperate(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { return (VOCALL(spec_vnodeop_p, ap->a_desc->vdesc_offset, ap)); } static void spec_getpages_iodone __P((struct buf *bp)); /* * Trivial lookup routine that always fails. */ static int spec_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * Open a special file. */ /* ARGSUSED */ static int spec_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct proc *p = ap->a_p; struct vnode *bvp, *vp = ap->a_vp; dev_t bdev, dev = vp->v_rdev; int error; struct cdevsw *dsw; /* * Don't allow open if fs is mounted -nodev. */ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) return (ENXIO); switch (vp->v_type) { case VCHR: dsw = devsw(dev); if ( (dsw == NULL) || (dsw->d_open == NULL)) return ENXIO; if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { /* * When running in very secure mode, do not allow * opens for writing of any disk character devices. */ if (securelevel >= 2 && dsw->d_bmaj != -1 && (dsw->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * When running in secure mode, do not allow opens * for writing of /dev/mem, /dev/kmem, or character * devices whose corresponding block devices are * currently mounted. */ if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && vfinddev(bdev, VBLK, &bvp) && bvp->v_usecount > 0 && (error = vfs_mountedon(bvp))) return (error); if (iskmemdev(dev)) return (EPERM); } } if ((dsw->d_flags & D_TYPEMASK) == D_TTY) vp->v_flag |= VISTTY; VOP_UNLOCK(vp, 0, p); error = (*dsw->d_open)(dev, ap->a_mode, S_IFCHR, p); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); /* NOT REACHED */ case VBLK: dsw = bdevsw(dev); if ( (dsw == NULL) || (dsw->d_open == NULL)) return ENXIO; /* * When running in very secure mode, do not allow * opens for writing of any disk block devices. */ if (securelevel >= 2 && ap->a_cred != FSCRED && (ap->a_mode & FWRITE) && (dsw->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * Do not allow opens of block devices that are * currently mounted. 
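 * vfs_mountedon() reports EBUSY when this device, or one of its
 * aliased vnodes, is the backing store of a mounted filesystem.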
*/ error = vfs_mountedon(vp); if (error) return (error); return ((*dsw->d_open)(dev, ap->a_mode, S_IFBLK, p)); /* NOT REACHED */ default: break; } return (0); } /* * Vnode op for read */ /* ARGSUSED */ static int spec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn, nextbn; long bsize, bscale; struct partinfo dpart; int n, on; d_ioctl_t *ioctl; int error = 0; dev_t dev; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_read proc"); #endif if (uio->uio_resid == 0) return (0); switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*devsw(vp->v_rdev)->d_read) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_offset < 0) return (EINVAL); dev = vp->v_rdev; /* * Calculate block size for block device. The block size must * be larger then the physical minimum. */ bsize = vp->v_specinfo->si_bsize_best; if ((ioctl = bdevsw(dev)->d_ioctl) != NULL && (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; bscale = btodb(bsize); do { bn = btodb(uio->uio_offset) & ~(bscale - 1); on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (vp->v_lastr + bscale == bn) { nextbn = bn + bscale; error = breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else error = bread(vp, bn, (int)bsize, NOCRED, &bp); vp->v_lastr = bn; n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_read type"); } /* NOTREACHED */ } /* * Vnode op for write */ /* ARGSUSED */ static int spec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn; int bsize, blkmask; struct partinfo dpart; register int n, on; int error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("spec_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_write proc"); #endif switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*devsw(vp->v_rdev)->d_write) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); /* * Calculate block size for block device. The block size must * be larger then the physical minimum. 
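 * si_bsize_best is the default; a BSD FFS partition overrides it with
 * the FFS block size (p_frag * p_fsize).  A write that covers a whole
 * block can use getblk() without reading, while a partial block is
 * first read with bread() and modified in place (read-modify-write).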
*/ bsize = vp->v_specinfo->si_bsize_best; if ((*bdevsw(vp->v_rdev)->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) { if (dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; } blkmask = btodb(bsize) - 1; do { bn = btodb(uio->uio_offset) & ~blkmask; on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (n == bsize) bp = getblk(vp, bn, bsize, 0, 0); else error = bread(vp, bn, bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } n = min(n, bsize - bp->b_resid); error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) bawrite(bp); else bdwrite(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ } /* * Device ioctl operation. */ /* ARGSUSED */ static int spec_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { dev_t dev = ap->a_vp->v_rdev; switch (ap->a_vp->v_type) { case VCHR: return ((*devsw(dev)->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); case VBLK: return ((*bdevsw(dev)->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); default: panic("spec_ioctl"); /* NOTREACHED */ } } /* ARGSUSED */ static int spec_poll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register dev_t dev; switch (ap->a_vp->v_type) { case VCHR: dev = ap->a_vp->v_rdev; return (*devsw(dev)->d_poll)(dev, ap->a_events, ap->a_p); default: return (vop_defaultop((struct vop_generic_args *)ap)); } } /* * Synch buffers associated with a block device */ /* ARGSUSED */ static int spec_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct buf *nbp; int s; if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. 
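 * The scan restarts from the head of the dirty list after every write,
 * because bawrite()/vfs_bio_awrite() are called after splx() and the
 * list may change underneath us.  With MNT_WAIT we also wait for
 * v_numoutput to drain before returning.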
*/ loop: s = splbio(); for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("spec_fsync: not dirty"); if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { BUF_UNLOCK(bp); vfs_bio_awrite(bp); splx(s); } else { bremfree(bp); splx(s); bawrite(bp); } goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0); } #ifdef DIAGNOSTIC if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { vprint("spec_fsync: dirty", vp); splx(s); goto loop; } #endif } splx(s); return (0); } static int spec_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } /* * Just call the device strategy routine */ static int spec_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { struct buf *bp; bp = ap->a_bp; if (((bp->b_flags & B_READ) == 0) && (LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) (*bioops.io_start)(bp); (*bdevsw(bp->b_dev)->d_strategy)(bp); return (0); } static int spec_freeblks(ap) struct vop_freeblks_args /* { struct vnode *a_vp; daddr_t a_addr; daddr_t a_length; } */ *ap; { struct cdevsw *bsw; struct buf *bp; bsw = bdevsw(ap->a_vp->v_rdev); if ((bsw->d_flags & D_CANFREE) == 0) return (0); bp = geteblk(ap->a_length); bp->b_flags |= B_FREEBUF; bp->b_dev = ap->a_vp->v_rdev; bp->b_blkno = ap->a_addr; bp->b_offset = dbtob(ap->a_addr); bp->b_bcount = ap->a_length; (*bsw->d_strategy)(bp); return (0); } /* * This is a noop, simply returning what one has been given. */ static int spec_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Device close routine */ /* ARGSUSED */ static int spec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; dev_t dev = vp->v_rdev; d_close_t *devclose; int mode, error; switch (vp->v_type) { case VCHR: /* * Hack: a tty device that is a controlling terminal * has a reference from the session structure. * We cannot easily tell that a character device is * a controlling terminal, unless it is the closing * process' controlling terminal. In that case, * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ if (vcount(vp) == 2 && ap->a_p && (vp->v_flag & VXLOCK) == 0 && vp == ap->a_p->p_session->s_ttyvp) { vrele(vp); ap->a_p->p_session->s_ttyvp = NULL; } /* * If the vnode is locked, then we are in the midst * of forcably closing the device, otherwise we only * close on last reference. */ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) return (0); devclose = devsw(dev)->d_close; mode = S_IFCHR; break; case VBLK: /* * On last close of a block device (that isn't mounted) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. 
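 * vinvalbuf() is called with V_SAVE, so any dirty buffers are written
 * back before the cached blocks are thrown away.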
*/ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); VOP_UNLOCK(vp, 0, ap->a_p); if (error) return (error); /* * We do not want to really close the device if it * is still in use unless we are trying to close it * forcibly. Since every use (buffer, vnode, swap, cmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0) return (0); devclose = bdevsw(dev)->d_close; mode = S_IFBLK; break; default: panic("spec_close: not special"); } return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); } /* * Print out the contents of a special device vnode. */ static int spec_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), minor(ap->a_vp->v_rdev)); return (0); } /* * Special device advisory byte-level locks. */ /* ARGSUSED */ static int spec_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); } /* * Special device bad operation */ static int spec_badop() { panic("spec_badop called"); /* NOTREACHED */ } static void spec_getpages_iodone(bp) struct buf *bp; { bp->b_flags |= B_DONE; wakeup(bp); } static int spec_getpages(ap) struct vop_getpages_args *ap; { vm_offset_t kva; int error; int i, pcount, size, s; daddr_t blkno; struct buf *bp; vm_page_t m; vm_ooffset_t offset; int toff, nextoff, nread; struct vnode *vp = ap->a_vp; int blksiz; int gotreqpage; error = 0; pcount = round_page(ap->a_count) / PAGE_SIZE; /* * Calculate the offset of the transfer and do sanity check. * FreeBSD currently only supports an 8 TB range due to b_blkno * being in DEV_BSIZE ( usually 512 ) byte chunks on call to * VOP_STRATEGY. XXX */ offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset; #define DADDR_T_BIT (sizeof(daddr_t)*8) #define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1) if (offset < 0 || offset > OFFSET_MAX) { /* XXX still no %q in kernel. */ printf("spec_getpages: preposterous offset 0x%x%08x\n", (u_int)((u_quad_t)offset >> 32), (u_int)(offset & 0xffffffff)); return (VM_PAGER_ERROR); } blkno = btodb(offset); /* * Round up physical size for real devices. We cannot round using * v_mount's block size data because v_mount has nothing to do with * the device. i.e. it's usually '/dev'. We need the physical block * size for the device itself. * * We can't use v_specmountpoint because it only exists when the * block device is mounted. However, we can use v_specinfo. */ if (vp->v_type == VBLK) blksiz = vp->v_specinfo->si_bsize_phys; else blksiz = DEV_BSIZE; size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); bp = getpbuf(NULL); kva = (vm_offset_t)bp->b_data; /* * Map the pages to be read into the kva. */ pmap_qenter(kva, ap->a_m, pcount); /* Build a minimal buffer header. */ bp->b_flags = B_READ | B_CALL; bp->b_iodone = spec_getpages_iodone; /* B_PHYS is not set, but it is nice to fill this in. */ bp->b_rcred = bp->b_wcred = curproc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_blkno = blkno; bp->b_lblkno = blkno; pbgetvp(ap->a_vp, bp); bp->b_bcount = size; bp->b_bufsize = size; bp->b_resid = 0; cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; /* Do the input. 
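 * B_CALL is set on the pbuf, so spec_getpages_iodone() marks it B_DONE
 * and wakes us up; we sleep at splbio until that happens and then pick
 * up any error from b_error.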
*/ VOP_STRATEGY(bp->b_vp, bp); s = splbio(); /* We definitely need to be at splbio here. */ while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PVM, "spread", 0); splx(s); if ((bp->b_flags & B_ERROR) != 0) { if (bp->b_error) error = bp->b_error; else error = EIO; } nread = size - bp->b_resid; if (nread < ap->a_count) { bzero((caddr_t)kva + nread, ap->a_count - nread); } pmap_qremove(kva, pcount); gotreqpage = 0; for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= nread) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else if (toff < nread) { /* * Since this is a VM request, we have to supply the * unaligned offset to allow vm_page_set_validclean() * to zero sub-DEV_BSIZE'd portions of the page. */ vm_page_set_validclean(m, 0, nread - toff); } else { m->valid = 0; m->dirty = 0; } if (i != ap->a_reqpage) { /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error || (m->valid == VM_PAGE_BITS_ALL)) { if (m->valid) { if (m->flags & PG_WANTED) { vm_page_activate(m); } else { vm_page_deactivate(m); } vm_page_wakeup(m); } else { vm_page_free(m); } } else { vm_page_free(m); } } else if (m->valid) { gotreqpage = 1; /* * Since this is a VM request, we need to make the * entire page presentable by zeroing invalid sections. */ if (m->valid != VM_PAGE_BITS_ALL) vm_page_zero_invalid(m, FALSE); } } if (!gotreqpage) { m = ap->a_m[ap->a_reqpage]; #ifndef MAX_PERF printf( "spec_getpages: I/O read failure: (error code=%d) bp %p vp %p\n", error, bp, bp->b_vp); printf( " size: %d, resid: %ld, a_count: %d, valid: 0x%x\n", size, bp->b_resid, ap->a_count, m->valid); printf( " nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n", nread, ap->a_reqpage, (u_long)m->pindex, pcount); #endif /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp, NULL); return VM_PAGER_ERROR; } /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp, NULL); return VM_PAGER_OK; } /* ARGSUSED */ static int spec_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vattr *vap = ap->a_vap; struct partinfo dpart; bzero(vap, sizeof (*vap)); if (vp->v_type == VBLK) { if (vp->v_specinfo) vap->va_blocksize = vp->v_specmountpoint->mnt_stat.f_iosize; else vap->va_blocksize = BLKDEV_IOSIZE; } else if (vp->v_type == VCHR) { vap->va_blocksize = MAXBSIZE; } if ((*bdevsw(vp->v_rdev)->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, ap->a_p) == 0) { vap->va_bytes = dbtob(dpart.disklab->d_partitions [minor(vp->v_rdev)].p_size); vap->va_size = vap->va_bytes; } return (0); } Index: head/sys/msdosfs/msdosfs_vfsops.c =================================================================== --- head/sys/msdosfs/msdosfs_vfsops.c (revision 49534) +++ head/sys/msdosfs/msdosfs_vfsops.c (revision 49535) @@ -1,1017 +1,1016 @@ -/* $Id: msdosfs_vfsops.c,v 1.44 1999/05/08 06:40:00 phk Exp $ */ +/* $Id: msdosfs_vfsops.c,v 1.45 1999/05/31 11:28:02 phk Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. 
* * October 1992 */ #include #include #include #include #include #include #include -#include /* XXX */ /* defines v_rdev */ #include #include #include #include #include /* defines ALLPERMS */ #include #include #include #include #include #include MALLOC_DEFINE(M_MSDOSFSMNT, "MSDOSFS mount", "MSDOSFS mount structure"); static MALLOC_DEFINE(M_MSDOSFSFAT, "MSDOSFS FAT", "MSDOSFS file allocation table"); static int update_mp __P((struct mount *mp, struct msdosfs_args *argp)); static int mountmsdosfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct msdosfs_args *argp)); static int msdosfs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int msdosfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int msdosfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int msdosfs_root __P((struct mount *, struct vnode **)); static int msdosfs_start __P((struct mount *, int, struct proc *)); static int msdosfs_statfs __P((struct mount *, struct statfs *, struct proc *)); static int msdosfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int msdosfs_unmount __P((struct mount *, int, struct proc *)); static int msdosfs_vget __P((struct mount *mp, ino_t ino, struct vnode **vpp)); static int msdosfs_vptofh __P((struct vnode *, struct fid *)); static int update_mp(mp, argp) struct mount *mp; struct msdosfs_args *argp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error; pmp->pm_gid = argp->gid; pmp->pm_uid = argp->uid; pmp->pm_mask = argp->mask & ALLPERMS; pmp->pm_flags |= argp->flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_U2WTABLE) { bcopy(argp->u2w, pmp->pm_u2w, sizeof(pmp->pm_u2w)); bcopy(argp->d2u, pmp->pm_d2u, sizeof(pmp->pm_d2u)); bcopy(argp->u2d, pmp->pm_u2d, sizeof(pmp->pm_u2d)); } if (pmp->pm_flags & MSDOSFSMNT_ULTABLE) { bcopy(argp->ul, pmp->pm_ul, sizeof(pmp->pm_ul)); bcopy(argp->lu, pmp->pm_lu, sizeof(pmp->pm_lu)); } #ifndef __FreeBSD__ /* * GEMDOS knows nothing (yet) about win95 */ if (pmp->pm_flags & MSDOSFSMNT_GEMDOSFS) pmp->pm_flags |= MSDOSFSMNT_NOWIN95; #endif if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; else if (!(pmp->pm_flags & (MSDOSFSMNT_SHORTNAME | MSDOSFSMNT_LONGNAME))) { struct vnode *rootvp; /* * Try to divine whether to support Win'95 long filenames */ if (FAT32(pmp)) pmp->pm_flags |= MSDOSFSMNT_LONGNAME; else { if ((error = msdosfs_root(mp, &rootvp)) != 0) return error; pmp->pm_flags |= findwin95(VTODE(rootvp)) ? MSDOSFSMNT_LONGNAME : MSDOSFSMNT_SHORTNAME; vput(rootvp); } } return 0; } #ifndef __FreeBSD__ int msdosfs_mountroot() { register struct mount *mp; struct proc *p = curproc; /* XXX */ size_t size; int error; struct msdosfs_args args; if (root_device->dv_class != DV_DISK) return (ENODEV); /* * Get vnodes for swapdev and rootdev. 
*/ if (bdevvp(rootdev, &rootvp)) panic("msdosfs_mountroot: can't setup rootvp"); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &msdosfs_vfsops; mp->mnt_flag = 0; LIST_INIT(&mp->mnt_vnodelist); args.flags = 0; args.uid = 0; args.gid = 0; args.mask = 0777; if ((error = mountmsdosfs(rootvp, mp, p, &args)) != 0) { free(mp, M_MOUNT); return (error); } if ((error = update_mp(mp, &args)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } if ((error = vfs_lock(mp)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); mp->mnt_vnodecovered = NULLVP; (void) copystr("/", mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)msdosfs_statfs(mp, &mp->mnt_stat, p); vfs_unlock(mp); return (0); } #endif /* * mp - path - addr in user space of mount point (ie /usr or whatever) * data - addr in user space of mount params including the name of the block * special file to treat as a filesystem. */ static int msdosfs_mount(mp, path, data, ndp, p) struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; /* vnode for blk device to mount */ struct msdosfs_args args; /* will hold data from mount request */ /* msdosfs specific mount control block */ struct msdosfsmount *pmp = NULL; size_t size; int error, flags; mode_t accessmode; error = copyin(data, (caddr_t)&args, sizeof(struct msdosfs_args)); if (error) return (error); if (args.magic != MSDOSFS_ARGSMAGIC) args.flags = 0; /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { pmp = VFSTOMSDOSFS(mp); error = 0; if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); } if (!error && (mp->mnt_flag & MNT_RELOAD)) /* not yet implemented */ error = EOPNOTSUPP; if (error) return (error); if ((pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (p->p_ucred->cr_uid != 0) { devvp = pmp->pm_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p); if (error) { VOP_UNLOCK(devvp, 0, p); return (error); } VOP_UNLOCK(devvp, 0, p); } pmp->pm_flags &= ~MSDOSFSMNT_RONLY; } if (args.fspec == 0) { #ifdef __notyet__ /* doesn't work correctly with current mountd XXX */ if (args.flags & MSDOSFSMNT_MNTOPT) { pmp->pm_flags &= ~MSDOSFSMNT_MNTOPT; pmp->pm_flags |= args.flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; } #endif /* * Process export requests. */ return (vfs_export(mp, &pmp->pm_export, &args.export)); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. 
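 * namei() resolves the user-supplied args.fspec; the result must be a
 * VBLK vnode with a registered bdevsw entry, and a non-root caller
 * must also have the appropriate access rights on the device node.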
*/ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); error = namei(ndp); if (error) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return (ENOTBLK); } if (bdevsw(devvp->v_rdev) == NULL) { vrele(devvp); return (ENXIO); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (p->p_ucred->cr_uid != 0) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = mountmsdosfs(devvp, mp, p, &args); #ifdef MSDOSFS_DEBUG /* only needed for the printf below */ pmp = VFSTOMSDOSFS(mp); #endif } else { if (devvp != pmp->pm_devvp) error = EINVAL; /* XXX needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return (error); } error = update_mp(mp, &args); if (error) { msdosfs_unmount(mp, MNT_FORCE, p); return error; } (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) msdosfs_statfs(mp, &mp->mnt_stat, p); #ifdef MSDOSFS_DEBUG printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap); #endif return (0); } static int mountmsdosfs(devvp, mp, p, argp) struct vnode *devvp; struct mount *mp; struct proc *p; struct msdosfs_args *argp; { struct msdosfsmount *pmp; struct buf *bp; dev_t dev = devvp->v_rdev; #ifndef __FreeBSD__ struct partinfo dpart; int bsize = 0, dtype = 0, tmp; #endif union bootsector *bsp; struct byte_bpb33 *b33; struct byte_bpb50 *b50; struct byte_bpb710 *b710; u_int8_t SecPerClust; int ronly, error; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); VOP_UNLOCK(devvp, 0, p); if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); bp = NULL; /* both used in error_exit */ pmp = NULL; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { /* * We need the disklabel to calculate the size of a FAT entry * later on. Also make sure the partition contains a filesystem * of type FS_MSDOS. This doesn't work for floppies, so we have * to check for them too. * * At least some parts of the msdos fs driver seem to assume * that the size of a disk block will always be 512 bytes. * Let's check it... */ error = VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p); if (error) goto error_exit; tmp = dpart.part->p_fstype; dtype = dpart.disklab->d_type; bsize = dpart.disklab->d_secsize; if (bsize != 512 || (dtype!=DTYPE_FLOPPY && tmp!=FS_MSDOS)) { error = EINVAL; goto error_exit; } } #endif /* * Read the boot sector of the filesystem, and then check the * boot signature. If not a dos boot sector then error out. 
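 * The stock check compares bsBootSectSig0/1 against BOOTSIG0/BOOTSIG1
 * (the usual 0x55/0xaa pair); the PC98 build additionally accepts the
 * signatures written by the PC-98 DOS variants listed in the #ifdef
 * below.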
*/ #ifdef PC98 error = bread(devvp, 0, 1024, NOCRED, &bp); #else error = bread(devvp, 0, 512, NOCRED, &bp); #endif if (error) goto error_exit; bp->b_flags |= B_AGE; bsp = (union bootsector *)bp->b_data; b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB; b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB; b710 = (struct byte_bpb710 *)bsp->bs710.bsPBP; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif #ifdef PC98 if ((bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) && (bsp->bs50.bsBootSectSig0 != 0 /* PC98 DOS 3.3x */ || bsp->bs50.bsBootSectSig1 != 0) && (bsp->bs50.bsBootSectSig0 != 0x90 /* PC98 DOS 5.0 */ || bsp->bs50.bsBootSectSig1 != 0x3d) && (bsp->bs50.bsBootSectSig0 != 0x46 /* PC98 DOS 3.3B */ || bsp->bs50.bsBootSectSig1 != 0xfa)) { #else if (bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif pmp = malloc(sizeof *pmp, M_MSDOSFSMNT, M_WAITOK); bzero((caddr_t)pmp, sizeof *pmp); pmp->pm_mountp = mp; /* * Compute several useful quantities from the bpb in the * bootsector. Copy in the dos 5 variant of the bpb then fix up * the fields that are different between dos 5 and dos 3.3. */ SecPerClust = b50->bpbSecPerClust; pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec); pmp->pm_ResSectors = getushort(b50->bpbResSectors); pmp->pm_FATs = b50->bpbFATs; pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts); pmp->pm_Sectors = getushort(b50->bpbSectors); pmp->pm_FATsecs = getushort(b50->bpbFATsecs); pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack); pmp->pm_Heads = getushort(b50->bpbHeads); pmp->pm_Media = b50->bpbMedia; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif /* XXX - We should probably check more values here */ if (!pmp->pm_BytesPerSec || !SecPerClust || !pmp->pm_Heads || pmp->pm_Heads > 255 #ifdef PC98 || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 255) { #else || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 63) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif if (pmp->pm_Sectors == 0) { pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs); pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors); } else { pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs); pmp->pm_HugeSectors = pmp->pm_Sectors; } if (pmp->pm_HugeSectors > 0xffffffff / (pmp->pm_BytesPerSec / sizeof(struct direntry)) + 1) { /* * We cannot deal currently with this size of disk * due to fileid limitations (see msdosfs_getattr and * msdosfs_readdir) */ error = EINVAL; printf("mountmsdosfs(): disk too big, sorry\n"); goto error_exit; } if (pmp->pm_RootDirEnts == 0) { if (bsp->bs710.bsBootSectSig2 != BOOTSIG2 || bsp->bs710.bsBootSectSig3 != BOOTSIG3 || pmp->pm_Sectors || pmp->pm_FATsecs || getushort(b710->bpbFSVers)) { error = EINVAL; printf("mountmsdosfs(): bad FAT32 filesystem\n"); goto error_exit; } pmp->pm_fatmask = FAT32_MASK; pmp->pm_fatmult = 4; pmp->pm_fatdiv = 1; pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs); if (getushort(b710->bpbExtFlags) & FATMIRROR) pmp->pm_curfat = getushort(b710->bpbExtFlags) & FATNUM; else pmp->pm_flags |= MSDOSFS_FATMIRROR; } else pmp->pm_flags |= MSDOSFS_FATMIRROR; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if (FAT32(pmp)) { /* * GEMDOS doesn't know fat32. 
*/ error = EINVAL; goto error_exit; } /* * Check a few values (could do some more): * - logical sector size: power of 2, >= block size * - sectors per cluster: power of 2, >= 1 * - number of sectors: >= 1, <= size of partition */ if ( (SecPerClust == 0) || (SecPerClust & (SecPerClust - 1)) || (pmp->pm_BytesPerSec < bsize) || (pmp->pm_BytesPerSec & (pmp->pm_BytesPerSec - 1)) || (pmp->pm_HugeSectors == 0) || (pmp->pm_HugeSectors * (pmp->pm_BytesPerSec / bsize) > dpart.part->p_size) ) { error = EINVAL; goto error_exit; } /* * XXX - Many parts of the msdos fs driver seem to assume that * the number of bytes per logical sector (BytesPerSec) will * always be the same as the number of bytes per disk block * Let's pretend it is. */ tmp = pmp->pm_BytesPerSec / bsize; pmp->pm_BytesPerSec = bsize; pmp->pm_HugeSectors *= tmp; pmp->pm_HiddenSects *= tmp; pmp->pm_ResSectors *= tmp; pmp->pm_Sectors *= tmp; pmp->pm_FATsecs *= tmp; SecPerClust *= tmp; } #endif pmp->pm_fatblk = pmp->pm_ResSectors; if (FAT32(pmp)) { pmp->pm_rootdirblk = getulong(b710->bpbRootClust); pmp->pm_firstcluster = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_fsinfo = getushort(b710->bpbFSInfo); } else { pmp->pm_rootdirblk = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_rootdirsize = (pmp->pm_RootDirEnts * sizeof(struct direntry) + pmp->pm_BytesPerSec - 1) / pmp->pm_BytesPerSec;/* in sectors */ pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize; } pmp->pm_nmbrofclusters = (pmp->pm_HugeSectors - pmp->pm_firstcluster) / SecPerClust; pmp->pm_maxcluster = pmp->pm_nmbrofclusters + 1; pmp->pm_fatsize = pmp->pm_FATsecs * pmp->pm_BytesPerSec; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if ((pmp->pm_nmbrofclusters <= (0xff0 - 2)) && ((dtype == DTYPE_FLOPPY) || ((dtype == DTYPE_VNODE) && ((pmp->pm_Heads == 1) || (pmp->pm_Heads == 2)))) ) { pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } else #endif if (pmp->pm_fatmask == 0) { if (pmp->pm_maxcluster <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) { /* * This will usually be a floppy disk. This size makes * sure that one fat entry will not be split across * multiple blocks. */ pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } if (FAT12(pmp)) pmp->pm_fatblocksize = 3 * pmp->pm_BytesPerSec; else pmp->pm_fatblocksize = DFLTBSIZE; pmp->pm_fatblocksec = pmp->pm_fatblocksize / pmp->pm_BytesPerSec; pmp->pm_bnshift = ffs(pmp->pm_BytesPerSec) - 1; /* * Compute mask and shift value for isolating cluster relative byte * offsets and cluster numbers from a file offset. */ pmp->pm_bpcluster = SecPerClust * pmp->pm_BytesPerSec; pmp->pm_crbomask = pmp->pm_bpcluster - 1; pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1; /* * Check for valid cluster size * must be a power of 2 */ if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) { error = EINVAL; goto error_exit; } /* * Release the bootsector buffer. */ brelse(bp); bp = NULL; /* * Check FSInfo. 
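 * The FAT32 FSInfo block is recognized by its "RRaA"/"rrAa" signatures;
 * when they check out, pm_nxtfree is seeded from the stored
 * next-free-cluster hint, otherwise pm_fsinfo is cleared and the hint
 * is ignored.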
*/ if (pmp->pm_fsinfo) { struct fsinfo *fp; if ((error = bread(devvp, pmp->pm_fsinfo, 1024, NOCRED, &bp)) != 0) goto error_exit; fp = (struct fsinfo *)bp->b_data; if (!bcmp(fp->fsisig1, "RRaA", 4) && !bcmp(fp->fsisig2, "rrAa", 4) && !bcmp(fp->fsisig3, "\0\0\125\252", 4) && !bcmp(fp->fsisig4, "\0\0\125\252", 4)) pmp->pm_nxtfree = getulong(fp->fsinxtfree); else pmp->pm_fsinfo = 0; brelse(bp); bp = NULL; } /* * Check and validate (or perhaps invalidate?) the fsinfo structure? XXX */ /* * Allocate memory for the bitmap of allocated clusters, and then * fill it in. */ pmp->pm_inusemap = malloc(((pmp->pm_maxcluster + N_INUSEBITS - 1) / N_INUSEBITS) * sizeof(*pmp->pm_inusemap), M_MSDOSFSFAT, M_WAITOK); /* * fillinusemap() needs pm_devvp. */ pmp->pm_dev = dev; pmp->pm_devvp = devvp; /* * Have the inuse map filled in. */ if ((error = fillinusemap(pmp)) != 0) goto error_exit; /* * If they want fat updates to be synchronous then let them suffer * the performance degradation in exchange for the on disk copy of * the fat being correct just about all the time. I suppose this * would be a good thing to turn on if the kernel is still flakey. */ if (mp->mnt_flag & MNT_SYNCHRONOUS) pmp->pm_flags |= MSDOSFSMNT_WAITONFAT; /* * Finish up. */ if (ronly) pmp->pm_flags |= MSDOSFSMNT_RONLY; else pmp->pm_fmod = 1; mp->mnt_data = (qaddr_t) pmp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_flag |= MNT_LOCAL; devvp->v_specmountpoint = mp; return 0; error_exit: if (bp) brelse(bp); (void) VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NOCRED, p); if (pmp) { if (pmp->pm_inusemap) free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } static int msdosfs_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return (0); } /* * Unmount the filesystem described by mp. */ static int msdosfs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { struct msdosfsmount *pmp; int error, flags; flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); if (error) return error; pmp = VFSTOMSDOSFS(mp); pmp->pm_devvp->v_specmountpoint = NULL; #ifdef MSDOSFS_DEBUG { struct vnode *vp = pmp->pm_devvp; printf("msdosfs_umount(): just before calling VOP_CLOSE()\n"); printf("flag %08lx, usecount %d, writecount %d, holdcnt %ld\n", vp->v_flag, vp->v_usecount, vp->v_writecount, vp->v_holdcnt); printf("lastr %d, id %lu, mount %p, op %p\n", vp->v_lastr, vp->v_id, vp->v_mount, vp->v_op); printf("freef %p, freeb %p, mount %p\n", vp->v_freelist.tqe_next, vp->v_freelist.tqe_prev, vp->v_mount); printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n", TAILQ_FIRST(&vp->v_cleanblkhd), TAILQ_FIRST(&vp->v_dirtyblkhd), vp->v_numoutput, vp->v_type); printf("union %p, tag %d, data[0] %08x, data[1] %08x\n", vp->v_socket, vp->v_tag, ((u_int *)vp->v_data)[0], ((u_int *)vp->v_data)[1]); } #endif error = VOP_CLOSE(pmp->pm_devvp, (pmp->pm_flags&MSDOSFSMNT_RONLY) ? 
FREAD : FREAD | FWRITE, NOCRED, p); vrele(pmp->pm_devvp); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } static int msdosfs_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct denode *ndep; int error; #ifdef MSDOSFS_DEBUG printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp); #endif error = deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, &ndep); if (error) return (error); *vpp = DETOV(ndep); return (0); } static int msdosfs_quotactl(mp, cmds, uid, arg, p) struct mount *mp; int cmds; uid_t uid; caddr_t arg; struct proc *p; { return EOPNOTSUPP; } static int msdosfs_statfs(mp, sbp, p) struct mount *mp; struct statfs *sbp; struct proc *p; { struct msdosfsmount *pmp; pmp = VFSTOMSDOSFS(mp); sbp->f_bsize = pmp->pm_bpcluster; sbp->f_iosize = pmp->pm_bpcluster; sbp->f_blocks = pmp->pm_nmbrofclusters; sbp->f_bfree = pmp->pm_freeclustercount; sbp->f_bavail = pmp->pm_freeclustercount; sbp->f_files = pmp->pm_RootDirEnts; /* XXX */ sbp->f_ffree = 0; /* what to put in here? */ if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } static int msdosfs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *vp, *nvp; struct denode *dep; struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error, allerror = 0; /* * If we ever switch to not updating all of the fats all the time, * this would be the place to update them from the first one. */ if (pmp->pm_fmod != 0) { if (pmp->pm_flags & MSDOSFSMNT_RONLY) panic("msdosfs_sync: rofs mod"); else { /* update fats here */ } } /* * Write back each (modified) denode. */ simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; dep = VTODE(vp); if (vp->v_type == VNON || ((dep->de_flag & (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 && (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) { simple_unlock(&vp->v_interlock); continue; } simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; } error = VOP_FSYNC(vp, cred, waitfor, p); if (error) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); /* * Flush filesystem control info. 
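 *
 * "Control info" here means the metadata that is buffered under the
 * device vnode rather than under individual file vnodes -- the FAT
 * and the directory contents -- so flushing it amounts to a single
 * VOP_FSYNC() on pm_devvp, done below unless this is a lazy sync.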
*/ if (waitfor != MNT_LAZY) { vn_lock(pmp->pm_devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_FSYNC(pmp->pm_devvp, cred, waitfor, p); if (error) allerror = error; VOP_UNLOCK(pmp->pm_devvp, 0, p); } return (allerror); } static int msdosfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct defid *defhp = (struct defid *) fhp; struct denode *dep; struct netcred *np; int error; np = vfs_export_lookup(mp, &pmp->pm_export, nam); if (np == NULL) return (EACCES); error = deget(pmp, defhp->defid_dirclust, defhp->defid_dirofs, &dep); if (error) { *vpp = NULLVP; return (error); } *vpp = DETOV(dep); *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } static int msdosfs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { struct denode *dep; struct defid *defhp; dep = VTODE(vp); defhp = (struct defid *)fhp; defhp->defid_len = sizeof(struct defid); defhp->defid_dirclust = dep->de_dirclust; defhp->defid_dirofs = dep->de_diroffset; /* defhp->defid_gen = dep->de_gen; */ return (0); } static int msdosfs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { return EOPNOTSUPP; } static struct vfsops msdosfs_vfsops = { msdosfs_mount, msdosfs_start, msdosfs_unmount, msdosfs_root, msdosfs_quotactl, msdosfs_statfs, msdosfs_sync, msdosfs_vget, msdosfs_fhtovp, msdosfs_vptofh, msdosfs_init }; VFS_SET(msdosfs_vfsops, msdos, 0); Index: head/sys/msdosfs/msdosfs_vnops.c =================================================================== --- head/sys/msdosfs/msdosfs_vnops.c (revision 49534) +++ head/sys/msdosfs/msdosfs_vnops.c (revision 49535) @@ -1,1986 +1,1985 @@ -/* $Id: msdosfs_vnops.c,v 1.86 1999/06/26 02:46:26 mckusick Exp $ */ +/* $Id: msdosfs_vnops.c,v 1.87 1999/07/25 04:01:32 bde Exp $ */ /* $NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include #include #include #include /* defines plimit structure in proc struct */ #include #include #include #include #include #include #include -#include /* XXX */ /* defines v_rdev */ #include #include #include #include #include #include #include #include #include #include #include #include /* * Prototypes for MSDOSFS vnode operations */ static int msdosfs_create __P((struct vop_create_args *)); static int msdosfs_mknod __P((struct vop_mknod_args *)); static int msdosfs_close __P((struct vop_close_args *)); static int msdosfs_access __P((struct vop_access_args *)); static int msdosfs_getattr __P((struct vop_getattr_args *)); static int msdosfs_setattr __P((struct vop_setattr_args *)); static int msdosfs_read __P((struct vop_read_args *)); static int msdosfs_write __P((struct vop_write_args *)); static int msdosfs_fsync __P((struct vop_fsync_args *)); static int msdosfs_remove __P((struct vop_remove_args *)); static int msdosfs_link __P((struct vop_link_args *)); static int msdosfs_rename __P((struct vop_rename_args *)); static int msdosfs_mkdir __P((struct vop_mkdir_args *)); static int msdosfs_rmdir __P((struct vop_rmdir_args *)); static int msdosfs_symlink __P((struct vop_symlink_args *)); static int msdosfs_readdir __P((struct vop_readdir_args *)); static int msdosfs_abortop __P((struct vop_abortop_args *)); static int msdosfs_bmap __P((struct vop_bmap_args *)); static int msdosfs_strategy __P((struct vop_strategy_args *)); static int msdosfs_print __P((struct vop_print_args *)); static int msdosfs_pathconf __P((struct vop_pathconf_args *ap)); static int msdosfs_getpages __P((struct vop_getpages_args *)); static int msdosfs_putpages __P((struct vop_putpages_args *)); /* * Some general notes: * * In the ufs filesystem the inodes, superblocks, and indirect blocks are * read/written using the vnode for the filesystem. Blocks that represent * the contents of a file are read/written using the vnode for the file * (including directories when they are read/written as files). This * presents problems for the dos filesystem because data that should be in * an inode (if dos had them) resides in the directory itself. Since we * must update directory entries without the benefit of having the vnode * for the directory we must use the vnode for the filesystem. This means * that when a directory is actually read/written (via read, write, or * readdir, or seek) we must use the vnode for the filesystem instead of * the vnode for the directory as would happen in ufs. 
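 * (Concretely, msdosfs_read() below does, roughly,
 *
 *	if (isadir)
 *		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
 *	else
 *		error = bread(vp, lbn, pmp->pm_bpcluster, NOCRED, &bp);
 *
 * after using pcbmap() to turn the directory's cluster number into a
 * device block number, while plain files do i/o through their own
 * vnode.)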
This is to insure we * retreive the correct block from the buffer cache since the hash value is * based upon the vnode address and the desired block number. */ /* * Create a regular file. On entry the directory to contain the file being * created is locked. We must release before we return. We must also free * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or * only if the SAVESTART bit in cn_flags is clear on success. */ static int msdosfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct denode ndirent; struct denode *dep; struct denode *pdep = VTODE(ap->a_dvp); struct timespec ts; int error; #ifdef MSDOSFS_DEBUG printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap); #endif /* * If this is the root directory and there is no space left we * can't do anything. This is because the root directory can not * change size. */ if (pdep->de_StartCluster == MSDOSFSROOT && pdep->de_fndoffset >= pdep->de_FileSize) { error = ENOSPC; goto bad; } /* * Create a directory entry for the file, then call createde() to * have it installed. NOTE: DOS files are always executable. We * use the absence of the owner write bit to make the file * readonly. */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("msdosfs_create: no name"); #endif bzero(&ndirent, sizeof(ndirent)); error = uniqdosname(pdep, cnp, ndirent.de_Name); if (error) goto bad; ndirent.de_Attributes = (ap->a_vap->va_mode & VWRITE) ? ATTR_ARCHIVE : ATTR_ARCHIVE | ATTR_READONLY; ndirent.de_LowerCase = 0; ndirent.de_StartCluster = 0; ndirent.de_FileSize = 0; ndirent.de_dev = pdep->de_dev; ndirent.de_devvp = pdep->de_devvp; ndirent.de_pmp = pdep->de_pmp; ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; getnanotime(&ts); DETIMES(&ndirent, &ts, &ts, &ts); error = createde(&ndirent, pdep, &dep, cnp); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); *ap->a_vpp = DETOV(dep); return (0); bad: zfree(namei_zone, cnp->cn_pnbuf); return (error); } static int msdosfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { switch (ap->a_vap->va_type) { case VDIR: return (msdosfs_mkdir((struct vop_mkdir_args *)ap)); break; case VREG: return (msdosfs_create((struct vop_create_args *)ap)); break; default: zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (EINVAL); } /* NOTREACHED */ } static int msdosfs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); struct timespec ts; simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { getnanotime(&ts); DETIMES(dep, &ts, &ts, &ts); } simple_unlock(&vp->v_interlock); return 0; } static int msdosfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct ucred *cred = ap->a_cred; mode_t mask, file_mode, mode = ap->a_mode; register gid_t *gp; int i; file_mode = (S_IXUSR|S_IXGRP|S_IXOTH) | (S_IRUSR|S_IRGRP|S_IROTH) | ((dep->de_Attributes & ATTR_READONLY) ? 
0 : (S_IWUSR|S_IWGRP|S_IWOTH)); file_mode &= pmp->pm_mask; /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ if (mode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; default: break; } } /* User id 0 always gets access. */ if (cred->cr_uid == 0) return 0; mask = 0; /* Otherwise, check the owner. */ if (cred->cr_uid == pmp->pm_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return (file_mode & mask) == mask ? 0 : EACCES; } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (pmp->pm_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return (file_mode & mask) == mask ? 0 : EACCES; } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return (file_mode & mask) == mask ? 0 : EACCES; } static int msdosfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct vattr *vap = ap->a_vap; mode_t mode; struct timespec ts; u_long dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry); u_long fileid; getnanotime(&ts); DETIMES(dep, &ts, &ts, &ts); vap->va_fsid = dev2udev(dep->de_dev); /* * The following computation of the fileid must be the same as that * used in msdosfs_readdir() to compute d_fileno. If not, pwd * doesn't work. */ if (dep->de_Attributes & ATTR_DIRECTORY) { fileid = cntobn(pmp, dep->de_StartCluster) * dirsperblk; if (dep->de_StartCluster == MSDOSFSROOT) fileid = 1; } else { fileid = cntobn(pmp, dep->de_dirclust) * dirsperblk; if (dep->de_dirclust == MSDOSFSROOT) fileid = roottobn(pmp, 0) * dirsperblk; fileid += dep->de_diroffset / sizeof(struct direntry); } vap->va_fileid = fileid; if ((dep->de_Attributes & ATTR_READONLY) == 0) mode = S_IRWXU|S_IRWXG|S_IRWXO; else mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; vap->va_mode = mode & pmp->pm_mask; vap->va_uid = pmp->pm_uid; vap->va_gid = pmp->pm_gid; vap->va_nlink = 1; vap->va_rdev = 0; vap->va_size = dep->de_FileSize; dos2unixtime(dep->de_MDate, dep->de_MTime, 0, &vap->va_mtime); if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) { dos2unixtime(dep->de_ADate, 0, 0, &vap->va_atime); dos2unixtime(dep->de_CDate, dep->de_CTime, dep->de_CHun, &vap->va_ctime); } else { vap->va_atime = vap->va_mtime; vap->va_ctime = vap->va_mtime; } vap->va_flags = 0; if ((dep->de_Attributes & ATTR_ARCHIVE) == 0) vap->va_flags |= SF_ARCHIVED; vap->va_gen = 0; vap->va_blocksize = pmp->pm_bpcluster; vap->va_bytes = (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask; vap->va_type = ap->a_vp->v_type; vap->va_filerev = dep->de_modrev; return (0); } static int msdosfs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct vattr *vap = ap->a_vap; struct ucred *cred = ap->a_cred; int error = 0; #ifdef MSDOSFS_DEBUG printf("msdosfs_setattr(): vp %p, vap %p, cred %p, p %p\n", ap->a_vp, vap, cred, ap->a_p); #endif /* * Check for unsettable attributes. 
*/ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { #ifdef MSDOSFS_DEBUG printf("msdosfs_setattr(): returning EINVAL\n"); printf(" va_type %d, va_nlink %x, va_fsid %lx, va_fileid %lx\n", vap->va_type, vap->va_nlink, vap->va_fsid, vap->va_fileid); printf(" va_blocksize %lx, va_rdev %x, va_bytes %qx, va_gen %lx\n", vap->va_blocksize, vap->va_rdev, vap->va_bytes, vap->va_gen); printf(" va_uid %x, va_gid %x\n", vap->va_uid, vap->va_gid); #endif return (EINVAL); } if (vap->va_flags != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser_xxx(cred, ap->a_p, PRISON_ROOT))) return (error); /* * We are very inconsistent about handling unsupported * attributes. We ignored the access time and the * read and execute bits. We were strict for the other * attributes. * * Here we are strict, stricter than ufs in not allowing * users to attempt to set SF_SETTABLE bits or anyone to * set unsupported bits. However, we ignore attempts to * set ATTR_ARCHIVE for directories `cp -pr' from a more * sensible file system attempts it a lot. */ if (cred->cr_uid != 0) { if (vap->va_flags & SF_SETTABLE) return EPERM; } if (vap->va_flags & ~SF_ARCHIVED) return EOPNOTSUPP; if (vap->va_flags & SF_ARCHIVED) dep->de_Attributes &= ~ATTR_ARCHIVE; else if (!(dep->de_Attributes & ATTR_DIRECTORY)) dep->de_Attributes |= ATTR_ARCHIVE; dep->de_flag |= DE_MODIFIED; } if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { uid_t uid; gid_t gid; if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); uid = vap->va_uid; if (uid == (uid_t)VNOVAL) uid = pmp->pm_uid; gid = vap->va_gid; if (gid == (gid_t)VNOVAL) gid = pmp->pm_gid; if ((cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid || (gid != pmp->pm_gid && !groupmember(gid, cred))) && (error = suser_xxx(cred, ap->a_p, PRISON_ROOT))) return error; if (uid != pmp->pm_uid || gid != pmp->pm_gid) return EINVAL; } if (vap->va_size != VNOVAL) { /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ switch (vp->v_type) { case VDIR: return (EISDIR); /* NOT REACHED */ case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; default: break; } error = detrunc(dep, vap->va_size, 0, cred, ap->a_p); if (error) return error; } if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser_xxx(cred, ap->a_p, PRISON_ROOT)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(ap->a_vp, VWRITE, cred, ap->a_p)))) return (error); if (vp->v_type != VDIR) { if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 && vap->va_atime.tv_sec != VNOVAL) unix2dostime(&vap->va_atime, &dep->de_ADate, NULL, NULL); if (vap->va_mtime.tv_sec != VNOVAL) unix2dostime(&vap->va_mtime, &dep->de_MDate, &dep->de_MTime, NULL); dep->de_Attributes |= ATTR_ARCHIVE; dep->de_flag |= DE_MODIFIED; } } /* * DOS files only have the ability to have their writability * attribute set, so we use the owner write bit to set the readonly * attribute. 
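 * A chmod therefore boils down to a single bit, roughly
 *
 *	if (vap->va_mode & VWRITE)
 *		dep->de_Attributes &= ~ATTR_READONLY;
 *	else
 *		dep->de_Attributes |= ATTR_READONLY;
 *
 * which is the counterpart of msdosfs_getattr() reporting the write
 * bits (masked by pm_mask) whenever ATTR_READONLY is clear.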
*/ if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != pmp->pm_uid && (error = suser_xxx(cred, ap->a_p, PRISON_ROOT))) return (error); if (vp->v_type != VDIR) { /* We ignore the read and execute bits. */ if (vap->va_mode & VWRITE) dep->de_Attributes &= ~ATTR_READONLY; else dep->de_Attributes |= ATTR_READONLY; dep->de_flag |= DE_MODIFIED; } } return (deupdat(dep, 1)); } static int msdosfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error = 0; int diff; int blsize; int isadir; int orig_resid; long n; long on; daddr_t lbn; daddr_t rablock; int rasize; struct buf *bp; struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct uio *uio = ap->a_uio; if (uio->uio_offset < 0) return (EINVAL); /* * If they didn't ask for any data, then we are done. */ orig_resid = uio->uio_resid; if (orig_resid <= 0) return (0); isadir = dep->de_Attributes & ATTR_DIRECTORY; do { lbn = de_cluster(pmp, uio->uio_offset); on = uio->uio_offset & pmp->pm_crbomask; n = min((u_long) (pmp->pm_bpcluster - on), uio->uio_resid); diff = dep->de_FileSize - uio->uio_offset; if (diff <= 0) break; if (diff < n) n = diff; /* convert cluster # to block # if a directory */ if (isadir) { error = pcbmap(dep, lbn, &lbn, 0, &blsize); if (error) break; } /* * If we are operating on a directory file then be sure to * do i/o with the vnode for the filesystem instead of the * vnode for the directory. */ if (isadir) { error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp); } else { rablock = lbn + 1; if (vp->v_lastr + 1 == lbn && de_cn2off(pmp, rablock) < dep->de_FileSize) { rasize = pmp->pm_bpcluster; error = breadn(vp, lbn, pmp->pm_bpcluster, &rablock, &rasize, 1, NOCRED, &bp); } else error = bread(vp, lbn, pmp->pm_bpcluster, NOCRED, &bp); vp->v_lastr = lbn; } n = min(n, pmp->pm_bpcluster - bp->b_resid); if (error) { brelse(bp); break; } error = uiomove(bp->b_data + on, (int) n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); if (!isadir && (error == 0 || uio->uio_resid != orig_resid) && (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) dep->de_flag |= DE_ACCESS; return (error); } /* * Write data to a file or directory. */ static int msdosfs_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int n; int croffset; int resid; u_long osize; int error = 0; u_long count; daddr_t bn, lastcn; struct buf *bp; int ioflag = ap->a_ioflag; struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct vnode *vp = ap->a_vp; struct vnode *thisvp; struct denode *dep = VTODE(vp); struct msdosfsmount *pmp = dep->de_pmp; struct ucred *cred = ap->a_cred; #ifdef MSDOSFS_DEBUG printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n", vp, uio, ioflag, cred); printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n", dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster); #endif switch (vp->v_type) { case VREG: if (ioflag & IO_APPEND) uio->uio_offset = dep->de_FileSize; thisvp = vp; break; case VDIR: return EISDIR; default: panic("msdosfs_write(): bad file type"); } if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); /* * If they've exceeded their filesize limit, tell them about it. 
*/ if (p && ((uio->uio_offset + uio->uio_resid) > p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) { psignal(p, SIGXFSZ); return (EFBIG); } /* * If the offset we are starting the write at is beyond the end of * the file, then they've done a seek. Unix filesystems allow * files with holes in them, DOS doesn't so we must fill the hole * with zeroed blocks. */ if (uio->uio_offset > dep->de_FileSize) { error = deextend(dep, uio->uio_offset, cred); if (error) return (error); } /* * Remember some values in case the write fails. */ resid = uio->uio_resid; osize = dep->de_FileSize; /* * If we write beyond the end of the file, extend it to its ultimate * size ahead of the time to hopefully get a contiguous area. */ if (uio->uio_offset + resid > osize) { count = de_clcount(pmp, uio->uio_offset + resid) - de_clcount(pmp, osize); error = extendfile(dep, count, NULL, NULL, 0); if (error && (error != ENOSPC || (ioflag & IO_UNIT))) goto errexit; lastcn = dep->de_fc[FC_LASTFC].fc_frcn; } else lastcn = de_clcount(pmp, osize) - 1; do { if (de_cluster(pmp, uio->uio_offset) > lastcn) { error = ENOSPC; break; } croffset = uio->uio_offset & pmp->pm_crbomask; n = min(uio->uio_resid, pmp->pm_bpcluster - croffset); if (uio->uio_offset + n > dep->de_FileSize) { dep->de_FileSize = uio->uio_offset + n; /* The object size needs to be set before buffer is allocated */ vnode_pager_setsize(vp, dep->de_FileSize); } bn = de_cluster(pmp, uio->uio_offset); if ((uio->uio_offset & pmp->pm_crbomask) == 0 && (de_cluster(pmp, uio->uio_offset + uio->uio_resid) > de_cluster(pmp, uio->uio_offset) || uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) { /* * If either the whole cluster gets written, * or we write the cluster from its start beyond EOF, * then no need to read data from disk. */ bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0); clrbuf(bp); /* * Do the bmap now, since pcbmap needs buffers * for the fat table. (see msdosfs_strategy) */ if (bp->b_blkno == bp->b_lblkno) { error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); if (error) bp->b_blkno = -1; } if (bp->b_blkno == -1) { brelse(bp); if (!error) error = EIO; /* XXX */ break; } } else { /* * The block we need to write into exists, so read it in. */ error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp); if (error) { brelse(bp); break; } } /* * Should these vnode_pager_* functions be done on dir * files? */ /* * Copy the data from user space into the buf header. */ error = uiomove(bp->b_data + croffset, n, uio); /* * If they want this synchronous then write it and wait for * it. Otherwise, if on a cluster boundary write it * asynchronously so we can move on to the next block * without delay. Otherwise do a delayed write because we * may want to write somemore into the block later. */ if (ioflag & IO_SYNC) (void) bwrite(bp); else if (n + croffset == pmp->pm_bpcluster) bawrite(bp); else bdwrite(bp); dep->de_flag |= DE_UPDATE; } while (error == 0 && uio->uio_resid > 0); /* * If the write failed and they want us to, truncate the file back * to the size it was before the write was attempted. */ errexit: if (error) { if (ioflag & IO_UNIT) { detrunc(dep, osize, ioflag & IO_SYNC, NOCRED, NULL); uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } else { detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL); if (uio->uio_resid != resid) error = 0; } } else if (ioflag & IO_SYNC) error = deupdat(dep, 1); return (error); } /* * Flush the blocks of a file to disk. * * This function is worthless for vnodes that represent directories. 
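 * (That is because directory contents are buffered under the
 * filesystem's device vnode, not under the directory's own vnode --
 * see the general notes near the top of this file -- so the
 * v_dirtyblkhd walk below has nothing to flush for a directory.)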
Maybe we * could just do a sync if they try an fsync on a directory file. */ static int msdosfs_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; int s; struct buf *bp, *nbp; /* * Flush all dirty buffers associated with a vnode. */ loop: s = splbio(); for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("msdosfs_fsync: not dirty"); bremfree(bp); splx(s); (void) bwrite(bp); goto loop; } while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "msdosfsn", 0); } #ifdef DIAGNOSTIC if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { vprint("msdosfs_fsync: dirty", vp); goto loop; } #endif splx(s); return (deupdat(VTODE(vp), ap->a_waitfor == MNT_WAIT)); } static int msdosfs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); struct denode *ddep = VTODE(ap->a_dvp); int error; if (ap->a_vp->v_type == VDIR) error = EPERM; else error = removede(ddep, dep); #ifdef MSDOSFS_DEBUG printf("msdosfs_remove(), dep %p, v_usecount %d\n", dep, ap->a_vp->v_usecount); #endif return (error); } /* * DOS filesystems don't know what links are. But since we already called * msdosfs_lookup() with create and lockparent, the parent is locked so we * have to free it before we return the error. */ static int msdosfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { VOP_ABORTOP(ap->a_tdvp, ap->a_cnp); return (EOPNOTSUPP); } /* * Renames on files require moving the denode to a new hash queue since the * denode's location is used to compute which hash queue to put the file * in. Unless it is a rename in place. For example "mv a b". * * What follows is the basic algorithm: * * if (file move) { * if (dest file exists) { * remove dest file * } * if (dest and src in same directory) { * rewrite name in existing directory slot * } else { * write new entry in dest directory * update offset and dirclust in denode * move denode to new hash chain * clear old directory entry * } * } else { * directory move * if (dest directory exists) { * if (dest is not empty) { * return ENOTEMPTY * } * remove dest directory * } * if (dest and src in same directory) { * rewrite name in existing entry * } else { * be sure dest is not a child of src directory * write entry in dest directory * update "." and ".." in moved directory * clear old directory entry for moved directory * } * } * * On entry: * source's parent directory is unlocked * source file or directory is unlocked * destination's parent directory is locked * destination file or directory is locked if it exists * * On exit: * all denodes should be released * * Notes: * I'm not sure how the memory containing the pathnames pointed at by the * componentname structures is freed, there may be some memory bleeding * for each rename done. 
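 *
 * Note also that a rename can change a plain file's fileid, because
 * msdosfs_getattr() synthesizes fileids from the directory entry's
 * location (de_dirclust/de_diroffset), both of which are rewritten
 * below.  Directories keep a stable fileid across a rename since
 * theirs is derived from de_StartCluster instead.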
*/ static int msdosfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tvp = ap->a_tvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct denode *ip, *xp, *dp, *zp; u_char toname[11], oldname[11]; u_long from_diroffset, to_diroffset; u_char to_count; int doingdirectory = 0, newparent = 0; int error; u_long cn; daddr_t bn; struct denode *fddep; /* from file's parent directory */ struct denode *fdep; /* from file or directory */ struct denode *tddep; /* to file's parent directory */ struct denode *tdep; /* to file or directory */ struct msdosfsmount *pmp; struct direntry *dotdotp; struct buf *bp; fddep = VTODE(ap->a_fdvp); fdep = VTODE(ap->a_fvp); tddep = VTODE(ap->a_tdvp); tdep = tvp ? VTODE(tvp) : NULL; pmp = fddep->de_pmp; pmp = VFSTOMSDOSFS(fdvp->v_mount); #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("msdosfs_rename: no name"); #endif /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); vrele(fdvp); vrele(fvp); return (error); } /* * If source and dest are the same, do nothing. */ if (tvp == fvp) { error = 0; goto abortit; } error = vn_lock(fvp, LK_EXCLUSIVE, p); if (error) goto abortit; dp = VTODE(fdvp); ip = VTODE(fvp); /* * Be sure we are not renaming ".", "..", or an alias of ".". This * leads to a crippled directory tree. It's pretty tough to do a * "ls" or "pwd" with the "." directory entry missing, and "cd .." * doesn't work if the ".." entry is missing. */ if (ip->de_Attributes & ATTR_DIRECTORY) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags & ISDOTDOT) || (tcnp->cn_flags & ISDOTDOT) || (ip->de_flag & DE_RENAME)) { VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } ip->de_flag |= DE_RENAME; doingdirectory++; } /* * When the target exists, both the directory * and target vnodes are returned locked. */ dp = VTODE(tdvp); xp = tvp ? VTODE(tvp) : NULL; /* * Remember direntry place to use for destination */ to_diroffset = dp->de_fndoffset; to_count = dp->de_fndcnt; /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to doscheckpath(). 
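 *
 * In outline, doscheckpath() climbs from the destination directory
 * toward the root by following each directory's on-disk ".." entry
 * and fails if it ever lands on the source directory, which is what
 * enforces the "not above the target" rule described here.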
*/ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); VOP_UNLOCK(fvp, 0, p); if (VTODE(fdvp)->de_StartCluster != VTODE(tdvp)->de_StartCluster) newparent = 1; vrele(fdvp); if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); /* * doscheckpath() vput()'s dp, * so we have to do a relookup afterwards */ error = doscheckpath(ip, dp); if (error) goto out; if ((tcnp->cn_flags & SAVESTART) == 0) panic("msdosfs_rename: lost to startdir"); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; dp = VTODE(tdvp); xp = tvp ? VTODE(tvp) : NULL; } if (xp != NULL) { /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if (xp->de_Attributes & ATTR_DIRECTORY) { if (!dosdirempty(xp)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } error = removede(dp, xp); if (error) goto bad; vput(tvp); xp = NULL; } /* * Convert the filename in tcnp into a dos filename. We copy this * into the denode and directory entry for the destination * file/directory. */ error = uniqdosname(VTODE(tdvp), tcnp, toname); if (error) goto abortit; /* * Since from wasn't locked at various places above, * have to do a relookup here. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("msdosfs_rename: lost from startdir"); if (!newparent) VOP_UNLOCK(tdvp, 0, p); (void) relookup(fdvp, &fvp, fcnp); if (fvp == NULL) { /* * From name has disappeared. */ if (doingdirectory) panic("rename: lost dir entry"); vrele(ap->a_fvp); if (newparent) VOP_UNLOCK(tdvp, 0, p); vrele(tdvp); return 0; } xp = VTODE(fvp); zp = VTODE(fdvp); from_diroffset = zp->de_fndoffset; /* * Ensure that the directory entry still exists and has not * changed till now. If the source is a file the entry may * have been unlinked or renamed. In either case there is * no further work to be done. If the source is a directory * then it cannot have been rmdir'ed or renamed; this is * prohibited by the DE_RENAME flag. */ if (xp != ip) { if (doingdirectory) panic("rename: lost dir entry"); vrele(ap->a_fvp); VOP_UNLOCK(fvp, 0, p); if (newparent) VOP_UNLOCK(fdvp, 0, p); xp = NULL; } else { vrele(fvp); xp = NULL; /* * First write a new entry in the destination * directory and mark the entry in the source directory * as deleted. Then move the denode to the correct hash * chain for its new location in the filesystem. And, if * we moved a directory, then update its .. entry to point * to the new parent directory. 
*/ bcopy(ip->de_Name, oldname, 11); bcopy(toname, ip->de_Name, 11); /* update denode */ dp->de_fndoffset = to_diroffset; dp->de_fndcnt = to_count; error = createde(ip, dp, (struct denode **)0, tcnp); if (error) { bcopy(oldname, ip->de_Name, 11); if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } ip->de_refcnt++; zp->de_fndoffset = from_diroffset; error = removede(zp, ip); if (error) { /* XXX should really panic here, fs is corrupt */ if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } if (!doingdirectory) { error = pcbmap(dp, de_cluster(pmp, to_diroffset), 0, &ip->de_dirclust, 0); if (error) { /* XXX should really panic here, fs is corrupt */ if (newparent) VOP_UNLOCK(fdvp, 0, p); VOP_UNLOCK(fvp, 0, p); goto bad; } if (ip->de_dirclust == MSDOSFSROOT) ip->de_diroffset = to_diroffset; else ip->de_diroffset = to_diroffset & pmp->pm_crbomask; } reinsert(ip); if (newparent) VOP_UNLOCK(fdvp, 0, p); } /* * If we moved a directory to a new parent directory, then we must * fixup the ".." entry in the moved directory. */ if (doingdirectory && newparent) { cn = ip->de_StartCluster; if (cn == MSDOSFSROOT) { /* this should never happen */ panic("msdosfs_rename(): updating .. in root directory?"); } else bn = cntobn(pmp, cn); error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster, NOCRED, &bp); if (error) { /* XXX should really panic here, fs is corrupt */ brelse(bp); VOP_UNLOCK(fvp, 0, p); goto bad; } dotdotp = (struct direntry *)bp->b_data + 1; putushort(dotdotp->deStartCluster, dp->de_StartCluster); if (FAT32(pmp)) putushort(dotdotp->deHighClust, dp->de_StartCluster >> 16); error = bwrite(bp); if (error) { /* XXX should really panic here, fs is corrupt */ VOP_UNLOCK(fvp, 0, p); goto bad; } } VOP_UNLOCK(fvp, 0, p); bad: if (xp) vput(tvp); vput(tdvp); out: ip->de_flag &= ~DE_RENAME; vrele(fdvp); vrele(fvp); return (error); } static struct { struct direntry dot; struct direntry dotdot; } dosdirtemplate = { { ". ", " ", /* the . entry */ ATTR_DIRECTORY, /* file attribute */ 0, /* reserved */ 0, { 0, 0 }, { 0, 0 }, /* create time & date */ { 0, 0 }, /* access date */ { 0, 0 }, /* high bits of start cluster */ { 210, 4 }, { 210, 4 }, /* modify time & date */ { 0, 0 }, /* startcluster */ { 0, 0, 0, 0 } /* filesize */ }, { ".. ", " ", /* the .. entry */ ATTR_DIRECTORY, /* file attribute */ 0, /* reserved */ 0, { 0, 0 }, { 0, 0 }, /* create time & date */ { 0, 0 }, /* access date */ { 0, 0 }, /* high bits of start cluster */ { 210, 4 }, { 210, 4 }, /* modify time & date */ { 0, 0 }, /* startcluster */ { 0, 0, 0, 0 } /* filesize */ } }; static int msdosfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struvt vnode **a_vpp; struvt componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct denode *dep; struct denode *pdep = VTODE(ap->a_dvp); struct direntry *denp; struct msdosfsmount *pmp = pdep->de_pmp; struct buf *bp; u_long newcluster, pcl; int bn; int error; struct denode ndirent; struct timespec ts; /* * If this is the root directory and there is no space left we * can't do anything. This is because the root directory can not * change size. */ if (pdep->de_StartCluster == MSDOSFSROOT && pdep->de_fndoffset >= pdep->de_FileSize) { error = ENOSPC; goto bad2; } /* * Allocate a cluster to hold the about to be created directory. 
*/ error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL); if (error) goto bad2; bzero(&ndirent, sizeof(ndirent)); ndirent.de_pmp = pmp; ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; getnanotime(&ts); DETIMES(&ndirent, &ts, &ts, &ts); /* * Now fill the cluster with the "." and ".." entries. And write * the cluster to disk. This way it is there for the parent * directory to be pointing at if there were a crash. */ bn = cntobn(pmp, newcluster); /* always succeeds */ bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0); bzero(bp->b_data, pmp->pm_bpcluster); bcopy(&dosdirtemplate, bp->b_data, sizeof dosdirtemplate); denp = (struct direntry *)bp->b_data; putushort(denp[0].deStartCluster, newcluster); putushort(denp[0].deCDate, ndirent.de_CDate); putushort(denp[0].deCTime, ndirent.de_CTime); denp[0].deCHundredth = ndirent.de_CHun; putushort(denp[0].deADate, ndirent.de_ADate); putushort(denp[0].deMDate, ndirent.de_MDate); putushort(denp[0].deMTime, ndirent.de_MTime); pcl = pdep->de_StartCluster; if (FAT32(pmp) && pcl == pmp->pm_rootdirblk) pcl = 0; putushort(denp[1].deStartCluster, pcl); putushort(denp[1].deCDate, ndirent.de_CDate); putushort(denp[1].deCTime, ndirent.de_CTime); denp[1].deCHundredth = ndirent.de_CHun; putushort(denp[1].deADate, ndirent.de_ADate); putushort(denp[1].deMDate, ndirent.de_MDate); putushort(denp[1].deMTime, ndirent.de_MTime); if (FAT32(pmp)) { putushort(denp[0].deHighClust, newcluster >> 16); putushort(denp[1].deHighClust, pdep->de_StartCluster >> 16); } error = bwrite(bp); if (error) goto bad; /* * Now build up a directory entry pointing to the newly allocated * cluster. This will be written to an empty slot in the parent * directory. */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("msdosfs_mkdir: no name"); #endif error = uniqdosname(pdep, cnp, ndirent.de_Name); if (error) goto bad; ndirent.de_Attributes = ATTR_DIRECTORY; ndirent.de_LowerCase = 0; ndirent.de_StartCluster = newcluster; ndirent.de_FileSize = 0; ndirent.de_dev = pdep->de_dev; ndirent.de_devvp = pdep->de_devvp; error = createde(&ndirent, pdep, &dep, cnp); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); *ap->a_vpp = DETOV(dep); return (0); bad: clusterfree(pmp, newcluster, NULL); bad2: zfree(namei_zone, cnp->cn_pnbuf); return (error); } static int msdosfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register struct denode *ip, *dp; struct proc *p = cnp->cn_proc; int error; ip = VTODE(vp); dp = VTODE(dvp); /* * Verify the directory is empty (and valid). * (Rmdir ".." won't be valid since * ".." will contain a reference to * the current directory and thus be * non-empty.) */ error = 0; if (!dosdirempty(ip) || ip->de_flag & DE_RENAME) { error = ENOTEMPTY; goto out; } /* * Delete the entry from the directory. For dos filesystems this * gets rid of the directory entry on disk, the in memory copy * still exists but the de_refcnt is <= 0. This prevents it from * being found by deget(). When the vput() on dep is done we give * up access and eventually msdosfs_reclaim() will be called which * will remove it from the denode cache. */ error = removede(dp, ip); if (error) goto out; /* * This is where we decrement the link count in the parent * directory. Since dos filesystems don't do this we just purge * the name cache. 
*/ cache_purge(dvp); VOP_UNLOCK(dvp, 0, p); /* * Truncate the directory that is being deleted. */ error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred, p); cache_purge(vp); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); out: return (error); } /* * DOS filesystems don't know what symlinks are. */ static int msdosfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { zfree(namei_zone, ap->a_cnp->cn_pnbuf); /* VOP_ABORTOP(ap->a_dvp, ap->a_cnp); ??? */ return (EOPNOTSUPP); } static int msdosfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *a_ncookies; u_long **a_cookies; } */ *ap; { int error = 0; int diff; long n; int blsize; long on; u_long cn; u_long fileno; u_long dirsperblk; long bias = 0; daddr_t bn, lbn; struct buf *bp; struct denode *dep = VTODE(ap->a_vp); struct msdosfsmount *pmp = dep->de_pmp; struct direntry *dentp; struct dirent dirbuf; struct uio *uio = ap->a_uio; u_long *cookies = NULL; int ncookies = 0; off_t offset, off; int chksum = -1; #ifdef MSDOSFS_DEBUG printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n", ap->a_vp, uio, ap->a_cred, ap->a_eofflag); #endif /* * msdosfs_readdir() won't operate properly on regular files since * it does i/o only with the the filesystem vnode, and hence can * retrieve the wrong block from the buffer cache for a plain file. * So, fail attempts to readdir() on a plain file. */ if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) return (ENOTDIR); /* * To be safe, initialize dirbuf */ bzero(dirbuf.d_name, sizeof(dirbuf.d_name)); /* * If the user buffer is smaller than the size of one dos directory * entry or the file offset is not a multiple of the size of a * directory entry, then we fail the read. */ off = offset = uio->uio_offset; if (uio->uio_resid < sizeof(struct direntry) || (offset & (sizeof(struct direntry) - 1))) return (EINVAL); if (ap->a_ncookies) { ncookies = uio->uio_resid / 16; MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); *ap->a_cookies = cookies; *ap->a_ncookies = ncookies; } dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry); /* * If they are reading from the root directory then, we simulate * the . and .. entries since these don't exist in the root * directory. We also set the offset bias to make up for having to * simulate these entries. By this I mean that at file offset 64 we * read the first entry in the root directory that lives on disk. */ if (dep->de_StartCluster == MSDOSFSROOT || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) { #if 0 printf("msdosfs_readdir(): going after . or .. 
in root dir, offset %d\n", offset); #endif bias = 2 * sizeof(struct direntry); if (offset < bias) { for (n = (int)offset / sizeof(struct direntry); n < 2; n++) { if (FAT32(pmp)) dirbuf.d_fileno = cntobn(pmp, pmp->pm_rootdirblk) * dirsperblk; else dirbuf.d_fileno = 1; dirbuf.d_type = DT_DIR; switch (n) { case 0: dirbuf.d_namlen = 1; strcpy(dirbuf.d_name, "."); break; case 1: dirbuf.d_namlen = 2; strcpy(dirbuf.d_name, ".."); break; } dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); if (uio->uio_resid < dirbuf.d_reclen) goto out; error = uiomove((caddr_t) &dirbuf, dirbuf.d_reclen, uio); if (error) goto out; offset += sizeof(struct direntry); off = offset; if (cookies) { *cookies++ = offset; if (--ncookies <= 0) goto out; } } } } off = offset; while (uio->uio_resid > 0) { lbn = de_cluster(pmp, offset - bias); on = (offset - bias) & pmp->pm_crbomask; n = min(pmp->pm_bpcluster - on, uio->uio_resid); diff = dep->de_FileSize - (offset - bias); if (diff <= 0) break; n = min(n, diff); error = pcbmap(dep, lbn, &bn, &cn, &blsize); if (error) break; error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } n = min(n, blsize - bp->b_resid); /* * Convert from dos directory entries to fs-independent * directory entries. */ for (dentp = (struct direntry *)(bp->b_data + on); (char *)dentp < bp->b_data + on + n; dentp++, offset += sizeof(struct direntry)) { #if 0 printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n", dentp, prev, crnt, dentp->deName[0], dentp->deAttributes); #endif /* * If this is an unused entry, we can stop. */ if (dentp->deName[0] == SLOT_EMPTY) { brelse(bp); goto out; } /* * Skip deleted entries. */ if (dentp->deName[0] == SLOT_DELETED) { chksum = -1; continue; } /* * Handle Win95 long directory entries */ if (dentp->deAttributes == ATTR_WIN95) { if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) continue; chksum = win2unixfn((struct winentry *)dentp, &dirbuf, chksum, pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2w); continue; } /* * Skip volume labels */ if (dentp->deAttributes & ATTR_VOLUME) { chksum = -1; continue; } /* * This computation of d_fileno must match * the computation of va_fileid in * msdosfs_getattr. */ if (dentp->deAttributes & ATTR_DIRECTORY) { fileno = getushort(dentp->deStartCluster); if (FAT32(pmp)) fileno |= getushort(dentp->deHighClust) << 16; /* if this is the root directory */ if (fileno == MSDOSFSROOT) if (FAT32(pmp)) fileno = cntobn(pmp, pmp->pm_rootdirblk) * dirsperblk; else fileno = 1; else fileno = cntobn(pmp, fileno) * dirsperblk; dirbuf.d_fileno = fileno; dirbuf.d_type = DT_DIR; } else { dirbuf.d_fileno = offset / sizeof(struct direntry); dirbuf.d_type = DT_REG; } if (chksum != winChksum(dentp->deName)) dirbuf.d_namlen = dos2unixfn(dentp->deName, (u_char *)dirbuf.d_name, dentp->deLowerCase | ((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ? 
(LCASE_BASE | LCASE_EXT) : 0), pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_d2u, pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_ul); else dirbuf.d_name[dirbuf.d_namlen] = 0; chksum = -1; dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); if (uio->uio_resid < dirbuf.d_reclen) { brelse(bp); goto out; } error = uiomove((caddr_t) &dirbuf, dirbuf.d_reclen, uio); if (error) { brelse(bp); goto out; } if (cookies) { *cookies++ = offset + sizeof(struct direntry); if (--ncookies <= 0) { brelse(bp); goto out; } } off = offset + sizeof(struct direntry); } brelse(bp); } out: /* Subtract unused cookies */ if (ap->a_ncookies) *ap->a_ncookies -= ncookies; uio->uio_offset = off; /* * Set the eofflag (NFS uses it) */ if (ap->a_eofflag) { if (dep->de_FileSize - (offset - bias) <= 0) *ap->a_eofflag = 1; else *ap->a_eofflag = 0; } return (error); } static int msdosfs_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (0); } /* * vp - address of vnode file the file * bn - which cluster we are interested in mapping to a filesystem block number. * vpp - returns the vnode for the block special file holding the filesystem * containing the file of interest * bnp - address of where to return the filesystem relative block number */ static int msdosfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); if (ap->a_vpp != NULL) *ap->a_vpp = dep->de_devvp; if (ap->a_bnp == NULL) return (0); if (ap->a_runp) { /* * Sequential clusters should be counted here. */ *ap->a_runp = 0; } if (ap->a_runb) { *ap->a_runb = 0; } return (pcbmap(dep, ap->a_bn, ap->a_bnp, 0, 0)); } static int msdosfs_strategy(ap) struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap; { struct buf *bp = ap->a_bp; struct denode *dep = VTODE(bp->b_vp); struct vnode *vp; int error = 0; if (bp->b_vp->v_type == VBLK || bp->b_vp->v_type == VCHR) panic("msdosfs_strategy: spec"); /* * If we don't already know the filesystem relative block number * then get it using pcbmap(). If pcbmap() returns the block * number as -1 then we've got a hole in the file. DOS filesystems * don't allow files with holes, so we shouldn't ever see this. */ if (bp->b_blkno == bp->b_lblkno) { error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); if (error) { bp->b_error = error; bp->b_flags |= B_ERROR; biodone(bp); return (error); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if (bp->b_blkno == -1) { biodone(bp); return (0); } /* * Read/write the block from/to the disk that contains the desired * file block. */ vp = dep->de_devvp; bp->b_dev = vp->v_rdev; VOP_STRATEGY(vp, bp); return (0); } static int msdosfs_print(ap) struct vop_print_args /* { struct vnode *vp; } */ *ap; { struct denode *dep = VTODE(ap->a_vp); printf( "tag VT_MSDOSFS, startcluster %lu, dircluster %lu, diroffset %lu ", dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset); printf(" dev %d, %d", major(dep->de_dev), minor(dep->de_dev)); lockmgr_printinfo(&dep->de_lock); printf("\n"); return (0); } static int msdosfs_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp; switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = 1; return (0); case _PC_NAME_MAX: *ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ? 
WIN_MAXLEN : 12; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_NO_TRUNC: *ap->a_retval = 0; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * get page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int msdosfs_getpages(ap) struct vop_getpages_args *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage); } /* * put page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int msdosfs_putpages(ap) struct vop_putpages_args *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } /* Global vfs data structures for msdosfs */ vop_t **msdosfs_vnodeop_p; static struct vnodeopv_entry_desc msdosfs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) msdosfs_abortop }, { &vop_access_desc, (vop_t *) msdosfs_access }, { &vop_bmap_desc, (vop_t *) msdosfs_bmap }, { &vop_cachedlookup_desc, (vop_t *) msdosfs_lookup }, { &vop_close_desc, (vop_t *) msdosfs_close }, { &vop_create_desc, (vop_t *) msdosfs_create }, { &vop_fsync_desc, (vop_t *) msdosfs_fsync }, { &vop_getattr_desc, (vop_t *) msdosfs_getattr }, { &vop_inactive_desc, (vop_t *) msdosfs_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_link_desc, (vop_t *) msdosfs_link }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, { &vop_mkdir_desc, (vop_t *) msdosfs_mkdir }, { &vop_mknod_desc, (vop_t *) msdosfs_mknod }, { &vop_pathconf_desc, (vop_t *) msdosfs_pathconf }, { &vop_print_desc, (vop_t *) msdosfs_print }, { &vop_read_desc, (vop_t *) msdosfs_read }, { &vop_readdir_desc, (vop_t *) msdosfs_readdir }, { &vop_reclaim_desc, (vop_t *) msdosfs_reclaim }, { &vop_remove_desc, (vop_t *) msdosfs_remove }, { &vop_rename_desc, (vop_t *) msdosfs_rename }, { &vop_rmdir_desc, (vop_t *) msdosfs_rmdir }, { &vop_setattr_desc, (vop_t *) msdosfs_setattr }, { &vop_strategy_desc, (vop_t *) msdosfs_strategy }, { &vop_symlink_desc, (vop_t *) msdosfs_symlink }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { &vop_write_desc, (vop_t *) msdosfs_write }, { &vop_getpages_desc, (vop_t *) msdosfs_getpages }, { &vop_putpages_desc, (vop_t *) msdosfs_putpages }, { NULL, NULL } }; static struct vnodeopv_desc msdosfs_vnodeop_opv_desc = { &msdosfs_vnodeop_p, msdosfs_vnodeop_entries }; VNODEOP_SET(msdosfs_vnodeop_opv_desc); Index: head/sys/nfs/nfs_common.c =================================================================== --- head/sys/nfs/nfs_common.c (revision 49534) +++ head/sys/nfs/nfs_common.c (revision 49535) @@ -1,2281 +1,2280 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 - * $Id: nfs_subs.c,v 1.78 1999/06/27 11:44:19 peter Exp $ + * $Id: nfs_subs.c,v 1.79 1999/07/17 18:43:47 phk Exp $ */ /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include #include #ifdef ISO #include #endif /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_int32_t nfs_xdrneg1; u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_auth_kerb; u_int32_t nfs_prog, nqnfs_prog, nfs_true, nfs_false; /* And other global data */ static u_int32_t nfs_xid = 0; static enum vtype nv2tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv3tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; int nfs_ticks; int nfs_pbuf_freecnt = -1; /* start out unlimited */ struct nfs_reqq nfs_reqq; struct nfssvc_sockhead nfssvc_sockhead; int nfssvc_sockhead_flag; struct nfsd_head nfsd_head; int nfsd_head_flag; struct nfs_bufq nfs_bufq; struct nqtimerhead nqtimerhead; struct nqfhhashhead *nqfhhashtbl; u_long nqfhhash; static void (*nfs_prev_lease_updatetime) __P((int)); static int nfs_prev_nfssvc_sy_narg; static sy_call_t *nfs_prev_nfssvc_sy_call; #ifndef NFS_NOSERVER static vop_t *nfs_prev_vop_lease_check; static int nfs_prev_getfh_sy_narg; static sy_call_t *nfs_prev_getfh_sy_call; /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. 
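
/*
 * Illustrative aside (not part of this commit): the globals above (rpc_call,
 * rpc_vers, nfs_true, and friends) are converted to XDR form once at startup
 * so the request-building code can store them without repeated byte swaps.
 * XDR encodes 32-bit quantities big-endian, so conceptually this is the same
 * swap htonl() performs.  A minimal user-space sketch of the idea, using
 * hypothetical names rather than the kernel's txdr_unsigned() macro:
 */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>

static uint32_t example_rpc_vers;   /* ONC RPC version 2, already in XDR form */
static uint32_t example_nfs_true;   /* XDR boolean TRUE, already in XDR form */

static void
example_xdr_init(void)
{
	example_rpc_vers = htonl(2);
	example_nfs_true = htonl(1);
}

int
main(void)
{
	example_xdr_init();
	/* Later code can copy the pre-swapped constants straight to the wire. */
	printf("wire rpc_vers=0x%08x true=0x%08x\n",
	    (unsigned)example_rpc_vers, (unsigned)example_nfs_true);
	return (0);
}
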
*/ int nfsv3_procid[NFS_NPROCS] = { NFSPROC_NULL, NFSPROC_GETATTR, NFSPROC_SETATTR, NFSPROC_NOOP, NFSPROC_LOOKUP, NFSPROC_READLINK, NFSPROC_READ, NFSPROC_NOOP, NFSPROC_WRITE, NFSPROC_CREATE, NFSPROC_REMOVE, NFSPROC_RENAME, NFSPROC_LINK, NFSPROC_SYMLINK, NFSPROC_MKDIR, NFSPROC_RMDIR, NFSPROC_READDIR, NFSPROC_FSSTAT, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP }; #endif /* NFS_NOSERVER */ /* * and the reverse mapping from generic to Version 2 procedure numbers */ int nfsv2_procid[NFS_NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; #ifndef NFS_NOSERVER /* * Maps errno values to nfs error numbers. * Use NFSERR_IO as the catch all for ones not specifically defined in * RFC 1094. */ static u_char nfsrv_v2errmap[ELAST] = { NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO /* << Last is 86 */ }; /* * Maps errno values to nfs error numbers. * Although it is not obvious whether or not NFS clients really care if * a returned error value is in the specified list for the procedure, the * safest thing to do is filter them appropriately. For Version 2, the * X/Open XNFS document is the only specification that defines error values * for each RPC (The RFC simply lists all possible error values for all RPCs), * so I have decided to not do this for Version 2. * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. 
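
/*
 * Illustrative aside (not part of this commit): the nfsv3err_* tables that
 * follow put the default reply error first and then the permitted errors in
 * ascending order, terminated by 0.  A user-space sketch of the scan that
 * nfsrv_errmap() performs over such a table (hypothetical names; the raw
 * numbers stand in for NFSERR_* constants):
 */
#include <stdio.h>

static short example_err_table[] = { 5, 5, 13, 20, 70, 10006, 0 };

static int
example_errmap(const short *tbl, int err)
{
	const short *p = tbl;

	/* Skip the default entry, then walk the sorted list. */
	while (*++p) {
		if (*p == err)
			return (err);       /* error is allowed for this procedure */
		if (*p > err)
			break;              /* list is ascending; no match possible */
	}
	return ((int)tbl[0]);               /* fall back to the default error */
}

int
main(void)
{
	printf("%d %d\n", example_errmap(example_err_table, 13),
	    example_errmap(example_err_table, 99));    /* prints: 13 5 */
	return (0);
}
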
*/ static short nfsv3err_null[] = { 0, 0, }; static short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_PERM, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOT_SYNC, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_create[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mknod[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_BADTYPE, 0, }; static short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rename[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_link[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_NOTSUPP, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsstat[] = { 
NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, nfsv3err_lookup, nfsv3err_access, nfsv3err_readlink, nfsv3err_read, nfsv3err_write, nfsv3err_create, nfsv3err_mkdir, nfsv3err_symlink, nfsv3err_mknod, nfsv3err_remove, nfsv3err_rmdir, nfsv3err_rename, nfsv3err_link, nfsv3err_readdir, nfsv3err_readdirplus, nfsv3err_fsstat, nfsv3err_fsinfo, nfsv3err_pathconf, nfsv3err_commit, }; #endif /* NFS_NOSERVER */ extern struct nfsrtt nfsrtt; extern time_t nqnfsstarttime; extern int nqsrv_clockskew; extern int nqsrv_writeslack; extern int nqsrv_maxlease; extern struct nfsstats nfsstats; extern int nqnfs_piggy[NFS_NPROCS]; extern nfstype nfsv2_type[9]; extern nfstype nfsv3_type[9]; extern struct nfsnodehashhead *nfsnodehashtbl; extern u_long nfsnodehash; struct getfh_args; extern int getfh(struct proc *, struct getfh_args *, int *); struct nfssvc_args; extern int nfssvc(struct proc *, struct nfssvc_args *, int *); LIST_HEAD(nfsnodehashhead, nfsnode); int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *)); u_quad_t nfs_curusec() { struct timeval tv; getmicrotime(&tv); return ((u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec); } /* * Create the header for an rpc request packet * The hsiz is the size of the rest of the nfs request header. * (just used to decide if a cluster is a good idea) */ struct mbuf * nfsm_reqh(vp, procid, hsiz, bposp) struct vnode *vp; u_long procid; int hsiz; caddr_t *bposp; { register struct mbuf *mb; register u_int32_t *tl; register caddr_t bpos; struct mbuf *mb2; struct nfsmount *nmp; int nqflag; MGET(mb, M_WAIT, MT_DATA); if (hsiz >= MINCLSIZE) MCLGET(mb, M_WAIT); mb->m_len = 0; bpos = mtod(mb, caddr_t); /* * For NQNFS, add lease request. */ if (vp) { nmp = VFSTONFS(vp->v_mount); if (nmp->nm_flag & NFSMNT_NQNFS) { nqflag = NQNFS_NEEDLEASE(vp, procid); if (nqflag) { nfsm_build(tl, u_int32_t *, 2*NFSX_UNSIGNED); *tl++ = txdr_unsigned(nqflag); *tl = txdr_unsigned(nmp->nm_leaseterm); } else { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = 0; } } } /* Finally, return values */ *bposp = bpos; return (mb); } /* * Build the RPC header and fill in the authorization info. * The authorization string argument is only used when the credentials * come from outside of the kernel. * Returns the head of the mbuf list. */ struct mbuf * nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, mbp, xidp) register struct ucred *cr; int nmflag; int procid; int auth_type; int auth_len; char *auth_str; int verf_len; char *verf_str; struct mbuf *mrest; int mrest_len; struct mbuf **mbp; u_int32_t *xidp; { register struct mbuf *mb; register u_int32_t *tl; register caddr_t bpos; register int i; struct mbuf *mreq, *mb2; int siz, grpsiz, authsiz; authsiz = nfsm_rndup(auth_len); MGETHDR(mb, M_WAIT, MT_DATA); if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { MCLGET(mb, M_WAIT); } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); } else { MH_ALIGN(mb, 8 * NFSX_UNSIGNED); } mb->m_len = 0; mreq = mb; bpos = mtod(mb, caddr_t); /* * First the RPC header. 
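
/*
 * Illustrative aside (not part of this commit): the RPC header built here
 * consists of eight XDR words: xid, message type CALL, RPC version 2,
 * program, program version, procedure, then the credential flavor and its
 * length.  A flat-buffer user-space sketch of that layout (hypothetical
 * names; the program/procedure values are the caller's responsibility):
 */
#include <stdint.h>
#include <stddef.h>
#include <arpa/inet.h>

static size_t
example_rpc_call_header(uint32_t *buf, uint32_t xid, uint32_t prog,
    uint32_t vers, uint32_t proc, uint32_t authflavor, uint32_t authlen)
{
	buf[0] = htonl(xid);         /* transaction id */
	buf[1] = htonl(0);           /* msg_type CALL */
	buf[2] = htonl(2);           /* RPC protocol version 2 */
	buf[3] = htonl(prog);        /* e.g. 100003 for NFS */
	buf[4] = htonl(vers);        /* e.g. 3 for NFSv3 */
	buf[5] = htonl(proc);        /* procedure number */
	buf[6] = htonl(authflavor);  /* e.g. 1 for AUTH_UNIX */
	buf[7] = htonl(authlen);     /* length of the credential body that follows */
	return (8 * sizeof(uint32_t));
}
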
*/ nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED); /* Get a pretty random xid to start with */ if (!nfs_xid) nfs_xid = random(); /* * Skip zero xid if it should ever happen. */ if (++nfs_xid == 0) nfs_xid++; *tl++ = *xidp = txdr_unsigned(nfs_xid); *tl++ = rpc_call; *tl++ = rpc_vers; if (nmflag & NFSMNT_NQNFS) { *tl++ = txdr_unsigned(NQNFS_PROG); *tl++ = txdr_unsigned(NQNFS_VER3); } else { *tl++ = txdr_unsigned(NFS_PROG); if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(NFS_VER3); else *tl++ = txdr_unsigned(NFS_VER2); } if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(procid); else *tl++ = txdr_unsigned(nfsv2_procid[procid]); /* * And then the authorization cred. */ *tl++ = txdr_unsigned(auth_type); *tl = txdr_unsigned(authsiz); switch (auth_type) { case RPCAUTH_UNIX: nfsm_build(tl, u_int32_t *, auth_len); *tl++ = 0; /* stamp ?? */ *tl++ = 0; /* NULL hostname */ *tl++ = txdr_unsigned(cr->cr_uid); *tl++ = txdr_unsigned(cr->cr_groups[0]); grpsiz = (auth_len >> 2) - 5; *tl++ = txdr_unsigned(grpsiz); for (i = 1; i <= grpsiz; i++) *tl++ = txdr_unsigned(cr->cr_groups[i]); break; case RPCAUTH_KERB4: siz = auth_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(auth_str, bpos, i); mb->m_len += i; auth_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } break; }; /* * And the verifier... */ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (verf_str) { *tl++ = txdr_unsigned(RPCAUTH_KERB4); *tl = txdr_unsigned(verf_len); siz = verf_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(verf_str, bpos, i); mb->m_len += i; verf_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } } else { *tl++ = txdr_unsigned(RPCAUTH_NULL); *tl = 0; } mb->m_next = mrest; mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; mreq->m_pkthdr.rcvif = (struct ifnet *)0; *mbp = mb; return (mreq); } /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbuftouio(mrep, uiop, siz, dpos) struct mbuf **mrep; register struct uio *uiop; int siz; caddr_t *dpos; { register char *mbufcp, *uiocp; register int xfer, left, len; register struct mbuf *mp; long uiosiz, rem; int error = 0; mp = *mrep; mbufcp = *dpos; len = mtod(mp, caddr_t)+mp->m_len-mbufcp; rem = nfsm_rndup(siz)-siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) return (EFBIG); left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mp->m_next; if (mp == NULL) return (EBADRPC); mbufcp = mtod(mp, caddr_t); len = mp->m_len; } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. 
*/ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(mbufcp, uiocp, xfer); else copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } *dpos = mbufcp; *mrep = mp; if (rem > 0) { if (len < rem) error = nfs_adv(mrep, dpos, rem, len); else *dpos += rem; } return (error); } /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can ony handle iovcnt == 1 */ int nfsm_uiotombuf(uiop, mq, siz, bpos) register struct uio *uiop; struct mbuf **mq; int siz; caddr_t *bpos; { register char *uiocp; register struct mbuf *mp, *mp2; register int xfer, left, mlen; int uiosiz, clflg, rem; char *cp; #ifdef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfsm_uiotombuf: iovcnt != 1"); #endif if (siz > MLEN) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = nfsm_rndup(siz)-siz; mp = mp2 = *mq; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { MGET(mp, M_WAIT, MT_DATA); if (clflg) MCLGET(mp, M_WAIT); mp->m_len = 0; mp2->m_next = mp; mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); mp->m_len += xfer; left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { MGET(mp, M_WAIT, MT_DATA); mp->m_len = 0; mp2->m_next = mp; } cp = mtod(mp, caddr_t)+mp->m_len; for (left = 0; left < rem; left++) *cp++ = '\0'; mp->m_len += rem; *bpos = cp; } else *bpos = mtod(mp, caddr_t)+mp->m_len; *mq = mp; return (0); } /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macros nfsm_dissect and nfsm_dissecton for tough * cases. (The macros use the vars. dpos and dpos2) */ int nfsm_disct(mdp, dposp, siz, left, cp2) struct mbuf **mdp; caddr_t *dposp; int siz; int left; caddr_t *cp2; { register struct mbuf *mp, *mp2; register int siz2, xfer; register caddr_t p; mp = *mdp; while (left == 0) { *mdp = mp = mp->m_next; if (mp == NULL) return (EBADRPC); left = mp->m_len; *dposp = mtod(mp, caddr_t); } if (left >= siz) { *cp2 = *dposp; *dposp += siz; } else if (mp->m_next == NULL) { return (EBADRPC); } else if (siz > MHLEN) { panic("nfs S too big"); } else { MGET(mp2, M_WAIT, MT_DATA); mp2->m_next = mp->m_next; mp->m_next = mp2; mp->m_len -= left; mp = mp2; *cp2 = p = mtod(mp, caddr_t); bcopy(*dposp, p, left); /* Copy what was left */ siz2 = siz-left; p += left; mp2 = mp->m_next; /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (EBADRPC); xfer = (siz2 > mp2->m_len) ? 
mp2->m_len : siz2; if (xfer > 0) { bcopy(mtod(mp2, caddr_t), p, xfer); NFSMADV(mp2, xfer); mp2->m_len -= xfer; p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mp2->m_next; } mp->m_len = siz; *mdp = mp2; *dposp = mtod(mp2, caddr_t); } return (0); } /* * Advance the position in the mbuf chain. */ int nfs_adv(mdp, dposp, offs, left) struct mbuf **mdp; caddr_t *dposp; int offs; int left; { register struct mbuf *m; register int s; m = *mdp; s = left; while (s < offs) { offs -= s; m = m->m_next; if (m == NULL) return (EBADRPC); s = m->m_len; } *mdp = m; *dposp = mtod(m, caddr_t)+offs; return (0); } /* * Copy a string into mbufs for the hard cases... */ int nfsm_strtmbuf(mb, bpos, cp, siz) struct mbuf **mb; char **bpos; const char *cp; long siz; { register struct mbuf *m1 = NULL, *m2; long left, xfer, len, tlen; u_int32_t *tl; int putsize; putsize = 1; m2 = *mb; left = M_TRAILINGSPACE(m2); if (left > 0) { tl = ((u_int32_t *)(*bpos)); *tl++ = txdr_unsigned(siz); putsize = 0; left -= NFSX_UNSIGNED; m2->m_len += NFSX_UNSIGNED; if (left > 0) { bcopy(cp, (caddr_t) tl, left); siz -= left; cp += left; m2->m_len += left; left = 0; } } /* Loop around adding mbufs */ while (siz > 0) { MGET(m1, M_WAIT, MT_DATA); if (siz > MLEN) MCLGET(m1, M_WAIT); m1->m_len = NFSMSIZ(m1); m2->m_next = m1; m2 = m1; tl = mtod(m1, u_int32_t *); tlen = 0; if (putsize) { *tl++ = txdr_unsigned(siz); m1->m_len -= NFSX_UNSIGNED; tlen = NFSX_UNSIGNED; putsize = 0; } if (siz < m1->m_len) { len = nfsm_rndup(siz); xfer = siz; if (xfer < len) *(tl+(xfer>>2)) = 0; } else { xfer = len = m1->m_len; } bcopy(cp, (caddr_t) tl, xfer); m1->m_len = len+tlen; siz -= xfer; cp += xfer; } *mb = m1; *bpos = mtod(m1, caddr_t)+m1->m_len; return (0); } /* * Called once to initialize data structures... */ int nfs_init(vfsp) struct vfsconf *vfsp; { register int i; nfsmount_zone = zinit("NFSMOUNT", sizeof(struct nfsmount), 0, 0, 1); /* * Check to see if major data structures haven't bloated. */ if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) { printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC); printf("Try reducing NFS_UIDHASHSIZ\n"); } if (sizeof (struct nfsuid) > NFS_UIDALLOC) { printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); printf("Try unionizing the nu_nickname and nu_flag fields\n"); } nfs_mount_type = vfsp->vfc_typenum; nfsrtt.pos = 0; rpc_vers = txdr_unsigned(RPC_VER2); rpc_call = txdr_unsigned(RPC_CALL); rpc_reply = txdr_unsigned(RPC_REPLY); rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); rpc_mismatch = txdr_unsigned(RPC_MISMATCH); rpc_autherr = txdr_unsigned(RPC_AUTHERR); rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); nfs_prog = txdr_unsigned(NFS_PROG); nqnfs_prog = txdr_unsigned(NQNFS_PROG); nfs_true = txdr_unsigned(TRUE); nfs_false = txdr_unsigned(FALSE); nfs_xdrneg1 = txdr_unsigned(-1); nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { nfs_iodwant[i] = (struct proc *)0; nfs_iodmount[i] = (struct nfsmount *)0; } nfs_nhinit(); /* Init the nfsnode table */ #ifndef NFS_NOSERVER nfsrv_init(0); /* Init server data structures */ nfsrv_initcache(); /* Init the server request cache */ #endif /* * Initialize the nqnfs server stuff. 
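
/*
 * Illustrative aside (not part of this commit): nfsm_strtmbuf() above writes
 * a string as XDR opaque data, that is a 32-bit length word, the bytes
 * themselves, and zero padding out to the next 4-byte boundary (the quantity
 * nfsm_rndup() computes).  A flat-buffer sketch of the same encoding, with
 * hypothetical names; the destination is assumed large enough:
 */
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <arpa/inet.h>

static size_t
example_xdr_put_string(unsigned char *dst, const char *s, size_t len)
{
	size_t padded = (len + 3) & ~(size_t)3;   /* round up to 4-byte boundary */
	uint32_t xlen = htonl((uint32_t)len);

	memcpy(dst, &xlen, 4);                    /* length word */
	memcpy(dst + 4, s, len);                  /* the string bytes */
	memset(dst + 4 + len, 0, padded - len);   /* zero fill the padding */
	return (4 + padded);                      /* total bytes emitted */
}
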
*/ if (nqnfsstarttime == 0) { nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease + nqsrv_clockskew + nqsrv_writeslack; NQLOADNOVRAM(nqnfsstarttime); CIRCLEQ_INIT(&nqtimerhead); nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); } /* * Initialize reply list and start timer */ TAILQ_INIT(&nfs_reqq); nfs_timer(0); /* * Set up lease_check and lease_updatetime so that other parts * of the system can call us, if we are loadable. */ #ifndef NFS_NOSERVER nfs_prev_vop_lease_check = default_vnodeop_p[VOFFSET(vop_lease)]; default_vnodeop_p[VOFFSET(vop_lease)] = (vop_t *)nqnfs_vop_lease_check; #endif nfs_prev_lease_updatetime = lease_updatetime; lease_updatetime = nfs_lease_updatetime; nfs_prev_nfssvc_sy_narg = sysent[SYS_nfssvc].sy_narg; sysent[SYS_nfssvc].sy_narg = 2; nfs_prev_nfssvc_sy_call = sysent[SYS_nfssvc].sy_call; sysent[SYS_nfssvc].sy_call = (sy_call_t *)nfssvc; #ifndef NFS_NOSERVER nfs_prev_getfh_sy_narg = sysent[SYS_getfh].sy_narg; sysent[SYS_getfh].sy_narg = 2; nfs_prev_getfh_sy_call = sysent[SYS_getfh].sy_call; sysent[SYS_getfh].sy_call = (sy_call_t *)getfh; #endif nfs_pbuf_freecnt = nswbuf / 2 + 1; return (0); } int nfs_uninit(vfsp) struct vfsconf *vfsp; { untimeout(nfs_timer, (void *)NULL, nfs_timer_handle); nfs_mount_type = -1; #ifndef NFS_NOSERVER default_vnodeop_p[VOFFSET(vop_lease)] = nfs_prev_vop_lease_check; #endif lease_updatetime = nfs_prev_lease_updatetime; sysent[SYS_nfssvc].sy_narg = nfs_prev_nfssvc_sy_narg; sysent[SYS_nfssvc].sy_call = nfs_prev_nfssvc_sy_call; #ifndef NFS_NOSERVER sysent[SYS_getfh].sy_narg = nfs_prev_getfh_sy_narg; sysent[SYS_getfh].sy_call = nfs_prev_getfh_sy_call; #endif return (0); } /* * Attribute cache routines. * nfs_loadattrcache() - loads or updates the cache contents from attributes * that are on the mbuf list * nfs_getattrcache() - returns valid attributes if found in cache, returns * error otherwise */ /* * Load the attribute cache (that lives in the nfsnode entry) with * the values on the mbuf list and * Iff vap not NULL * copy the attributes to *vaper */ int nfs_loadattrcache(vpp, mdp, dposp, vaper) struct vnode **vpp; struct mbuf **mdp; caddr_t *dposp; struct vattr *vaper; { register struct vnode *vp = *vpp; register struct vattr *vap; register struct nfs_fattr *fp; register struct nfsnode *np; register int32_t t1; caddr_t cp2; int error = 0, rdev; struct mbuf *md; enum vtype vtyp; u_short vmode; struct timespec mtime; struct vnode *nvp; int v3 = NFS_ISV3(vp); md = *mdp; t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) != 0) return (error); fp = (struct nfs_fattr *)cp2; if (v3) { vtyp = nfsv3tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); rdev = makeudev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), fxdr_unsigned(int, fp->fa3_rdev.specdata2)); fxdr_nfsv3time(&fp->fa3_mtime, &mtime); } else { vtyp = nfsv2tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); /* * XXX * * The duplicate information returned in fa_type and fa_mode * is an ambiguity in the NFS version 2 protocol. * * VREG should be taken literally as a regular file. If a * server intents to return some type information differently * in the upper bits of the mode field (e.g. for sockets, or * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we * leave the examination of the mode bits even in the VREG * case to avoid breakage for bogus servers, but we make sure * that there are actually type bits set in the upper part of * fa_mode (and failing that, trust the va_type field). 
* * NFSv3 cleared the issue, and requires fa_mode to not * contain any type information (while also introduing sockets * and FIFOs for fa_type). */ if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) vtyp = IFTOVT(vmode); rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); fxdr_nfsv2time(&fp->fa2_mtime, &mtime); /* * Really ugly NFSv2 kludge. */ if (vtyp == VCHR && rdev == 0xffffffff) vtyp = VFIFO; } /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); if (vp->v_type != vtyp) { vp->v_type = vtyp; if (vp->v_type == VFIFO) { vp->v_op = fifo_nfsv2nodeop_p; } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; nvp = checkalias(vp, rdev, vp->v_mount); if (nvp) { /* * Discard unneeded vnode, but save its nfsnode. * Since the nfsnode does not have a lock, its * vnode lock has to be carried over. */ nvp->v_vnlock = vp->v_vnlock; vp->v_vnlock = NULL; nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased node. */ np->n_vnode = nvp; *vpp = vp = nvp; } } np->n_mtime = mtime.tv_sec; } vap = &np->n_vattr; vap->va_type = vtyp; vap->va_mode = (vmode & 07777); vap->va_rdev = rdev; vap->va_mtime = mtime; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; if (v3) { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_hyper(&fp->fa3_size); vap->va_blocksize = NFS_FABLKSIZE; vap->va_bytes = fxdr_hyper(&fp->fa3_used); vap->va_fileid = fxdr_unsigned(int32_t, fp->fa3_fileid.nfsuquad[1]); fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); vap->va_flags = 0; vap->va_filerev = 0; } else { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) * NFS_FABLKSIZE; vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); vap->va_flags = 0; vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_sec); vap->va_ctime.tv_nsec = 0; vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); vap->va_filerev = 0; } if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, np->n_size); } else np->n_size = vap->va_size; } np->n_attrstamp = time_second; if (vaper != NULL) { bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } return (0); } #ifdef NFS_ACDEBUG #include SYSCTL_DECL(_vfs_nfs); static int nfs_acdebug; SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, ""); #endif /* * Check the time stamp * If the cache is valid, copy contents to *vap and return 0 * otherwise return an error */ int nfs_getattrcache(vp, vaper) register struct vnode *vp; struct vattr *vaper; { register struct nfsnode *np; 
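
/*
 * Illustrative aside (not part of this commit): the NFSv2 branch above has
 * to reconcile fa_type with the file-type bits that some servers also put in
 * fa_mode, and it turns a character device whose rdev is all ones back into
 * a FIFO.  A user-space sketch of that disambiguation using the standard
 * S_IFMT bits (hypothetical names; returns an S_IF* type value):
 */
#include <stdint.h>
#include <sys/stat.h>

static mode_t
example_v2_filetype(int wire_is_reg, int wire_is_none, mode_t mode,
    uint32_t rdev)
{
	mode_t type = wire_is_reg ? S_IFREG : 0;

	/*
	 * Trust the mode's type bits when fa_type said "none", or when a
	 * supposedly regular file unexpectedly carries type bits in fa_mode.
	 */
	if (wire_is_none || (wire_is_reg && (mode & S_IFMT) != 0))
		type = mode & S_IFMT;

	/* NFSv2 kludge: a character device with an all-ones rdev is a FIFO. */
	if (type == S_IFCHR && rdev == 0xffffffffu)
		type = S_IFIFO;
	return (type);
}
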
register struct vattr *vap; struct nfsmount *nmp; int timeo; np = VTONFS(vp); vap = &np->n_vattr; nmp = VFSTONFS(vp->v_mount); /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */ timeo = (time_second - np->n_mtime) / 10; #ifdef NFS_ACDEBUG if (nfs_acdebug>1) printf("nfs_getattrcache: initial timeo = %d\n", timeo); #endif if (vap->va_type == VDIR) { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin) timeo = nmp->nm_acdirmin; else if (timeo > nmp->nm_acdirmax) timeo = nmp->nm_acdirmax; } else { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin) timeo = nmp->nm_acregmin; else if (timeo > nmp->nm_acregmax) timeo = nmp->nm_acregmax; } #ifdef NFS_ACDEBUG if (nfs_acdebug > 2) printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n", nmp->nm_acregmin, nmp->nm_acregmax, nmp->nm_acdirmin, nmp->nm_acdirmax); if (nfs_acdebug) printf("nfs_getattrcache: age = %d; final timeo = %d\n", (time_second - np->n_attrstamp), timeo); #endif if ((time_second - np->n_attrstamp) >= timeo) { nfsstats.attrcache_misses++; return (ENOENT); } nfsstats.attrcache_hits++; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, np->n_size); } else np->n_size = vap->va_size; } bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } return (0); } #ifndef NFS_NOSERVER /* * Set up nameidata for a lookup() call and do it. * * If pubflag is set, this call is done for a lookup operation on the * public filehandle. In that case we allow crossing mountpoints and * absolute pathnames. However, the caller is expected to check that * the lookup result is within the public fs, and deny access if * it is not. * * nfs_namei() clears out garbage fields that namei() might leave garbage. * This is mainly ni_vp and ni_dvp when an error occurs, and ni_dvp when no * error occurs but the parent was not requested. * * dirp may be set whether an error is returned or not, and must be * released by the caller. */ int nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag) register struct nameidata *ndp; fhandle_t *fhp; int len; struct nfssvc_sock *slp; struct sockaddr *nam; struct mbuf **mdp; caddr_t *dposp; struct vnode **retdirp; struct proc *p; int kerbflag, pubflag; { register int i, rem; register struct mbuf *md; register char *fromcp, *tocp, *cp; struct iovec aiov; struct uio auio; struct vnode *dp; int error, rdonly, linklen; struct componentname *cnp = &ndp->ni_cnd; *retdirp = (struct vnode *)0; cnp->cn_pnbuf = zalloc(namei_zone); /* * Copy the name from the mbuf list to ndp->ni_pnbuf * and set the various ndp fields appropriately. */ fromcp = *dposp; tocp = cnp->cn_pnbuf; md = *mdp; rem = mtod(md, caddr_t) + md->m_len - fromcp; cnp->cn_hash = 0; for (i = 0; i < len; i++) { while (rem == 0) { md = md->m_next; if (md == NULL) { error = EBADRPC; goto out; } fromcp = mtod(md, caddr_t); rem = md->m_len; } if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) { error = EACCES; goto out; } cnp->cn_hash += (unsigned char)*fromcp; *tocp++ = *fromcp++; rem--; } *tocp = '\0'; *mdp = md; *dposp = fromcp; len = nfsm_rndup(len)-len; if (len > 0) { if (rem >= len) *dposp += len; else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0) goto out; } /* * Extract and set starting directory. 
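
/*
 * Illustrative aside (not part of this commit): nfs_getattrcache() above
 * derives the cache timeout from how recently the file changed (one tenth of
 * its age), clamps it to the mount's min/max bounds (separate bounds exist
 * for directories, and the minimum is forced for locally modified nodes),
 * and declares a miss once the cached attributes are older than that.  A
 * standalone sketch with hypothetical names:
 */
#include <time.h>

static int
example_attrcache_valid(time_t now, time_t file_mtime, time_t cached_at,
    int modified, int acmin, int acmax)
{
	time_t timeo = (now - file_mtime) / 10;   /* stable files live longer */

	if (modified || timeo < acmin)
		timeo = acmin;                    /* dirty or young: short TTL */
	else if (timeo > acmax)
		timeo = acmax;                    /* never beyond the mount's max */
	return ((now - cached_at) < timeo);       /* nonzero means still valid */
}
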
*/ error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, nam, &rdonly, kerbflag, pubflag); if (error) goto out; if (dp->v_type != VDIR) { vrele(dp); error = ENOTDIR; goto out; } if (rdonly) cnp->cn_flags |= RDONLY; /* * Set return directory. Reference to dp is implicitly transfered * to the returned pointer */ *retdirp = dp; if (pubflag) { /* * Oh joy. For WebNFS, handle those pesky '%' escapes, * and the 'native path' indicator. */ cp = zalloc(namei_zone); fromcp = cnp->cn_pnbuf; tocp = cp; if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) { switch ((unsigned char)*fromcp) { case WEBNFS_NATIVE_CHAR: /* * 'Native' path for us is the same * as a path according to the NFS spec, * just skip the escape char. */ fromcp++; break; /* * More may be added in the future, range 0x80-0xff */ default: error = EIO; zfree(namei_zone, cp); goto out; } } /* * Translate the '%' escapes, URL-style. */ while (*fromcp != '\0') { if (*fromcp == WEBNFS_ESC_CHAR) { if (fromcp[1] != '\0' && fromcp[2] != '\0') { fromcp++; *tocp++ = HEXSTRTOI(fromcp); fromcp += 2; continue; } else { error = ENOENT; zfree(namei_zone, cp); goto out; } } else *tocp++ = *fromcp++; } *tocp = '\0'; zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1; ndp->ni_segflg = UIO_SYSSPACE; if (pubflag) { ndp->ni_rootdir = rootvnode; ndp->ni_loopcnt = 0; if (cnp->cn_pnbuf[0] == '/') dp = rootvnode; } else { cnp->cn_flags |= NOCROSSMOUNT; } /* * Initialize for scan, set ni_startdir and bump ref on dp again * becuase lookup() will dereference ni_startdir. */ cnp->cn_proc = p; VREF(dp); ndp->ni_startdir = dp; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* * Call lookup() to do the real work. If an error occurs, * ndp->ni_vp and ni_dvp are left uninitialized or NULL and * we do not have to dereference anything before returning. * In either case ni_startdir will be dereferenced and NULLed * out. */ error = lookup(ndp); if (error) break; /* * Check for encountering a symbolic link. Trivial * termination occurs if no symlink encountered. * Note: zfree is safe because error is 0, so we will * not zfree it again when we break. 
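
/*
 * Illustrative aside (not part of this commit): the WebNFS branch above
 * rewrites URL-style "%XX" escapes in the public-filehandle path, and an
 * escape with fewer than two following characters is an error.  A user-space
 * sketch of that decoding (hypothetical name; returns -1 on a truncated
 * escape, 0 on success):
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
example_webnfs_unescape(const char *src, char *dst, size_t dstlen)
{
	size_t o = 0;

	if (dstlen == 0)
		return (-1);
	while (*src != '\0' && o + 1 < dstlen) {
		if (*src == '%') {
			if (src[1] == '\0' || src[2] == '\0')
				return (-1);          /* truncated escape */
			char hex[3] = { src[1], src[2], '\0' };
			dst[o++] = (char)strtol(hex, NULL, 16);
			src += 3;
		} else
			dst[o++] = *src++;
	}
	dst[o] = '\0';
	return (0);
}

int
main(void)
{
	char buf[64];

	if (example_webnfs_unescape("a%2fb%20c", buf, sizeof(buf)) == 0)
		printf("%s\n", buf);                  /* prints: a/b c */
	return (0);
}
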
*/ if ((cnp->cn_flags & ISSYMLINK) == 0) { nfsrv_object_create(ndp->ni_vp); if (cnp->cn_flags & (SAVENAME | SAVESTART)) cnp->cn_flags |= HASBUF; else zfree(namei_zone, cnp->cn_pnbuf); break; } /* * Validate symlink */ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) VOP_UNLOCK(ndp->ni_dvp, 0, p); if (!pubflag) { error = EINVAL; goto badlink2; } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; goto badlink2; } if (ndp->ni_pathlen > 1) cp = zalloc(namei_zone); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = (struct proc *)0; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink1: if (ndp->ni_pathlen > 1) zfree(namei_zone, cp); badlink2: vrele(ndp->ni_dvp); vput(ndp->ni_vp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink1; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink1; } /* * Adjust or replace path */ if (ndp->ni_pathlen > 1) { bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; /* * Cleanup refs for next loop and check if root directory * should replace current directory. Normally ni_dvp * becomes the new base directory and is cleaned up when * we loop. Explicitly null pointers after invalidation * to clarify operation. */ vput(ndp->ni_vp); ndp->ni_vp = NULL; if (cnp->cn_pnbuf[0] == '/') { vrele(ndp->ni_dvp); ndp->ni_dvp = ndp->ni_rootdir; VREF(ndp->ni_dvp); } ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } /* * nfs_namei() guarentees that fields will not contain garbage * whether an error occurs or not. This allows the caller to track * cleanup state trivially. */ out: if (error) { zfree(namei_zone, cnp->cn_pnbuf); ndp->ni_vp = NULL; ndp->ni_dvp = NULL; ndp->ni_startdir = NULL; cnp->cn_flags &= ~HASBUF; } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } return (error); } /* * A fiddled version of m_adj() that ensures null fill to a long * boundary and only trims off the back end */ void nfsm_adj(mp, len, nul) struct mbuf *mp; register int len; int nul; { register struct mbuf *m; register int count, i; register char *cp; /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ count = 0; m = mp; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len > len) { m->m_len -= len; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ for (m = mp; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } break; } count -= m->m_len; } for (m = m->m_next;m;m = m->m_next) m->m_len = 0; } /* * Make these functions instead of macros, so that the kernel text size * doesn't get too big... 
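
/*
 * Illustrative aside (not part of this commit): nfsm_adj() above trims a
 * given number of bytes off the tail of an mbuf chain and NUL-fills the last
 * few bytes that remain, so trailing XDR padding stays zeroed.  A sketch of
 * the same walk over a simple singly linked buffer chain (hypothetical types
 * and names):
 */
#include <stddef.h>
#include <string.h>

struct example_buf {
	struct example_buf *next;
	size_t len;
	unsigned char data[256];
};

static void
example_chain_trim(struct example_buf *head, size_t trim, size_t nul)
{
	struct example_buf *b;
	size_t total = 0, keep;

	for (b = head; b != NULL; b = b->next)    /* total bytes in the chain */
		total += b->len;
	keep = total > trim ? total - trim : 0;

	/*
	 * Walk again: the buffer holding the new end keeps a partial length
	 * and has its last 'nul' bytes zeroed; buffers after it become empty.
	 */
	for (b = head; b != NULL; b = b->next) {
		if (keep <= b->len) {
			b->len = keep;
			if (nul > 0 && b->len >= nul)
				memset(b->data + b->len - nul, 0, nul);
			for (b = b->next; b != NULL; b = b->next)
				b->len = 0;
			break;
		}
		keep -= b->len;
	}
}
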
*/ void nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int before_ret; register struct vattr *before_vap; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_int32_t *tl; if (before_ret) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(before_vap->va_size, tl); tl += 2; txdr_nfsv3time(&(before_vap->va_mtime), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_ctime), tl); } *bposp = bpos; *mbp = mb; nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); } void nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_int32_t *tl; register struct nfs_fattr *fp; if (after_ret) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; nfsm_srvfattr(nfsd, after_vap, fp); } *mbp = mb; *bposp = bpos; } void nfsm_srvfattr(nfsd, vap, fp) register struct nfsrv_descript *nfsd; register struct vattr *vap; register struct nfs_fattr *fp; { fp->fa_nlink = txdr_unsigned(vap->va_nlink); fp->fa_uid = txdr_unsigned(vap->va_uid); fp->fa_gid = txdr_unsigned(vap->va_gid); if (nfsd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv3_type(vap->va_type); fp->fa_mode = vtonfsv3_mode(vap->va_mode); txdr_hyper(vap->va_size, &fp->fa3_size); txdr_hyper(vap->va_bytes, &fp->fa3_used); fp->fa3_rdev.specdata1 = txdr_unsigned(umajor(vap->va_rdev)); fp->fa3_rdev.specdata2 = txdr_unsigned(uminor(vap->va_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); fp->fa3_fileid.nfsuquad[0] = 0; fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(vap->va_type); fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); fp->fa2_size = txdr_unsigned(vap->va_size); fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); if (vap->va_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(vap->va_rdev); fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(vap->va_fsid); fp->fa2_fileid = txdr_unsigned(vap->va_fileid); txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); } } /* * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling VFS_FHTOVP() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * - if not lockflag unlock it with VOP_UNLOCK() */ int nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag, pubflag) fhandle_t *fhp; int lockflag; struct vnode **vpp; struct ucred *cred; struct nfssvc_sock *slp; struct sockaddr *nam; int *rdonlyp; int kerbflag; int pubflag; { struct proc *p = curproc; /* XXX */ register struct mount *mp; register int i; struct ucred *credanon; int error, exflags; #ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */ struct sockaddr_int *saddr; #endif 
*vpp = (struct vnode *)0; if (nfs_ispublicfh(fhp)) { if (!pubflag || !nfs_pub.np_valid) return (ESTALE); fhp = &nfs_pub.np_handle; } mp = vfs_getvfs(&fhp->fh_fsid); if (!mp) return (ESTALE); error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon); if (error) return (error); #ifdef MNT_EXNORESPORT if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) { saddr = (struct sockaddr_in *)nam; if (saddr->sin_family == AF_INET && ntohs(saddr->sin_port) >= IPPORT_RESERVED) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } #endif /* * Check/setup credentials. */ if (exflags & MNT_EXKERB) { if (!kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } else if (kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { cred->cr_uid = credanon->cr_uid; for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) cred->cr_groups[i] = credanon->cr_groups[i]; cred->cr_ngroups = i; } if (exflags & MNT_EXRDONLY) *rdonlyp = 1; else *rdonlyp = 0; nfsrv_object_create(*vpp); if (!lockflag) VOP_UNLOCK(*vpp, 0, p); return (0); } /* * WebNFS: check if a filehandle is a public filehandle. For v3, this * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has * transformed this to all zeroes in both cases, so check for it. */ int nfs_ispublicfh(fhp) fhandle_t *fhp; { char *cp = (char *)fhp; int i; for (i = 0; i < NFSX_V3FH; i++) if (*cp++ != 0) return (FALSE); return (TRUE); } #endif /* NFS_NOSERVER */ /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ int netaddr_match(family, haddr, nam) int family; union nethostaddr *haddr; struct sockaddr *nam; { register struct sockaddr_in *inetaddr; switch (family) { case AF_INET: inetaddr = (struct sockaddr_in *)nam; if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inetaddr) return (1); break; #ifdef ISO case AF_ISO: { register struct sockaddr_iso *isoaddr1, *isoaddr2; isoaddr1 = (struct sockaddr_iso *)nam; isoaddr2 = (struct sockaddr_iso *)haddr->had_nam; if (isoaddr1->siso_family == AF_ISO && isoaddr1->siso_nlen > 0 && isoaddr1->siso_nlen == isoaddr2->siso_nlen && SAME_ISOADDR(isoaddr1, isoaddr2)) return (1); break; } #endif /* ISO */ default: break; }; return (0); } static nfsuint64 nfs_nullcookie = { { 0, 0 } }; /* * This function finds the directory cookie that corresponds to the * logical byte offset given. 
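
/*
 * Illustrative aside (not part of this commit): nfs_getcookie() below turns
 * a logical directory offset into a slot in a linked list of fixed-size
 * cookie blocks: offset 0 maps to a well-known null cookie, and every
 * directory block beyond that advances one slot.  A sketch of the index
 * arithmetic with hypothetical names (the 512 and 31 here are illustrative
 * stand-ins for NFS_DIRBLKSIZ and NFSNUMCOOKIES):
 */
#include <stdio.h>

#define EX_DIRBLKSIZ	512
#define EX_NUMCOOKIES	31

/* Returns -1 for the null cookie, otherwise fills in block/slot indices. */
static int
example_cookie_slot(long long off, int *block, int *slot)
{
	long long pos = off / EX_DIRBLKSIZ;

	if (off < 0 || pos == 0)
		return (-1);                       /* offset 0: use the null cookie */
	pos--;
	*block = (int)(pos / EX_NUMCOOKIES);       /* which list element */
	*slot = (int)(pos % EX_NUMCOOKIES);        /* index inside that element */
	return (0);
}

int
main(void)
{
	int b, s;

	if (example_cookie_slot(512 * 40, &b, &s) == 0)
		printf("block %d slot %d\n", b, s);    /* prints: block 1 slot 8 */
	return (0);
}
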
*/ nfsuint64 * nfs_getcookie(np, off, add) register struct nfsnode *np; off_t off; int add; { register struct nfsdmap *dp, *dp2; register int pos; pos = (uoff_t)off / NFS_DIRBLKSIZ; if (pos == 0 || off < 0) { #ifdef DIAGNOSTIC if (add) panic("nfs getcookie add at <= 0"); #endif return (&nfs_nullcookie); } pos--; dp = np->n_cookies.lh_first; if (!dp) { if (add) { MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return ((nfsuint64 *)0); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (dp->ndm_list.le_next) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return ((nfsuint64 *)0); dp = dp->ndm_list.le_next; } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return ((nfsuint64 *)0); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return ((nfsuint64 *)0); } return (&dp->ndm_cookies[pos]); } /* * Invalidate cached directory information, except for the actual directory * blocks (which are invalidated separately). * Done mainly to avoid the use of stale offset cookies. */ void nfs_invaldir(vp) register struct vnode *vp; { register struct nfsnode *np = VTONFS(vp); #ifdef DIAGNOSTIC if (vp->v_type != VDIR) panic("nfs: invaldir not dir"); #endif np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; if (np->n_cookies.lh_first) np->n_cookies.lh_first->ndm_eocookie = 0; } /* * The write verifier has changed (probably due to a server reboot), so all * B_NEEDCOMMIT blocks will have to be written again. Since they are on the * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT * flag. Once done the new write verifier can be set for the mount point. */ void nfs_clearcommit(mp) struct mount *mp; { register struct vnode *vp, *nvp; register struct buf *bp, *nbp; int s; s = splbio(); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ goto loop; nvp = vp->v_mntvnodes.le_next; for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_REFCNT(bp) == 0 && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bp->b_flags &= ~B_NEEDCOMMIT; } } splx(s); } #ifndef NFS_NOSERVER /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. */ int nfsrv_errmap(nd, err) struct nfsrv_descript *nd; register int err; { register short *defaulterrp, *errp; if (nd->nd_flag & ND_NFSV3) { if (nd->nd_procnum <= NFSPROC_COMMIT) { errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; while (*++errp) { if (*errp == err) return (err); else if (*errp > err) break; } return ((int)*defaulterrp); } else return (err & 0xffff); } if (err <= ELAST) return ((int)nfsrv_v2errmap[err - 1]); return (NFSERR_IO); } int nfsrv_object_create(vp) struct vnode *vp; { if (vp == NULL || vp->v_type != VREG) return (1); return (vfs_object_create(vp, curproc, curproc ? curproc->p_ucred : NULL)); } /* * Sort the group list in increasing numerical order. * (Insertion sort by Chris Torek, who was grossed out by the bubble sort * that used to be here.) */ void nfsrvw_sort(list, num) register gid_t *list; register int num; { register int i, j; gid_t v; /* Insertion sort. 
*/ for (i = 1; i < num; i++) { v = list[i]; /* find correct slot for value v, moving others up */ for (j = i; --j >= 0 && v < list[j];) list[j + 1] = list[j]; list[j + 1] = v; } } /* * copy credentials making sure that the result can be compared with bcmp(). */ void nfsrv_setcred(incred, outcred) register struct ucred *incred, *outcred; { register int i; bzero((caddr_t)outcred, sizeof (struct ucred)); outcred->cr_ref = 1; outcred->cr_uid = incred->cr_uid; outcred->cr_ngroups = incred->cr_ngroups; for (i = 0; i < incred->cr_ngroups; i++) outcred->cr_groups[i] = incred->cr_groups[i]; nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups); } #endif /* NFS_NOSERVER */ Index: head/sys/nfs/nfs_subs.c =================================================================== --- head/sys/nfs/nfs_subs.c (revision 49534) +++ head/sys/nfs/nfs_subs.c (revision 49535) @@ -1,2281 +1,2280 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 - * $Id: nfs_subs.c,v 1.78 1999/06/27 11:44:19 peter Exp $ + * $Id: nfs_subs.c,v 1.79 1999/07/17 18:43:47 phk Exp $ */ /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include #include #ifdef ISO #include #endif /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_int32_t nfs_xdrneg1; u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_auth_kerb; u_int32_t nfs_prog, nqnfs_prog, nfs_true, nfs_false; /* And other global data */ static u_int32_t nfs_xid = 0; static enum vtype nv2tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv3tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; int nfs_ticks; int nfs_pbuf_freecnt = -1; /* start out unlimited */ struct nfs_reqq nfs_reqq; struct nfssvc_sockhead nfssvc_sockhead; int nfssvc_sockhead_flag; struct nfsd_head nfsd_head; int nfsd_head_flag; struct nfs_bufq nfs_bufq; struct nqtimerhead nqtimerhead; struct nqfhhashhead *nqfhhashtbl; u_long nqfhhash; static void (*nfs_prev_lease_updatetime) __P((int)); static int nfs_prev_nfssvc_sy_narg; static sy_call_t *nfs_prev_nfssvc_sy_call; #ifndef NFS_NOSERVER static vop_t *nfs_prev_vop_lease_check; static int nfs_prev_getfh_sy_narg; static sy_call_t *nfs_prev_getfh_sy_call; /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. */ int nfsv3_procid[NFS_NPROCS] = { NFSPROC_NULL, NFSPROC_GETATTR, NFSPROC_SETATTR, NFSPROC_NOOP, NFSPROC_LOOKUP, NFSPROC_READLINK, NFSPROC_READ, NFSPROC_NOOP, NFSPROC_WRITE, NFSPROC_CREATE, NFSPROC_REMOVE, NFSPROC_RENAME, NFSPROC_LINK, NFSPROC_SYMLINK, NFSPROC_MKDIR, NFSPROC_RMDIR, NFSPROC_READDIR, NFSPROC_FSSTAT, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP }; #endif /* NFS_NOSERVER */ /* * and the reverse mapping from generic to Version 2 procedure numbers */ int nfsv2_procid[NFS_NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; #ifndef NFS_NOSERVER /* * Maps errno values to nfs error numbers. * Use NFSERR_IO as the catch all for ones not specifically defined in * RFC 1094. 
*/ static u_char nfsrv_v2errmap[ELAST] = { NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO /* << Last is 86 */ }; /* * Maps errno values to nfs error numbers. * Although it is not obvious whether or not NFS clients really care if * a returned error value is in the specified list for the procedure, the * safest thing to do is filter them appropriately. For Version 2, the * X/Open XNFS document is the only specification that defines error values * for each RPC (The RFC simply lists all possible error values for all RPCs), * so I have decided to not do this for Version 2. * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. */ static short nfsv3err_null[] = { 0, 0, }; static short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_PERM, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOT_SYNC, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_create[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mknod[] = { 
NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_BADTYPE, 0, }; static short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rename[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_link[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_NOTSUPP, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsstat[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, nfsv3err_lookup, nfsv3err_access, nfsv3err_readlink, nfsv3err_read, nfsv3err_write, nfsv3err_create, nfsv3err_mkdir, nfsv3err_symlink, nfsv3err_mknod, nfsv3err_remove, nfsv3err_rmdir, nfsv3err_rename, nfsv3err_link, nfsv3err_readdir, nfsv3err_readdirplus, nfsv3err_fsstat, nfsv3err_fsinfo, nfsv3err_pathconf, nfsv3err_commit, }; #endif /* NFS_NOSERVER */ extern struct nfsrtt nfsrtt; extern time_t nqnfsstarttime; extern int nqsrv_clockskew; extern int nqsrv_writeslack; extern int nqsrv_maxlease; extern struct nfsstats nfsstats; extern int nqnfs_piggy[NFS_NPROCS]; extern nfstype nfsv2_type[9]; extern nfstype nfsv3_type[9]; extern struct nfsnodehashhead *nfsnodehashtbl; extern u_long nfsnodehash; struct getfh_args; extern int getfh(struct proc *, struct getfh_args *, int *); struct nfssvc_args; extern int nfssvc(struct proc *, struct nfssvc_args *, int *); LIST_HEAD(nfsnodehashhead, nfsnode); int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *)); u_quad_t nfs_curusec() { struct timeval tv; getmicrotime(&tv); return ((u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec); } /* * Create the header for an rpc request packet * The hsiz is the size of the rest of the nfs request header. 
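/*
 * The nfsv3err_* tables above follow the convention stated in the
 * comment: entry 0 holds the default reply status and the remaining
 * entries list the errors permitted for that procedure in increasing
 * order, terminated by 0.  A standalone sketch of how such a list is
 * consulted (the real filtering is done by nfsrv_errmap() further down;
 * the numeric values here are placeholders, not the kernel constants).
 */
#include <stdio.h>

static int
filter_v3_error(const short *list, int err)
{
	const short *p = list;

	while (*++p) {			/* slot 0 is the default, skip it */
		if (*p == err)
			return (err);	/* err is permitted as-is */
		if (*p > err)		/* list is sorted, stop early */
			break;
	}
	return ((int)list[0]);		/* fall back to the default status */
}

int
main(void)
{
	/* Shape mirrors nfsv3err_getattr above; values are placeholders. */
	static const short getattr_errs[] = { 5, 5, 70, 10001, 10006, 0 };

	printf("%d\n", filter_v3_error(getattr_errs, 70));	/* listed: 70 */
	printf("%d\n", filter_v3_error(getattr_errs, 13));	/* not listed: 5 */
	return (0);
}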
* (just used to decide if a cluster is a good idea) */ struct mbuf * nfsm_reqh(vp, procid, hsiz, bposp) struct vnode *vp; u_long procid; int hsiz; caddr_t *bposp; { register struct mbuf *mb; register u_int32_t *tl; register caddr_t bpos; struct mbuf *mb2; struct nfsmount *nmp; int nqflag; MGET(mb, M_WAIT, MT_DATA); if (hsiz >= MINCLSIZE) MCLGET(mb, M_WAIT); mb->m_len = 0; bpos = mtod(mb, caddr_t); /* * For NQNFS, add lease request. */ if (vp) { nmp = VFSTONFS(vp->v_mount); if (nmp->nm_flag & NFSMNT_NQNFS) { nqflag = NQNFS_NEEDLEASE(vp, procid); if (nqflag) { nfsm_build(tl, u_int32_t *, 2*NFSX_UNSIGNED); *tl++ = txdr_unsigned(nqflag); *tl = txdr_unsigned(nmp->nm_leaseterm); } else { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = 0; } } } /* Finally, return values */ *bposp = bpos; return (mb); } /* * Build the RPC header and fill in the authorization info. * The authorization string argument is only used when the credentials * come from outside of the kernel. * Returns the head of the mbuf list. */ struct mbuf * nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, mbp, xidp) register struct ucred *cr; int nmflag; int procid; int auth_type; int auth_len; char *auth_str; int verf_len; char *verf_str; struct mbuf *mrest; int mrest_len; struct mbuf **mbp; u_int32_t *xidp; { register struct mbuf *mb; register u_int32_t *tl; register caddr_t bpos; register int i; struct mbuf *mreq, *mb2; int siz, grpsiz, authsiz; authsiz = nfsm_rndup(auth_len); MGETHDR(mb, M_WAIT, MT_DATA); if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { MCLGET(mb, M_WAIT); } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); } else { MH_ALIGN(mb, 8 * NFSX_UNSIGNED); } mb->m_len = 0; mreq = mb; bpos = mtod(mb, caddr_t); /* * First the RPC header. */ nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED); /* Get a pretty random xid to start with */ if (!nfs_xid) nfs_xid = random(); /* * Skip zero xid if it should ever happen. */ if (++nfs_xid == 0) nfs_xid++; *tl++ = *xidp = txdr_unsigned(nfs_xid); *tl++ = rpc_call; *tl++ = rpc_vers; if (nmflag & NFSMNT_NQNFS) { *tl++ = txdr_unsigned(NQNFS_PROG); *tl++ = txdr_unsigned(NQNFS_VER3); } else { *tl++ = txdr_unsigned(NFS_PROG); if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(NFS_VER3); else *tl++ = txdr_unsigned(NFS_VER2); } if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(procid); else *tl++ = txdr_unsigned(nfsv2_procid[procid]); /* * And then the authorization cred. */ *tl++ = txdr_unsigned(auth_type); *tl = txdr_unsigned(authsiz); switch (auth_type) { case RPCAUTH_UNIX: nfsm_build(tl, u_int32_t *, auth_len); *tl++ = 0; /* stamp ?? */ *tl++ = 0; /* NULL hostname */ *tl++ = txdr_unsigned(cr->cr_uid); *tl++ = txdr_unsigned(cr->cr_groups[0]); grpsiz = (auth_len >> 2) - 5; *tl++ = txdr_unsigned(grpsiz); for (i = 1; i <= grpsiz; i++) *tl++ = txdr_unsigned(cr->cr_groups[i]); break; case RPCAUTH_KERB4: siz = auth_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(auth_str, bpos, i); mb->m_len += i; auth_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } break; }; /* * And the verifier... 
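/*
 * nfsm_rpchead() above seeds the transaction id lazily from random(),
 * then increments it per request and never hands out zero, so a zero
 * xid can keep meaning "unused".  The same scheme as a standalone
 * sketch; the function name and the single static counter are
 * assumptions of the sketch, and the value is left in host order here
 * (the kernel stores it with txdr_unsigned()).
 */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

static uint32_t sketch_xid;		/* 0 means "not seeded yet" */

static uint32_t
next_xid(void)
{
	if (sketch_xid == 0)		/* pick a pretty random start once */
		sketch_xid = (uint32_t)random();
	if (++sketch_xid == 0)		/* skip zero if it ever comes up */
		sketch_xid++;
	return (sketch_xid);
}

int
main(void)
{
	for (int i = 0; i < 3; i++)
		printf("xid %u\n", next_xid());
	return (0);
}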
*/ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (verf_str) { *tl++ = txdr_unsigned(RPCAUTH_KERB4); *tl = txdr_unsigned(verf_len); siz = verf_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(verf_str, bpos, i); mb->m_len += i; verf_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } } else { *tl++ = txdr_unsigned(RPCAUTH_NULL); *tl = 0; } mb->m_next = mrest; mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; mreq->m_pkthdr.rcvif = (struct ifnet *)0; *mbp = mb; return (mreq); } /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbuftouio(mrep, uiop, siz, dpos) struct mbuf **mrep; register struct uio *uiop; int siz; caddr_t *dpos; { register char *mbufcp, *uiocp; register int xfer, left, len; register struct mbuf *mp; long uiosiz, rem; int error = 0; mp = *mrep; mbufcp = *dpos; len = mtod(mp, caddr_t)+mp->m_len-mbufcp; rem = nfsm_rndup(siz)-siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) return (EFBIG); left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mp->m_next; if (mp == NULL) return (EBADRPC); mbufcp = mtod(mp, caddr_t); len = mp->m_len; } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(mbufcp, uiocp, xfer); else copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } *dpos = mbufcp; *mrep = mp; if (rem > 0) { if (len < rem) error = nfs_adv(mrep, dpos, rem, len); else *dpos += rem; } return (error); } /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can ony handle iovcnt == 1 */ int nfsm_uiotombuf(uiop, mq, siz, bpos) register struct uio *uiop; struct mbuf **mq; int siz; caddr_t *bpos; { register char *uiocp; register struct mbuf *mp, *mp2; register int xfer, left, mlen; int uiosiz, clflg, rem; char *cp; #ifdef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfsm_uiotombuf: iovcnt != 1"); #endif if (siz > MLEN) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = nfsm_rndup(siz)-siz; mp = mp2 = *mq; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { MGET(mp, M_WAIT, MT_DATA); if (clflg) MCLGET(mp, M_WAIT); mp->m_len = 0; mp2->m_next = mp; mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet.. 
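/*
 * nfsm_mbuftouio() above computes rem = nfsm_rndup(siz) - siz and skips
 * that many bytes after the copy, because XDR opaque data is always
 * padded out to a 4-byte boundary on the wire.  A tiny standalone sketch
 * of just that rounding; the macro below is local to the sketch and only
 * mirrors what nfsm_rndup() does.
 */
#include <stdio.h>

#define SKETCH_RNDUP(n)	(((n) + 3) & ~3)	/* next multiple of 4 */

int
main(void)
{
	for (int siz = 1; siz <= 8; siz++) {
		int rem = SKETCH_RNDUP(siz) - siz;	/* pad bytes to skip */
		printf("payload %d -> wire %d (skip %d)\n",
		    siz, SKETCH_RNDUP(siz), rem);
	}
	return (0);
}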
*/ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); mp->m_len += xfer; left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { MGET(mp, M_WAIT, MT_DATA); mp->m_len = 0; mp2->m_next = mp; } cp = mtod(mp, caddr_t)+mp->m_len; for (left = 0; left < rem; left++) *cp++ = '\0'; mp->m_len += rem; *bpos = cp; } else *bpos = mtod(mp, caddr_t)+mp->m_len; *mq = mp; return (0); } /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macros nfsm_dissect and nfsm_dissecton for tough * cases. (The macros use the vars. dpos and dpos2) */ int nfsm_disct(mdp, dposp, siz, left, cp2) struct mbuf **mdp; caddr_t *dposp; int siz; int left; caddr_t *cp2; { register struct mbuf *mp, *mp2; register int siz2, xfer; register caddr_t p; mp = *mdp; while (left == 0) { *mdp = mp = mp->m_next; if (mp == NULL) return (EBADRPC); left = mp->m_len; *dposp = mtod(mp, caddr_t); } if (left >= siz) { *cp2 = *dposp; *dposp += siz; } else if (mp->m_next == NULL) { return (EBADRPC); } else if (siz > MHLEN) { panic("nfs S too big"); } else { MGET(mp2, M_WAIT, MT_DATA); mp2->m_next = mp->m_next; mp->m_next = mp2; mp->m_len -= left; mp = mp2; *cp2 = p = mtod(mp, caddr_t); bcopy(*dposp, p, left); /* Copy what was left */ siz2 = siz-left; p += left; mp2 = mp->m_next; /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (EBADRPC); xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2; if (xfer > 0) { bcopy(mtod(mp2, caddr_t), p, xfer); NFSMADV(mp2, xfer); mp2->m_len -= xfer; p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mp2->m_next; } mp->m_len = siz; *mdp = mp2; *dposp = mtod(mp2, caddr_t); } return (0); } /* * Advance the position in the mbuf chain. */ int nfs_adv(mdp, dposp, offs, left) struct mbuf **mdp; caddr_t *dposp; int offs; int left; { register struct mbuf *m; register int s; m = *mdp; s = left; while (s < offs) { offs -= s; m = m->m_next; if (m == NULL) return (EBADRPC); s = m->m_len; } *mdp = m; *dposp = mtod(m, caddr_t)+offs; return (0); } /* * Copy a string into mbufs for the hard cases... 
*/ int nfsm_strtmbuf(mb, bpos, cp, siz) struct mbuf **mb; char **bpos; const char *cp; long siz; { register struct mbuf *m1 = NULL, *m2; long left, xfer, len, tlen; u_int32_t *tl; int putsize; putsize = 1; m2 = *mb; left = M_TRAILINGSPACE(m2); if (left > 0) { tl = ((u_int32_t *)(*bpos)); *tl++ = txdr_unsigned(siz); putsize = 0; left -= NFSX_UNSIGNED; m2->m_len += NFSX_UNSIGNED; if (left > 0) { bcopy(cp, (caddr_t) tl, left); siz -= left; cp += left; m2->m_len += left; left = 0; } } /* Loop around adding mbufs */ while (siz > 0) { MGET(m1, M_WAIT, MT_DATA); if (siz > MLEN) MCLGET(m1, M_WAIT); m1->m_len = NFSMSIZ(m1); m2->m_next = m1; m2 = m1; tl = mtod(m1, u_int32_t *); tlen = 0; if (putsize) { *tl++ = txdr_unsigned(siz); m1->m_len -= NFSX_UNSIGNED; tlen = NFSX_UNSIGNED; putsize = 0; } if (siz < m1->m_len) { len = nfsm_rndup(siz); xfer = siz; if (xfer < len) *(tl+(xfer>>2)) = 0; } else { xfer = len = m1->m_len; } bcopy(cp, (caddr_t) tl, xfer); m1->m_len = len+tlen; siz -= xfer; cp += xfer; } *mb = m1; *bpos = mtod(m1, caddr_t)+m1->m_len; return (0); } /* * Called once to initialize data structures... */ int nfs_init(vfsp) struct vfsconf *vfsp; { register int i; nfsmount_zone = zinit("NFSMOUNT", sizeof(struct nfsmount), 0, 0, 1); /* * Check to see if major data structures haven't bloated. */ if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) { printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC); printf("Try reducing NFS_UIDHASHSIZ\n"); } if (sizeof (struct nfsuid) > NFS_UIDALLOC) { printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); printf("Try unionizing the nu_nickname and nu_flag fields\n"); } nfs_mount_type = vfsp->vfc_typenum; nfsrtt.pos = 0; rpc_vers = txdr_unsigned(RPC_VER2); rpc_call = txdr_unsigned(RPC_CALL); rpc_reply = txdr_unsigned(RPC_REPLY); rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); rpc_mismatch = txdr_unsigned(RPC_MISMATCH); rpc_autherr = txdr_unsigned(RPC_AUTHERR); rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); nfs_prog = txdr_unsigned(NFS_PROG); nqnfs_prog = txdr_unsigned(NQNFS_PROG); nfs_true = txdr_unsigned(TRUE); nfs_false = txdr_unsigned(FALSE); nfs_xdrneg1 = txdr_unsigned(-1); nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { nfs_iodwant[i] = (struct proc *)0; nfs_iodmount[i] = (struct nfsmount *)0; } nfs_nhinit(); /* Init the nfsnode table */ #ifndef NFS_NOSERVER nfsrv_init(0); /* Init server data structures */ nfsrv_initcache(); /* Init the server request cache */ #endif /* * Initialize the nqnfs server stuff. */ if (nqnfsstarttime == 0) { nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease + nqsrv_clockskew + nqsrv_writeslack; NQLOADNOVRAM(nqnfsstarttime); CIRCLEQ_INIT(&nqtimerhead); nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); } /* * Initialize reply list and start timer */ TAILQ_INIT(&nfs_reqq); nfs_timer(0); /* * Set up lease_check and lease_updatetime so that other parts * of the system can call us, if we are loadable. 
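/*
 * nfs_init() above derives nfs_ticks with (hz * NFS_TICKINTVL + 500) /
 * 1000, i.e. a round-to-nearest conversion of a millisecond interval
 * into clock ticks, clamped to at least one tick.  A standalone sketch,
 * assuming NFS_TICKINTVL is expressed in milliseconds (which the +500
 * rounding implies); the hz values below are only examples.
 */
#include <stdio.h>

static int
msec_to_ticks(int hz, int msec)
{
	int ticks = (hz * msec + 500) / 1000;	/* round to nearest tick */

	return (ticks < 1 ? 1 : ticks);		/* never less than one */
}

int
main(void)
{
	printf("hz=100,  5ms -> %d tick(s)\n", msec_to_ticks(100, 5));
	printf("hz=1000, 5ms -> %d tick(s)\n", msec_to_ticks(1000, 5));
	return (0);
}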
*/ #ifndef NFS_NOSERVER nfs_prev_vop_lease_check = default_vnodeop_p[VOFFSET(vop_lease)]; default_vnodeop_p[VOFFSET(vop_lease)] = (vop_t *)nqnfs_vop_lease_check; #endif nfs_prev_lease_updatetime = lease_updatetime; lease_updatetime = nfs_lease_updatetime; nfs_prev_nfssvc_sy_narg = sysent[SYS_nfssvc].sy_narg; sysent[SYS_nfssvc].sy_narg = 2; nfs_prev_nfssvc_sy_call = sysent[SYS_nfssvc].sy_call; sysent[SYS_nfssvc].sy_call = (sy_call_t *)nfssvc; #ifndef NFS_NOSERVER nfs_prev_getfh_sy_narg = sysent[SYS_getfh].sy_narg; sysent[SYS_getfh].sy_narg = 2; nfs_prev_getfh_sy_call = sysent[SYS_getfh].sy_call; sysent[SYS_getfh].sy_call = (sy_call_t *)getfh; #endif nfs_pbuf_freecnt = nswbuf / 2 + 1; return (0); } int nfs_uninit(vfsp) struct vfsconf *vfsp; { untimeout(nfs_timer, (void *)NULL, nfs_timer_handle); nfs_mount_type = -1; #ifndef NFS_NOSERVER default_vnodeop_p[VOFFSET(vop_lease)] = nfs_prev_vop_lease_check; #endif lease_updatetime = nfs_prev_lease_updatetime; sysent[SYS_nfssvc].sy_narg = nfs_prev_nfssvc_sy_narg; sysent[SYS_nfssvc].sy_call = nfs_prev_nfssvc_sy_call; #ifndef NFS_NOSERVER sysent[SYS_getfh].sy_narg = nfs_prev_getfh_sy_narg; sysent[SYS_getfh].sy_call = nfs_prev_getfh_sy_call; #endif return (0); } /* * Attribute cache routines. * nfs_loadattrcache() - loads or updates the cache contents from attributes * that are on the mbuf list * nfs_getattrcache() - returns valid attributes if found in cache, returns * error otherwise */ /* * Load the attribute cache (that lives in the nfsnode entry) with * the values on the mbuf list and * Iff vap not NULL * copy the attributes to *vaper */ int nfs_loadattrcache(vpp, mdp, dposp, vaper) struct vnode **vpp; struct mbuf **mdp; caddr_t *dposp; struct vattr *vaper; { register struct vnode *vp = *vpp; register struct vattr *vap; register struct nfs_fattr *fp; register struct nfsnode *np; register int32_t t1; caddr_t cp2; int error = 0, rdev; struct mbuf *md; enum vtype vtyp; u_short vmode; struct timespec mtime; struct vnode *nvp; int v3 = NFS_ISV3(vp); md = *mdp; t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) != 0) return (error); fp = (struct nfs_fattr *)cp2; if (v3) { vtyp = nfsv3tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); rdev = makeudev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), fxdr_unsigned(int, fp->fa3_rdev.specdata2)); fxdr_nfsv3time(&fp->fa3_mtime, &mtime); } else { vtyp = nfsv2tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); /* * XXX * * The duplicate information returned in fa_type and fa_mode * is an ambiguity in the NFS version 2 protocol. * * VREG should be taken literally as a regular file. If a * server intents to return some type information differently * in the upper bits of the mode field (e.g. for sockets, or * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we * leave the examination of the mode bits even in the VREG * case to avoid breakage for bogus servers, but we make sure * that there are actually type bits set in the upper part of * fa_mode (and failing that, trust the va_type field). * * NFSv3 cleared the issue, and requires fa_mode to not * contain any type information (while also introduing sockets * and FIFOs for fa_type). */ if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) vtyp = IFTOVT(vmode); rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); fxdr_nfsv2time(&fp->fa2_mtime, &mtime); /* * Really ugly NFSv2 kludge. 
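/*
 * The long XXX comment above describes how the v2 branch of
 * nfs_loadattrcache() disambiguates fa_type and fa_mode: trust fa_type
 * unless it is VNON, or a VREG that nevertheless carries type bits in
 * the mode, in which case the type is derived from the mode.  A hedged
 * standalone sketch of that decision; the enum and helper below are
 * local to the sketch and only stand in for the vtype values and
 * IFTOVT() used by the kernel.
 */
#include <stdio.h>
#include <sys/stat.h>

enum sketch_vtype { SK_VNON, SK_VREG, SK_VDIR, SK_VCHR, SK_VFIFO };

static enum sketch_vtype
mode_to_vtype(unsigned short mode)		/* stand-in for IFTOVT() */
{
	switch (mode & S_IFMT) {
	case S_IFREG:	return (SK_VREG);
	case S_IFDIR:	return (SK_VDIR);
	case S_IFCHR:	return (SK_VCHR);
	case S_IFIFO:	return (SK_VFIFO);
	default:	return (SK_VNON);
	}
}

static enum sketch_vtype
v2_resolve_type(enum sketch_vtype fa_type, unsigned short fa_mode)
{
	/* The rule sketched from the comment above. */
	if (fa_type == SK_VNON ||
	    (fa_type == SK_VREG && (fa_mode & S_IFMT) != 0))
		return (mode_to_vtype(fa_mode));
	return (fa_type);
}

int
main(void)
{
	/* A "regular file" whose mode bits actually say character device. */
	printf("%d\n", (int)v2_resolve_type(SK_VREG, S_IFCHR | 0644));
	return (0);
}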
*/ if (vtyp == VCHR && rdev == 0xffffffff) vtyp = VFIFO; } /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); if (vp->v_type != vtyp) { vp->v_type = vtyp; if (vp->v_type == VFIFO) { vp->v_op = fifo_nfsv2nodeop_p; } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; nvp = checkalias(vp, rdev, vp->v_mount); if (nvp) { /* * Discard unneeded vnode, but save its nfsnode. * Since the nfsnode does not have a lock, its * vnode lock has to be carried over. */ nvp->v_vnlock = vp->v_vnlock; vp->v_vnlock = NULL; nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased node. */ np->n_vnode = nvp; *vpp = vp = nvp; } } np->n_mtime = mtime.tv_sec; } vap = &np->n_vattr; vap->va_type = vtyp; vap->va_mode = (vmode & 07777); vap->va_rdev = rdev; vap->va_mtime = mtime; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; if (v3) { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_hyper(&fp->fa3_size); vap->va_blocksize = NFS_FABLKSIZE; vap->va_bytes = fxdr_hyper(&fp->fa3_used); vap->va_fileid = fxdr_unsigned(int32_t, fp->fa3_fileid.nfsuquad[1]); fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); vap->va_flags = 0; vap->va_filerev = 0; } else { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) * NFS_FABLKSIZE; vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); vap->va_flags = 0; vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_sec); vap->va_ctime.tv_nsec = 0; vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); vap->va_filerev = 0; } if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, np->n_size); } else np->n_size = vap->va_size; } np->n_attrstamp = time_second; if (vaper != NULL) { bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } return (0); } #ifdef NFS_ACDEBUG #include SYSCTL_DECL(_vfs_nfs); static int nfs_acdebug; SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, ""); #endif /* * Check the time stamp * If the cache is valid, copy contents to *vap and return 0 * otherwise return an error */ int nfs_getattrcache(vp, vaper) register struct vnode *vp; struct vattr *vaper; { register struct nfsnode *np; register struct vattr *vap; struct nfsmount *nmp; int timeo; np = VTONFS(vp); vap = &np->n_vattr; nmp = VFSTONFS(vp->v_mount); /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */ timeo = (time_second - np->n_mtime) / 10; #ifdef NFS_ACDEBUG if (nfs_acdebug>1) printf("nfs_getattrcache: initial timeo = %d\n", timeo); #endif if 
(vap->va_type == VDIR) { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin) timeo = nmp->nm_acdirmin; else if (timeo > nmp->nm_acdirmax) timeo = nmp->nm_acdirmax; } else { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin) timeo = nmp->nm_acregmin; else if (timeo > nmp->nm_acregmax) timeo = nmp->nm_acregmax; } #ifdef NFS_ACDEBUG if (nfs_acdebug > 2) printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n", nmp->nm_acregmin, nmp->nm_acregmax, nmp->nm_acdirmin, nmp->nm_acdirmax); if (nfs_acdebug) printf("nfs_getattrcache: age = %d; final timeo = %d\n", (time_second - np->n_attrstamp), timeo); #endif if ((time_second - np->n_attrstamp) >= timeo) { nfsstats.attrcache_misses++; return (ENOENT); } nfsstats.attrcache_hits++; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, np->n_size); } else np->n_size = vap->va_size; } bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } return (0); } #ifndef NFS_NOSERVER /* * Set up nameidata for a lookup() call and do it. * * If pubflag is set, this call is done for a lookup operation on the * public filehandle. In that case we allow crossing mountpoints and * absolute pathnames. However, the caller is expected to check that * the lookup result is within the public fs, and deny access if * it is not. * * nfs_namei() clears out garbage fields that namei() might leave garbage. * This is mainly ni_vp and ni_dvp when an error occurs, and ni_dvp when no * error occurs but the parent was not requested. * * dirp may be set whether an error is returned or not, and must be * released by the caller. */ int nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag) register struct nameidata *ndp; fhandle_t *fhp; int len; struct nfssvc_sock *slp; struct sockaddr *nam; struct mbuf **mdp; caddr_t *dposp; struct vnode **retdirp; struct proc *p; int kerbflag, pubflag; { register int i, rem; register struct mbuf *md; register char *fromcp, *tocp, *cp; struct iovec aiov; struct uio auio; struct vnode *dp; int error, rdonly, linklen; struct componentname *cnp = &ndp->ni_cnd; *retdirp = (struct vnode *)0; cnp->cn_pnbuf = zalloc(namei_zone); /* * Copy the name from the mbuf list to ndp->ni_pnbuf * and set the various ndp fields appropriately. */ fromcp = *dposp; tocp = cnp->cn_pnbuf; md = *mdp; rem = mtod(md, caddr_t) + md->m_len - fromcp; cnp->cn_hash = 0; for (i = 0; i < len; i++) { while (rem == 0) { md = md->m_next; if (md == NULL) { error = EBADRPC; goto out; } fromcp = mtod(md, caddr_t); rem = md->m_len; } if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) { error = EACCES; goto out; } cnp->cn_hash += (unsigned char)*fromcp; *tocp++ = *fromcp++; rem--; } *tocp = '\0'; *mdp = md; *dposp = fromcp; len = nfsm_rndup(len)-len; if (len > 0) { if (rem >= len) *dposp += len; else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0) goto out; } /* * Extract and set starting directory. */ error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, nam, &rdonly, kerbflag, pubflag); if (error) goto out; if (dp->v_type != VDIR) { vrele(dp); error = ENOTDIR; goto out; } if (rdonly) cnp->cn_flags |= RDONLY; /* * Set return directory. 
Reference to dp is implicitly transfered * to the returned pointer */ *retdirp = dp; if (pubflag) { /* * Oh joy. For WebNFS, handle those pesky '%' escapes, * and the 'native path' indicator. */ cp = zalloc(namei_zone); fromcp = cnp->cn_pnbuf; tocp = cp; if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) { switch ((unsigned char)*fromcp) { case WEBNFS_NATIVE_CHAR: /* * 'Native' path for us is the same * as a path according to the NFS spec, * just skip the escape char. */ fromcp++; break; /* * More may be added in the future, range 0x80-0xff */ default: error = EIO; zfree(namei_zone, cp); goto out; } } /* * Translate the '%' escapes, URL-style. */ while (*fromcp != '\0') { if (*fromcp == WEBNFS_ESC_CHAR) { if (fromcp[1] != '\0' && fromcp[2] != '\0') { fromcp++; *tocp++ = HEXSTRTOI(fromcp); fromcp += 2; continue; } else { error = ENOENT; zfree(namei_zone, cp); goto out; } } else *tocp++ = *fromcp++; } *tocp = '\0'; zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1; ndp->ni_segflg = UIO_SYSSPACE; if (pubflag) { ndp->ni_rootdir = rootvnode; ndp->ni_loopcnt = 0; if (cnp->cn_pnbuf[0] == '/') dp = rootvnode; } else { cnp->cn_flags |= NOCROSSMOUNT; } /* * Initialize for scan, set ni_startdir and bump ref on dp again * becuase lookup() will dereference ni_startdir. */ cnp->cn_proc = p; VREF(dp); ndp->ni_startdir = dp; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* * Call lookup() to do the real work. If an error occurs, * ndp->ni_vp and ni_dvp are left uninitialized or NULL and * we do not have to dereference anything before returning. * In either case ni_startdir will be dereferenced and NULLed * out. */ error = lookup(ndp); if (error) break; /* * Check for encountering a symbolic link. Trivial * termination occurs if no symlink encountered. * Note: zfree is safe because error is 0, so we will * not zfree it again when we break. */ if ((cnp->cn_flags & ISSYMLINK) == 0) { nfsrv_object_create(ndp->ni_vp); if (cnp->cn_flags & (SAVENAME | SAVESTART)) cnp->cn_flags |= HASBUF; else zfree(namei_zone, cnp->cn_pnbuf); break; } /* * Validate symlink */ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) VOP_UNLOCK(ndp->ni_dvp, 0, p); if (!pubflag) { error = EINVAL; goto badlink2; } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; goto badlink2; } if (ndp->ni_pathlen > 1) cp = zalloc(namei_zone); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = (struct proc *)0; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink1: if (ndp->ni_pathlen > 1) zfree(namei_zone, cp); badlink2: vrele(ndp->ni_dvp); vput(ndp->ni_vp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink1; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink1; } /* * Adjust or replace path */ if (ndp->ni_pathlen > 1) { bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; /* * Cleanup refs for next loop and check if root directory * should replace current directory. Normally ni_dvp * becomes the new base directory and is cleaned up when * we loop. Explicitly null pointers after invalidation * to clarify operation. 
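/*
 * The WebNFS branch above rewrites the looked-up name, translating
 * URL-style '%xx' escapes via HEXSTRTOI() and rejecting a '%' that is
 * not followed by two more characters.  A standalone sketch of the same
 * translation; the helper names are local to the sketch and, unlike the
 * kernel loop, this variant also validates that the two characters are
 * hex digits.
 */
#include <stdio.h>
#include <stddef.h>

static int
hexval(char c)				/* one hex digit, or -1 */
{
	if (c >= '0' && c <= '9') return (c - '0');
	if (c >= 'a' && c <= 'f') return (c - 'a' + 10);
	if (c >= 'A' && c <= 'F') return (c - 'A' + 10);
	return (-1);
}

static int
webnfs_unescape(const char *from, char *to, size_t tolen)
{
	size_t n = 0;

	while (*from != '\0') {
		int c = *from++;
		if (c == '%') {
			int hi, lo;
			/* As above: '%' must be followed by two characters. */
			if ((hi = hexval(from[0])) < 0 ||
			    (lo = hexval(from[1])) < 0)
				return (-1);
			c = (hi << 4) | lo;
			from += 2;
		}
		if (n + 1 >= tolen)
			return (-1);
		to[n++] = (char)c;
	}
	to[n] = '\0';
	return (0);
}

int
main(void)
{
	char buf[64];

	if (webnfs_unescape("a%20b", buf, sizeof(buf)) == 0)
		printf("'%s'\n", buf);		/* prints 'a b' */
	return (0);
}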
*/ vput(ndp->ni_vp); ndp->ni_vp = NULL; if (cnp->cn_pnbuf[0] == '/') { vrele(ndp->ni_dvp); ndp->ni_dvp = ndp->ni_rootdir; VREF(ndp->ni_dvp); } ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } /* * nfs_namei() guarentees that fields will not contain garbage * whether an error occurs or not. This allows the caller to track * cleanup state trivially. */ out: if (error) { zfree(namei_zone, cnp->cn_pnbuf); ndp->ni_vp = NULL; ndp->ni_dvp = NULL; ndp->ni_startdir = NULL; cnp->cn_flags &= ~HASBUF; } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } return (error); } /* * A fiddled version of m_adj() that ensures null fill to a long * boundary and only trims off the back end */ void nfsm_adj(mp, len, nul) struct mbuf *mp; register int len; int nul; { register struct mbuf *m; register int count, i; register char *cp; /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ count = 0; m = mp; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len > len) { m->m_len -= len; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ for (m = mp; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } break; } count -= m->m_len; } for (m = m->m_next;m;m = m->m_next) m->m_len = 0; } /* * Make these functions instead of macros, so that the kernel text size * doesn't get too big... 
*/ void nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int before_ret; register struct vattr *before_vap; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_int32_t *tl; if (before_ret) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(before_vap->va_size, tl); tl += 2; txdr_nfsv3time(&(before_vap->va_mtime), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_ctime), tl); } *bposp = bpos; *mbp = mb; nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); } void nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_int32_t *tl; register struct nfs_fattr *fp; if (after_ret) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; nfsm_srvfattr(nfsd, after_vap, fp); } *mbp = mb; *bposp = bpos; } void nfsm_srvfattr(nfsd, vap, fp) register struct nfsrv_descript *nfsd; register struct vattr *vap; register struct nfs_fattr *fp; { fp->fa_nlink = txdr_unsigned(vap->va_nlink); fp->fa_uid = txdr_unsigned(vap->va_uid); fp->fa_gid = txdr_unsigned(vap->va_gid); if (nfsd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv3_type(vap->va_type); fp->fa_mode = vtonfsv3_mode(vap->va_mode); txdr_hyper(vap->va_size, &fp->fa3_size); txdr_hyper(vap->va_bytes, &fp->fa3_used); fp->fa3_rdev.specdata1 = txdr_unsigned(umajor(vap->va_rdev)); fp->fa3_rdev.specdata2 = txdr_unsigned(uminor(vap->va_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); fp->fa3_fileid.nfsuquad[0] = 0; fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(vap->va_type); fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); fp->fa2_size = txdr_unsigned(vap->va_size); fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); if (vap->va_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(vap->va_rdev); fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(vap->va_fsid); fp->fa2_fileid = txdr_unsigned(vap->va_fileid); txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); } } /* * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling VFS_FHTOVP() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * - if not lockflag unlock it with VOP_UNLOCK() */ int nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag, pubflag) fhandle_t *fhp; int lockflag; struct vnode **vpp; struct ucred *cred; struct nfssvc_sock *slp; struct sockaddr *nam; int *rdonlyp; int kerbflag; int pubflag; { struct proc *p = curproc; /* XXX */ register struct mount *mp; register int i; struct ucred *credanon; int error, exflags; #ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */ struct sockaddr_int *saddr; #endif 
*vpp = (struct vnode *)0; if (nfs_ispublicfh(fhp)) { if (!pubflag || !nfs_pub.np_valid) return (ESTALE); fhp = &nfs_pub.np_handle; } mp = vfs_getvfs(&fhp->fh_fsid); if (!mp) return (ESTALE); error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon); if (error) return (error); #ifdef MNT_EXNORESPORT if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) { saddr = (struct sockaddr_in *)nam; if (saddr->sin_family == AF_INET && ntohs(saddr->sin_port) >= IPPORT_RESERVED) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } #endif /* * Check/setup credentials. */ if (exflags & MNT_EXKERB) { if (!kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } else if (kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { cred->cr_uid = credanon->cr_uid; for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) cred->cr_groups[i] = credanon->cr_groups[i]; cred->cr_ngroups = i; } if (exflags & MNT_EXRDONLY) *rdonlyp = 1; else *rdonlyp = 0; nfsrv_object_create(*vpp); if (!lockflag) VOP_UNLOCK(*vpp, 0, p); return (0); } /* * WebNFS: check if a filehandle is a public filehandle. For v3, this * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has * transformed this to all zeroes in both cases, so check for it. */ int nfs_ispublicfh(fhp) fhandle_t *fhp; { char *cp = (char *)fhp; int i; for (i = 0; i < NFSX_V3FH; i++) if (*cp++ != 0) return (FALSE); return (TRUE); } #endif /* NFS_NOSERVER */ /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ int netaddr_match(family, haddr, nam) int family; union nethostaddr *haddr; struct sockaddr *nam; { register struct sockaddr_in *inetaddr; switch (family) { case AF_INET: inetaddr = (struct sockaddr_in *)nam; if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inetaddr) return (1); break; #ifdef ISO case AF_ISO: { register struct sockaddr_iso *isoaddr1, *isoaddr2; isoaddr1 = (struct sockaddr_iso *)nam; isoaddr2 = (struct sockaddr_iso *)haddr->had_nam; if (isoaddr1->siso_family == AF_ISO && isoaddr1->siso_nlen > 0 && isoaddr1->siso_nlen == isoaddr2->siso_nlen && SAME_ISOADDR(isoaddr1, isoaddr2)) return (1); break; } #endif /* ISO */ default: break; }; return (0); } static nfsuint64 nfs_nullcookie = { { 0, 0 } }; /* * This function finds the directory cookie that corresponds to the * logical byte offset given. 
*/ nfsuint64 * nfs_getcookie(np, off, add) register struct nfsnode *np; off_t off; int add; { register struct nfsdmap *dp, *dp2; register int pos; pos = (uoff_t)off / NFS_DIRBLKSIZ; if (pos == 0 || off < 0) { #ifdef DIAGNOSTIC if (add) panic("nfs getcookie add at <= 0"); #endif return (&nfs_nullcookie); } pos--; dp = np->n_cookies.lh_first; if (!dp) { if (add) { MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return ((nfsuint64 *)0); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (dp->ndm_list.le_next) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return ((nfsuint64 *)0); dp = dp->ndm_list.le_next; } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return ((nfsuint64 *)0); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return ((nfsuint64 *)0); } return (&dp->ndm_cookies[pos]); } /* * Invalidate cached directory information, except for the actual directory * blocks (which are invalidated separately). * Done mainly to avoid the use of stale offset cookies. */ void nfs_invaldir(vp) register struct vnode *vp; { register struct nfsnode *np = VTONFS(vp); #ifdef DIAGNOSTIC if (vp->v_type != VDIR) panic("nfs: invaldir not dir"); #endif np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; if (np->n_cookies.lh_first) np->n_cookies.lh_first->ndm_eocookie = 0; } /* * The write verifier has changed (probably due to a server reboot), so all * B_NEEDCOMMIT blocks will have to be written again. Since they are on the * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT * flag. Once done the new write verifier can be set for the mount point. */ void nfs_clearcommit(mp) struct mount *mp; { register struct vnode *vp, *nvp; register struct buf *bp, *nbp; int s; s = splbio(); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ goto loop; nvp = vp->v_mntvnodes.le_next; for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_REFCNT(bp) == 0 && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bp->b_flags &= ~B_NEEDCOMMIT; } } splx(s); } #ifndef NFS_NOSERVER /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. */ int nfsrv_errmap(nd, err) struct nfsrv_descript *nd; register int err; { register short *defaulterrp, *errp; if (nd->nd_flag & ND_NFSV3) { if (nd->nd_procnum <= NFSPROC_COMMIT) { errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; while (*++errp) { if (*errp == err) return (err); else if (*errp > err) break; } return ((int)*defaulterrp); } else return (err & 0xffff); } if (err <= ELAST) return ((int)nfsrv_v2errmap[err - 1]); return (NFSERR_IO); } int nfsrv_object_create(vp) struct vnode *vp; { if (vp == NULL || vp->v_type != VREG) return (1); return (vfs_object_create(vp, curproc, curproc ? curproc->p_ucred : NULL)); } /* * Sort the group list in increasing numerical order. * (Insertion sort by Chris Torek, who was grossed out by the bubble sort * that used to be here.) */ void nfsrvw_sort(list, num) register gid_t *list; register int num; { register int i, j; gid_t v; /* Insertion sort. 
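/*
 * nfs_getcookie() above maps a logical directory offset to a cookie by
 * dividing by NFS_DIRBLKSIZ, treating block 0 (and negative offsets) as
 * the null cookie, and then walking chained nfsdmap blocks that each
 * hold NFSNUMCOOKIES entries.  The index arithmetic alone, as a
 * standalone sketch: the two constants below are placeholders, not the
 * kernel values, and the (block, slot) pair stands in for the list walk.
 */
#include <stdio.h>

#define SK_DIRBLKSIZ	512
#define SK_NUMCOOKIES	31

static void
cookie_index(long long off, int *blockp, int *slotp)
{
	long long pos = off / SK_DIRBLKSIZ;

	if (pos == 0 || off < 0) {
		*blockp = *slotp = -1;		/* the nfs_nullcookie case */
		return;
	}
	pos--;					/* first stored cookie is pos 1 */
	*blockp = (int)(pos / SK_NUMCOOKIES);
	*slotp = (int)(pos % SK_NUMCOOKIES);
}

int
main(void)
{
	int blk, slot;

	cookie_index(3 * SK_DIRBLKSIZ, &blk, &slot);
	printf("block %d slot %d\n", blk, slot);	/* block 0 slot 2 */
	return (0);
}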
*/ for (i = 1; i < num; i++) { v = list[i]; /* find correct slot for value v, moving others up */ for (j = i; --j >= 0 && v < list[j];) list[j + 1] = list[j]; list[j + 1] = v; } } /* * copy credentials making sure that the result can be compared with bcmp(). */ void nfsrv_setcred(incred, outcred) register struct ucred *incred, *outcred; { register int i; bzero((caddr_t)outcred, sizeof (struct ucred)); outcred->cr_ref = 1; outcred->cr_uid = incred->cr_uid; outcred->cr_ngroups = incred->cr_ngroups; for (i = 0; i < incred->cr_ngroups; i++) outcred->cr_groups[i] = incred->cr_groups[i]; nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups); } #endif /* NFS_NOSERVER */ Index: head/sys/nfs/nfs_vnops.c =================================================================== --- head/sys/nfs/nfs_vnops.c (revision 49534) +++ head/sys/nfs/nfs_vnops.c (revision 49535) @@ -1,3372 +1,3372 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 - * $Id: nfs_vnops.c,v 1.137 1999/07/30 04:51:35 wpaul Exp $ + * $Id: nfs_vnops.c,v 1.138 1999/07/31 01:51:58 msmith Exp $ */ /* * vnode op calls for Sun NFS version 2 and 3 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include /* Defs */ #define TRUE 1 #define FALSE 0 /* * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. 
*/ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static int nfsspec_read __P((struct vop_read_args *)); static int nfsspec_write __P((struct vop_write_args *)); static int nfsfifo_read __P((struct vop_read_args *)); static int nfsfifo_write __P((struct vop_write_args *)); static int nfsspec_close __P((struct vop_close_args *)); static int nfsfifo_close __P((struct vop_close_args *)); #define nfs_poll vop_nopoll static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int)); static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *)); static int nfs_lookup __P((struct vop_lookup_args *)); static int nfs_create __P((struct vop_create_args *)); static int nfs_mknod __P((struct vop_mknod_args *)); static int nfs_open __P((struct vop_open_args *)); static int nfs_close __P((struct vop_close_args *)); static int nfs_access __P((struct vop_access_args *)); static int nfs_getattr __P((struct vop_getattr_args *)); static int nfs_setattr __P((struct vop_setattr_args *)); static int nfs_read __P((struct vop_read_args *)); static int nfs_mmap __P((struct vop_mmap_args *)); static int nfs_fsync __P((struct vop_fsync_args *)); static int nfs_remove __P((struct vop_remove_args *)); static int nfs_link __P((struct vop_link_args *)); static int nfs_rename __P((struct vop_rename_args *)); static int nfs_mkdir __P((struct vop_mkdir_args *)); static int nfs_rmdir __P((struct vop_rmdir_args *)); static int nfs_symlink __P((struct vop_symlink_args *)); static int nfs_readdir __P((struct vop_readdir_args *)); static int nfs_bmap __P((struct vop_bmap_args *)); static int nfs_strategy __P((struct vop_strategy_args *)); static int nfs_lookitup __P((struct vnode *, const char *, int, struct ucred *, struct proc *, struct nfsnode **)); static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *)); static int nfsspec_access __P((struct vop_access_args *)); static int nfs_readlink __P((struct vop_readlink_args *)); static int nfs_print __P((struct vop_print_args *)); static int nfs_advlock __P((struct vop_advlock_args *)); static int nfs_bwrite __P((struct vop_bwrite_args *)); /* * Global vfs data structures for nfs */ vop_t **nfsv2_vnodeop_p; static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) nfs_abortop }, { &vop_access_desc, (vop_t *) nfs_access }, { &vop_advlock_desc, (vop_t *) nfs_advlock }, { &vop_bmap_desc, (vop_t *) nfs_bmap }, { &vop_bwrite_desc, (vop_t *) nfs_bwrite }, { &vop_close_desc, (vop_t *) nfs_close }, { &vop_create_desc, (vop_t *) nfs_create }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_getpages_desc, (vop_t *) nfs_getpages }, { &vop_putpages_desc, (vop_t *) nfs_putpages }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) nfs_link }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_lookup_desc, (vop_t *) nfs_lookup }, { &vop_mkdir_desc, (vop_t *) nfs_mkdir }, { &vop_mknod_desc, (vop_t *) nfs_mknod }, { &vop_mmap_desc, (vop_t *) nfs_mmap }, { &vop_open_desc, (vop_t *) nfs_open }, { &vop_poll_desc, (vop_t *) nfs_poll }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfs_read }, { &vop_readdir_desc, (vop_t *) nfs_readdir }, { &vop_readlink_desc, (vop_t *) nfs_readlink }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_remove_desc, (vop_t *) nfs_remove }, { &vop_rename_desc, (vop_t *) 
nfs_rename }, { &vop_rmdir_desc, (vop_t *) nfs_rmdir }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_strategy_desc, (vop_t *) nfs_strategy }, { &vop_symlink_desc, (vop_t *) nfs_symlink }, { &vop_write_desc, (vop_t *) nfs_write }, { NULL, NULL } }; static struct vnodeopv_desc nfsv2_vnodeop_opv_desc = { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; VNODEOP_SET(nfsv2_vnodeop_opv_desc); /* * Special device vnode ops */ vop_t **spec_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_specop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsspec_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsspec_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsspec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = { &spec_nfsv2nodeop_p, nfsv2_specop_entries }; VNODEOP_SET(spec_nfsv2nodeop_opv_desc); vop_t **fifo_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = { { &vop_default_desc, (vop_t *) fifo_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsfifo_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsfifo_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsfifo_write }, { NULL, NULL } }; static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = { &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries }; VNODEOP_SET(fifo_nfsv2nodeop_opv_desc); static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct proc *procp)); static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap)); static int nfs_removerpc __P((struct vnode *dvp, const char *name, int namelen, struct ucred *cred, struct proc *proc)); static int nfs_renamerpc __P((struct vnode *fdvp, const char *fnameptr, int fnamelen, struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred, struct proc *proc)); static int nfs_renameit __P((struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp)); /* * Global variables */ extern u_int32_t nfs_true, nfs_false; extern u_int32_t nfs_xdrneg1; extern struct nfsstats nfsstats; extern nfstype nfsv3_type[9]; struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; int nfs_numasync = 0; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) SYSCTL_DECL(_vfs_nfs); static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); static int nfsaccess_cache_hits; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, &nfsaccess_cache_hits, 0, "NFS ACCESS cache hit count"); static int nfsaccess_cache_misses; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, &nfsaccess_cache_misses, 0, "NFS ACCESS cache miss count"); #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | 
NFSV3ACCESS_MODIFY \ | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \ | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP) static int nfs3_access_otw(struct vnode *vp, int wmode, struct proc *p, struct ucred *cred) { const int v3 = 1; u_int32_t *tl; int error = 0, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; caddr_t bpos, dpos, cp2; register int32_t t1, t2; register caddr_t cp; u_int32_t rmode; struct nfsnode *np = VTONFS(vp); nfsstats.rpccnt[NFSPROC_ACCESS]++; nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); nfsm_fhtom(vp, v3); nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(wmode); nfsm_request(vp, NFSPROC_ACCESS, p, cred); nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_int32_t, *tl); np->n_mode = rmode; np->n_modeuid = cred->cr_uid; np->n_modestamp = time_second; } nfsm_reqdone; return error; } /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. */ static int nfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; int error = 0; u_int32_t mode, wmode; int v3 = NFS_ISV3(vp); struct nfsnode *np = VTONFS(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } /* * For nfs v3, check to see if we have done this recently, and if * so return our cached result instead of making an ACCESS call. * If not, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about. */ if (v3) { if (ap->a_mode & VREAD) mode = NFSV3ACCESS_READ; else mode = 0; if (vp->v_type != VDIR) { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_EXECUTE; } else { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_LOOKUP; } /* XXX safety belt, only make blanket request if caching */ if (nfsaccess_cache_timeout > 0) { wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP; } else { wmode = mode; } /* * Does our cached result allow us to give a definite yes to * this request? */ if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) && (ap->a_cred->cr_uid == np->n_modeuid) && ((np->n_mode & mode) == mode)) { nfsaccess_cache_hits++; } else { /* * Either a no, or a don't know. Go to the wire. */ nfsaccess_cache_misses++; error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred); if (!error) { if ((np->n_mode & mode) != mode) { error = EACCES; } } } return (error); } else { if ((error = nfsspec_access(ap)) != 0) return (error); /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. * After calling nfsspec_access, we should have the correct * file size cached. 
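/*
 * For v3 the code above folds the requested a_mode into NFSV3ACCESS_*
 * bits, asks the server for a blanket mask when caching is enabled, and
 * later answers from the cached (uid, mask, timestamp) triple whenever
 * every requested bit is present and the entry is still fresh.  A
 * standalone sketch of just the cache test; the struct, names, bit
 * values and timeout below are local to the sketch.
 */
#include <stdio.h>
#include <time.h>

struct sketch_accesscache {
	unsigned int	mask;		/* bits the server said were allowed */
	unsigned int	uid;		/* credential the reply was for */
	time_t		stamp;		/* when the reply was cached */
};

#define SK_ACCESS_TIMEOUT	60	/* seconds, illustrative only */

static int				/* 1 = definite yes, 0 = go to wire */
access_cache_hit(const struct sketch_accesscache *ac, unsigned int uid,
    unsigned int wanted, time_t now)
{
	return (now < ac->stamp + SK_ACCESS_TIMEOUT &&
	    uid == ac->uid &&
	    (ac->mask & wanted) == wanted);
}

int
main(void)
{
	struct sketch_accesscache ac = { 0x1 | 0x2, 100, time(NULL) };

	printf("%d\n", access_cache_hit(&ac, 100, 0x1, time(NULL)));	/* 1 */
	printf("%d\n", access_cache_hit(&ac, 100, 0x4, time(NULL)));	/* 0 */
	return (0);
}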
*/ if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_procp = ap->a_p; if (vp->v_type == VREG) error = nfs_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = nfs_readdirrpc(vp, &auio, ap->a_cred); free(bp, M_TEMP); } else if (vp->v_type == VLNK) error = nfs_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } return (error); } } /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. */ /* ARGSUSED */ static int nfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct vattr vattr; int error; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { #ifdef DIAGNOSTIC printf("open eacces vtyp=%d\n",vp->v_type); #endif return (EACCES); } /* * Get a valid lease. If cached data is stale, flush it. */ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, ap->a_cred, ap->a_p); } while (error == NQNFS_EXPIRED); if (error) return (error); if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); np->n_brev = np->n_lrev; } } } else { if (np->n_flag & NMODIFIED) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) return (error); if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) np->n_direofoffset = 0; if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } } } if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) np->n_attrstamp = 0; /* For Open/Close consistency */ return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) 
* * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to nfs_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NQNFS - do nothing now, since 2 is dealt with via leases and * 1 should be dealt with via an fsync() system call for * cases where write errors are important. */ /* ARGSUSED */ static int nfs_close(ap) struct vop_close_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); int error = 0; if (vp->v_type == VREG) { if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && (np->n_flag & NMODIFIED)) { if (NFS_ISV3(vp)) { error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0); np->n_flag &= ~NMODIFIED; } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); np->n_attrstamp = 0; } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } } return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register caddr_t cp; register u_int32_t *tl; register int32_t t1, t2; caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); /* * Update local times for special files. */ if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; /* * First look in the cache. */ if (nfs_getattrcache(vp, ap->a_vap) == 0) return (0); if (v3 && nfsaccess_cache_timeout > 0) { nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred); if (nfs_getattrcache(vp, ap->a_vap) == 0) return (0); } nfsstats.rpccnt[NFSPROC_GETATTR]++; nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred); if (!error) { nfsm_loadattr(vp, ap->a_vap); } nfsm_reqdone; return (error); } /* * nfs setattr call. */ static int nfs_setattr(ap) struct vop_setattr_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Setting of flags is not supported. */ if (vap->va_flags != VNOVAL) return (EOPNOTSUPP); /* * Disallow write attempts if the filesystem is mounted read-only. 
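 * (The test below covers ownership, mode and timestamp changes as
 * well; size changes on a read-only mount are rejected separately
 * further down.)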
*/ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); vnode_pager_setsize(vp, vap->va_size); if (np->n_flag & NMODIFIED) { if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 1); else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); if (error) { vnode_pager_setsize(vp, np->n_size); return (error); } } tsize = np->n_size; np->n_size = np->n_vattr.va_size = vap->va_size; }; } else if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG && (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); if (error && vap->va_size != VNOVAL) { np->n_size = np->n_vattr.va_size = tsize; vnode_pager_setsize(vp, np->n_size); } return (error); } /* * Do an nfs setattr rpc. */ static int nfs_setattrrpc(vp, vap, cred, procp) register struct vnode *vp; register struct vattr *vap; struct ucred *cred; struct proc *procp; { register struct nfsv2_sattr *sp; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; u_int32_t *tl; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_SETATTR]++; nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); nfsm_fhtom(vp, v3); if (v3) { nfsm_v3attrbuild(vap, TRUE); nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (mode_t)VNOVAL) sp->sa_mode = nfs_xdrneg1; else sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = nfs_xdrneg1; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = nfs_xdrneg1; else sp->sa_gid = txdr_unsigned(vap->va_gid); sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(vp, NFSPROC_SETATTR, procp, cred); if (v3) { nfsm_wcc_data(vp, wccflag); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_reqdone; return (error); } /* * nfs lookup call, one step at a time... 
* First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; int flags = cnp->cn_flags; struct vnode *newvp; u_int32_t *tl; caddr_t cp; int32_t t1, t2; struct nfsmount *nmp; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; long len; nfsfh_t *fhp; struct nfsnode *np; int lockparent, wantparent, error = 0, attrflag, fhsize; int v3 = NFS_ISV3(dvp); struct proc *p = cnp->cn_proc; *vpp = NULLVP; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); if (dvp->v_type != VDIR) return (ENOTDIR); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { struct vattr vattr; int vpid; if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) != 0) { *vpp = NULLVP; return (error); } newvp = *vpp; vpid = newvp->v_id; /* * See the comment starting `Step through' in ufs/ufs_lookup.c * for an explanation of the locking protocol */ if (dvp == newvp) { VREF(newvp); error = 0; } else if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = vget(newvp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) error = vn_lock(dvp, LK_EXCLUSIVE, p); } else { error = vget(newvp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } if (!error) { if (vpid == newvp->v_id) { if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p) && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { nfsstats.lookupcache_hits++; if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); } vput(newvp); if (lockparent && dvp != newvp && (flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } error = vn_lock(dvp, LK_EXCLUSIVE, p); *vpp = NULLVP; if (error) return (error); } error = 0; newvp = NULLVP; nfsstats.lookupcache_misses++; nfsstats.rpccnt[NFSPROC_LOOKUP]++; len = cnp->cn_namelen; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); if (error) { nfsm_postop_attr(dvp, attrflag); m_freem(mrep); goto nfsmout; } nfsm_getfh(fhp, fhsize, v3); /* * Handle RENAME case... 
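 * The target of the rename must not be the directory itself (EISDIR);
 * otherwise its nfsnode is fetched so the caller can replace or
 * remove it.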
*/ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { if (NFS_CMPFH(np, fhp, fhsize)) { m_freem(mrep); return (EISDIR); } error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); *vpp = newvp; m_freem(mrep); cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(dvp, 0, p); return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } newvp = NFSTOV(np); if (lockparent && (flags & ISLASTCN) && (error = vn_lock(dvp, LK_EXCLUSIVE, p))) { vput(newvp); return (error); } } else if (NFS_CMPFH(np, fhp, fhsize)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { m_freem(mrep); return (error); } if (!lockparent || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(dvp, newvp, cnp); } *vpp = newvp; nfsm_reqdone; if (error) { if (newvp != NULLVP) { vrele(newvp); *vpp = NULLVP; } if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN) && error == ENOENT) { if (!lockparent) VOP_UNLOCK(dvp, 0, p); if (dvp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else error = EJUSTRETURN; } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; } return (error); } /* * nfs read call. * Just call nfs_bioread() to do the work. */ static int nfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VREG) return (EPERM); return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); } /* * nfs readlink call */ static int nfs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EINVAL); return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred)); } /* * Do a readlink rpc. * Called by nfs_doio() from below the buffer cache. 
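 * The reply length is clamped to NFS_MAXPATHLEN; if the server reports
 * exactly NFS_MAXPATHLEN bytes, the cached n_size is used instead,
 * presumably to cope with servers that return a padded length.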
*/ int nfs_readlinkrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int error = 0, len, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_READLINK]++; nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred); if (v3) nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); if (len == NFS_MAXPATHLEN) { struct nfsnode *np = VTONFS(vp); if (np->n_size && np->n_size < NFS_MAXPATHLEN) len = np->n_size; } nfsm_mtouio(uiop, len); } nfsm_reqdone; return (error); } /* * nfs read rpc call * Ditto above */ int nfs_readrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz; nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); nfsm_fhtom(vp, v3); nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED * 3); if (v3) { txdr_hyper(uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } else { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_strsiz(retlen, nmp->nm_rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); tsiz -= retlen; if (v3) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } nfsmout: return (error); } /* * nfs write call */ int nfs_writerpc(vp, uiop, cred, iomode, must_commit) register struct vnode *vp; register struct uio *uiop; struct ucred *cred; int *iomode, *must_commit; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2, backup; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz; nfsm_reqhead(vp, NFSPROC_WRITE, NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else { register u_int32_t x; nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* Set both "begin" and "current" to non-garbage. 
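 * (V2 servers ignore the "begin offset" and "total count" words, but
 * filling them in avoids putting stack garbage on the wire.)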
*/ x = txdr_unsigned((u_int32_t)uiop->uio_offset); *tl++ = x; /* "begin offset" */ *tl++ = x; /* "current offset" */ x = txdr_unsigned(len); *tl++ = x; /* total to this offset */ *tl = x; /* size of this write */ } nfsm_uiotom(uiop, len); nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred); if (v3) { wccflag = NFSV3_WCCCHK; nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; m_freem(mrep); break; } else if (rlen < len) { backup = len - rlen; uiop->uio_iov->iov_base -= backup; uiop->uio_iov->iov_len += backup; uiop->uio_offset -= backup; uiop->uio_resid += backup; len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest committment level * obtained by any of the RPCs. */ if (committed == NFSV3WRITE_FILESYNC) committed = commit; else if (committed == NFSV3WRITE_DATASYNC && commit == NFSV3WRITE_UNSTABLE) committed = commit; if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){ bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); nmp->nm_state |= NFSSTA_HASWRITEVERF; } else if (bcmp((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { *must_commit = 1; bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); } } } else nfsm_loadattr(vp, (struct vattr *)0); if (wccflag) VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; m_freem(mrep); if (error) break; tsiz -= len; } nfsmout: if (vp->v_mount->mnt_flag & MNT_ASYNC) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) uiop->uio_resid = tsiz; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ static int nfs_mknodrpc(dvp, vpp, cnp, vap) register struct vnode *dvp; register struct vnode **vpp; register struct componentname *cnp; register struct vattr *vap; { register struct nfsv2_sattr *sp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np = (struct nfsnode *)0; struct vattr vattr; char *cp2; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_int32_t rdev; int v3 = NFS_ISV3(dvp); if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = txdr_unsigned(vap->va_rdev); else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = nfs_xdrneg1; else { VOP_ABORTOP(dvp, cnp); return (EOPNOTSUPP); } if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) { VOP_ABORTOP(dvp, cnp); return (error); } nfsstats.rpccnt[NFSPROC_MKNOD]++; nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = vtonfsv3_type(vap->va_type); nfsm_v3attrbuild(vap, FALSE); if (vap->va_type == VCHR || vap->va_type == VBLK) { nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(umajor(vap->va_rdev)); *tl = txdr_unsigned(uminor(vap->va_rdev)); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = rdev; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred); if (!error) { 
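		/*
		 * The MKNOD reply may or may not carry the new file handle;
		 * if it does not, look the name up again to get the vnode.
		 */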
nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (newvp) vput(newvp); } else { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. */ /* ARGSUSED */ static int nfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *newvp; int error; error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap); if (!error) vput(newvp); return (error); } static u_long create_verf; /* * nfs file create call */ static int nfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) { VOP_ABORTOP(dvp, cnp); return (error); } if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; again: nfsstats.rpccnt[NFSPROC_CREATE]++; nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); nfsm_build(tl, u_int32_t *, NFSX_V3CREATEVERF); #ifdef INET if (!TAILQ_EMPTY(&in_ifaddrhead)) *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr; else #endif *tl++ = create_verf; *tl = ++create_verf; } else { *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); nfsm_v3attrbuild(vap, FALSE); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = 0; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } if (newvp) vput(newvp); } else if (v3 && (fmode & O_EXCL)) error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc); if (!error) { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } if (error || (cnp->cn_flags & SAVESTART) == 0) 
zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. * - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(ap) struct vop_remove_args /* { struct vnodeop_desc *a_desc; struct vnode * a_dvp; struct vnode * a_vp; struct componentname * a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; #ifndef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("nfs_remove: no name"); if (vp->v_usecount < 1) panic("nfs_remove: bad v_usecount"); #endif if (vp->v_type == VDIR) error = EPERM; else if (vp->v_usecount == 1 || (np->n_sillyrename && VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1); /* Do the rpc */ if (error != EINTR) error = nfs_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); zfree(namei_zone, cnp->cn_pnbuf); np->n_attrstamp = 0; return (error); } /* * nfs file remove rpc called from nfs_inactive */ int nfs_removeit(sp) register struct sillyrename *sp; { return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, (struct proc *)0)); } /* * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). 
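 * The parent directory is marked NMODIFIED and its attribute cache is
 * invalidated unless the V3 reply supplied fresh wcc attributes.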
*/ static int nfs_removerpc(dvp, name, namelen, cred, proc) register struct vnode *dvp; const char *name; int namelen; struct ucred *cred; struct proc *proc; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_REMOVE]++; nfsm_reqhead(dvp, NFSPROC_REMOVE, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_REMOVE, proc, cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs file rename call */ static int nfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { register struct vnode *fvp = ap->a_fvp; register struct vnode *tvp = ap->a_tvp; register struct vnode *fdvp = ap->a_fdvp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *tcnp = ap->a_tcnp; register struct componentname *fcnp = ap->a_fcnp; int error; #ifndef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("nfs_rename: no name"); #endif /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } /* * We have to flush B_DELWRI data prior to renaming * the file. If we don't, the delayed-write buffers * can be flushed out later after the file has gone stale * under NFSV3. NFSV2 does not have this problem because * ( as far as I can tell ) it flushes dirty buffers more * often. */ VOP_FSYNC(fvp, fcnp->cn_cred, MNT_WAIT, fcnp->cn_proc); if (tvp) VOP_FSYNC(tvp, tcnp->cn_cred, MNT_WAIT, tcnp->cn_proc); /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_proc); if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: VOP_ABORTOP(tdvp, tcnp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(sdvp, scnp, sp) struct vnode *sdvp; struct componentname *scnp; register struct sillyrename *sp; { return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
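 * No sillyrename handling is done here; the caller is expected to have
 * dealt with a busy target before issuing the RPC.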
*/ static int nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) register struct vnode *fdvp; const char *fnameptr; int fnamelen; register struct vnode *tdvp; const char *tnameptr; int tnamelen; struct ucred *cred; struct proc *proc; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(fdvp); nfsstats.rpccnt[NFSPROC_RENAME]++; nfsm_reqhead(fdvp, NFSPROC_RENAME, (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen)); nfsm_fhtom(fdvp, v3); nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); nfsm_fhtom(tdvp, v3); nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); nfsm_request(fdvp, NFSPROC_RENAME, proc, cred); if (v3) { nfsm_wcc_data(fdvp, fwccflag); nfsm_wcc_data(tdvp, twccflag); } nfsm_reqdone; VTONFS(fdvp)->n_flag |= NMODIFIED; VTONFS(tdvp)->n_flag |= NMODIFIED; if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs hard link create call */ static int nfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *cnp = ap->a_cnp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3; if (vp->v_mount != tdvp->v_mount) { VOP_ABORTOP(tdvp, cnp); return (EXDEV); } /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc); v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_LINK]++; nfsm_reqhead(vp, NFSPROC_LINK, NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred); if (v3) { nfsm_postop_attr(vp, attrflag); nfsm_wcc_data(tdvp, wccflag); } nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(tdvp)->n_flag |= NMODIFIED; if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) VTONFS(tdvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
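 * (If the first reply is lost, the retransmitted LINK finds the name
 * already present and the server answers EEXIST even though the link
 * was in fact created.)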
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vnode *newvp = (struct vnode *)0; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_v3attrbuild(vap, FALSE); } nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); if (!v3) { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = nfs_xdrneg1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred); if (v3) { if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); nfsm_wcc_data(dvp, wccflag); } nfsm_reqdone; if (newvp) vput(newvp); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. */ if (error == EEXIST) error = 0; /* * cnp's buffer expected to be freed if SAVESTART not set or * if an error was returned. 
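 * Note that any vnode obtained from the reply is released via vput()
 * above rather than handed back through a_vpp.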
*/ if (error || (cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); return (error); } /* * nfs make dir call */ static int nfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; register int len; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; int gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) { VOP_ABORTOP(dvp, cnp); return (error); } len = cnp->cn_namelen; nfsstats.rpccnt[NFSPROC_MKDIR]++; nfsm_reqhead(dvp, NFSPROC_MKDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); if (v3) { nfsm_v3attrbuild(vap, FALSE); } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = nfs_xdrneg1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred); if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry * if we can succeed in looking up the directory. */ if (error == EEXIST || (!error && !gotvp)) { if (newvp) { vrele(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, cnp->cn_proc, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vrele(newvp); } else *ap->a_vpp = newvp; if (error || (cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); return (error); } /* * nfs remove directory call */ static int nfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); if (dvp == vp) return (EINVAL); nfsstats.rpccnt[NFSPROC_RMDIR]++; nfsm_reqhead(dvp, NFSPROC_RMDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; cache_purge(dvp); cache_purge(vp); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
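 * (Same retransmission situation as the EEXIST case in nfs_link()
 * above, only here the second reply is ENOENT.)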
*/ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct uio *uio = ap->a_uio; int tresid, error; struct vattr vattr; if (vp->v_type != VDIR) return (EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKCACHABLE(vp, ND_READ)) { nfsstats.direofcache_hits++; return (0); } } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 && np->n_mtime == vattr.va_mtime.tv_sec) { nfsstats.direofcache_hits++; return (0); } } /* * Call nfs_bioread() to do the real work. */ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred); if (!error && uio->uio_resid == tresid) nfsstats.direofcache_misses++; return (error); } /* * Readdir rpc call. * Called from below the buffer cache by nfs_doio(). */ int nfs_readdirrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp = NULL; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp); u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag; int v3 = NFS_ISV3(vp); #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirrpc bad uio"); #endif /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. 
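 * Each pass sends back the cookie (and, for V3, the cookie verifier)
 * from the previous reply so the server can resume the directory scan.
 */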
*/ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIR]++; nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + NFSX_READDIR(v3)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; } else { nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmp->nm_readdirsize); nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; } else { m_freem(mrep); goto nfsmout; } } nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (v3) { nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); fileno = fxdr_hyper(tl); len = fxdr_unsigned(int, *(tl + 2)); } else { nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fileno = fxdr_unsigned(u_quad_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; /* null terminate */ uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); if (v3) { nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); } else { nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); } if (bigenough) { cookie.nfsuquad[0] = *tl++; if (v3) cookie.nfsuquad[1] = *tl++; } else if (v3) tl += 2; else tl++; more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: return (error); } /* * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). 
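 * READDIRPLUS also returns attributes and a file handle for each entry,
 * so every name parsed here can be primed into the namei cache.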
*/ int nfs_readdirplusrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; register struct vnode *newvp; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2, dpossav1, dpossav2; struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp), *np; nfsfh_t *fhp; u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize; #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirplusrpc bad uio"); #endif ndp->ni_dvp = vp; newvp = NULLVP; /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; nfsm_reqhead(vp, NFSPROC_READDIRPLUS, NFSX_FH(1) + 6 * NFSX_UNSIGNED); nfsm_fhtom(vp, 1); nfsm_build(tl, u_int32_t *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred); nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); fileno = fxdr_hyper(tl); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination*/ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; cnp->cn_nameptr = uiop->uio_iov->iov_base; cnp->cn_namelen = len; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); if (bigenough) { cookie.nfsuquad[0] = *tl++; cookie.nfsuquad[1] = *tl++; } else tl += 2; /* * Since the attributes are before the file handle * (sigh), we must skip over the attributes and then * 
come back and get them. */ attrflag = fxdr_unsigned(int, *tl); if (attrflag) { dpossav1 = dpos; mdsav1 = md; nfsm_adv(NFSX_V3FATTR); nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); doit = fxdr_unsigned(int, *tl); if (doit) { nfsm_getfh(fhp, fhsize, 1); if (NFS_CMPFH(dnp, fhp, fhsize)) { VREF(vp); newvp = vp; np = dnp; } else { error = nfs_nget(vp->v_mount, fhp, fhsize, &np); if (error) doit = 0; else newvp = NFSTOV(np); } } if (doit && bigenough) { dpossav2 = dpos; dpos = dpossav1; mdsav2 = md; md = mdsav1; nfsm_loadattr(newvp, (struct vattr *)0); dpos = dpossav2; md = mdsav2; dp->d_type = IFTODT(VTTOIF(np->n_vattr.va_type)); ndp->ni_vp = newvp; cnp->cn_hash = 0; for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++) cnp->cn_hash += (unsigned char)*cp; cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); } } else { /* Just skip over the file handle */ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(i)); } if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirplusrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... 
*/ static int nfs_sillyrename(dvp, vp, cnp) struct vnode *dvp, *vp; struct componentname *cnp; { register struct sillyrename *sp; struct nfsnode *np; int error; short pid; cache_purge(dvp); np = VTONFS(vp); #ifndef DIAGNOSTIC if (vp->v_type == VDIR) panic("nfs: sillyrename dir"); #endif MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NFSREQ, M_WAITOK); sp->s_cred = crdup(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* Fudge together a funny name */ pid = cnp->cn_proc->p_pid; sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid); /* Try lookitups until we get one that isn't there */ while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, (struct nfsnode **)0) == 0) { sp->s_name[4]++; if (sp->s_name[4] > 'z') { error = EINVAL; goto bad; } } error = nfs_renameit(dvp, cnp, sp); if (error) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. * npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(dvp, name, len, cred, procp, npp) register struct vnode *dvp; const char *name; int len; struct ucred *cred; struct proc *procp; struct nfsnode **npp; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos, cp2; int error = 0, fhlen, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsfh_t *nfhp; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_LOOKUP]++; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred); if (npp && !error) { nfsm_getfh(nfhp, fhlen, v3); if (*npp) { np = *npp; if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { free((caddr_t)np->n_fhp, M_NFSBIGFH); np->n_fhp = &np->n_fh; } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK); bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } } else nfsm_loadattr(newvp, (struct vattr *)0); } nfsm_reqdone; if (npp && *npp == NULL) { if (error) { if (newvp) { if (newvp == dvp) vrele(newvp); else vput(newvp); } } else *npp = np; } return (error); } /* * Nfs Version 3 commit rpc */ static int nfs_commit(vp, offset, cnt, cred, procp) register struct vnode *vp; u_quad_t offset; int cnt; struct ucred *cred; struct proc *procp; { register caddr_t cp; register u_int32_t *tl; register int32_t t1, t2; register struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) return (0); nfsstats.rpccnt[NFSPROC_COMMIT]++; nfsm_reqhead(vp, NFSPROC_COMMIT, 
NFSX_FH(1)); nfsm_fhtom(vp, 1); nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nfsm_request(vp, NFSPROC_COMMIT, procp, cred); nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_int32_t *, NFSX_V3WRITEVERF); if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, NFSX_V3WRITEVERF)) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); error = NFSERR_STALEWRITEVERF; } } nfsm_reqdone; return (error); } /* * Kludge City.. * - make nfs_bmap() essentially a no-op that does no translation * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc * (Maybe I could use the process's page mapping, but I was concerned that * Kernel Write might not be enabled and also figured copyout() would do * a lot more work than bcopy() and also it currently happens in the * context of the swapper process (2). */ static int nfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { register struct vnode *vp = ap->a_vp; if (ap->a_vpp != NULL) *ap->a_vpp = vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(), otherwise just all nfs_doio() to do the * request. */ static int nfs_strategy(ap) struct vop_strategy_args *ap; { register struct buf *bp = ap->a_bp; struct ucred *cr; struct proc *p; int error = 0; KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp)); if (bp->b_flags & B_PHYS) panic("nfs physio"); if (bp->b_flags & B_ASYNC) p = (struct proc *)0; else p = curproc; /* XXX */ if (bp->b_flags & B_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting * queue the request, wake it up and wait for completion * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(bp, NOCRED, p)) error = nfs_doio(bp, cr, p); return (error); } /* * Mmap a file * * NB Currently unsupported. */ /* ARGSUSED */ static int nfs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } /* * fsync vnode op. Just call nfs_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(ap) struct vop_fsync_args /* { struct vnodeop_desc *a_desc; struct vnode * a_vp; struct ucred * a_cred; int a_waitfor; struct proc * a_p; } */ *ap; { return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. 
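 * When "commit" is set on a V3 mount, buffers already written unstably
 * (B_DELWRI|B_NEEDCOMMIT) are batched into commit RPCs first; the
 * remaining dirty buffers are then written out, in two passes when a
 * commit is involved.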
*/ static int nfs_flush(vp, cred, waitfor, p, commit) register struct vnode *vp; struct ucred *cred; int waitfor; struct proc *p; int commit; { register struct nfsnode *np = VTONFS(vp); register struct buf *bp; register int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but nas not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, nfs_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV3(vp) && commit) { s = splbio(); /* * Count up how many buffers waiting for a commit. */ bveccount = 0; for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_REFCNT(bp) == 0 && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. */ if (bveccount > NFS_COMMITBVECSIZ) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (bvecpos >= bvecsize) break; if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT) || BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) continue; bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. * * NOTE: we are not clearing B_DONE here, so we have * to do it later on in this routine if we intend to * initiate I/O on the bp. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; bp->b_flags |= B_WRITEINPROG; vfs_busy_pages(bp, 1); /* * bp is protected by being locked, but nbp is not * and vfs_busy_pages() may sleep. We have to * recalculate nbp. */ nbp = TAILQ_NEXT(bp, b_vnbufs); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } splx(s); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. 
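 * A NFSERR_STALEWRITEVERF reply means the server has rebooted and lost
 * the unstable writes, so nfs_clearcommit() makes every uncommitted
 * buffer in the mount get written again.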
*/ if (wcred != NOCRED) retv = nfs_commit(vp, off, (int)(endoff - off), wcred, p); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = nfs_commit(vp, off, (int)size, bp->b_wcred, p); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG); if (retv) { /* * Error, leave B_DELWRI intact */ vfs_unbusy_pages(bp); brelse(bp); } else { /* * Success, remove B_DELWRI ( bundirty() ). * * b_dirtyoff/b_dirtyend seem to be NFS * specific. We should probably move that * into bundirty(). XXX */ s = splbio(); vp->v_numoutput++; bp->b_flags |= B_ASYNC; bundirty(bp); bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); bp->b_dirtyoff = bp->b_dirtyend = 0; splx(s); biodone(bp); } } } /* * Start/do any write(s) that are required. */ loop: s = splbio(); for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { if (waitfor != MNT_WAIT || passone) continue; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL, "nfsfsync", slpflag, slptimeo); splx(s); if (error == 0) panic("nfs_fsync: inconsistent lock"); if (error == ENOLCK) goto loop; if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { BUF_UNLOCK(bp); continue; } bremfree(bp); if (passone || !commit) bp->b_flags |= B_ASYNC; else bp->b_flags |= (B_ASYNC | B_WRITEINPROG | B_NEEDCOMMIT); splx(s); VOP_BWRITE(bp->b_vp, bp); goto loop; } splx(s); if (passone) { passone = 0; goto again; } if (waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } } if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) && commit) { goto loop; } } if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); return (error); } /* * NFS advisory byte-level locks. * Currently unsupported. */ static int nfs_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * The following kludge is to allow diskless support to work * until a real NFS lockd is implemented. Basically, just pretend * that this is a local lock. */ return (lf_advlock(ap, &(np->n_lockf), np->n_size)); } /* * Print out the contents of an nfsnode. */ static int nfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); printf("tag VT_NFS, fileid %ld fsid 0x%x", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Just call nfs_writebp() with the force argument set to 1. * * NOTE: B_DONE may or may not be set in a_bp on call. 
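 * nfs_writebp() decides whether a commit RPC is sufficient or whether a
 * full write must be scheduled through VOP_STRATEGY().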
*/ static int nfs_bwrite(ap) struct vop_bwrite_args /* { struct vnode *a_bp; } */ *ap; { return (nfs_writebp(ap->a_bp, 1, curproc)); } /* * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless * the force flag is one and it also handles the B_NEEDCOMMIT flag. We set * B_CACHE if this is a VMIO buffer. */ int nfs_writebp(bp, force, procp) register struct buf *bp; int force; struct proc *procp; { int s; int oldflags = bp->b_flags; int retv = 1; off_t off; if (BUF_REFCNT(bp) == 0) panic("bwrite: buffer is not locked???"); if (bp->b_flags & B_INVAL) { brelse(bp); return(0); } bp->b_flags |= B_CACHE; /* * Undirty the bp. We will redirty it later if the I/O fails. */ s = splbio(); bundirty(bp); bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); bp->b_vp->v_numoutput++; curproc->p_stats->p_ru.ru_oublock++; splx(s); /* * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not * an actual write will have to be scheduled via. VOP_STRATEGY(). * If B_WRITEINPROG is already set, then push it with a write anyhow. */ vfs_busy_pages(bp, 1); if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) { off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; bp->b_flags |= B_WRITEINPROG; retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff, bp->b_wcred, procp); bp->b_flags &= ~B_WRITEINPROG; if (!retv) { bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_flags &= ~B_NEEDCOMMIT; biodone(bp); } else if (retv == NFSERR_STALEWRITEVERF) { nfs_clearcommit(bp->b_vp->v_mount); } } if (retv) { if (force) bp->b_flags |= B_WRITEINPROG; BUF_KERNPROC(bp); VOP_STRATEGY(bp->b_vp, bp); } if( (oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); if (oldflags & B_DELWRI) { s = splbio(); reassignbuf(bp, bp->b_vp); splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vattr *vap; register gid_t *gp; register struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; mode_t mode = ap->a_mode; struct vattr vattr; register int i; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } /* * If you're the super-user, * you always get access. */ if (cred->cr_uid == 0) return (0); vap = &vattr; error = VOP_GETATTR(vp, vap, cred, ap->a_p); if (error) return (error); /* * Access check is based on only one of owner, group, public. * If not owner, then check group. If not a member of the * group, then check public access. */ if (cred->cr_uid != vap->va_uid) { mode >>= 3; gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) if (vap->va_gid == *gp) goto found; mode >>= 3; found: ; } error = (vap->va_mode & mode) == mode ? 0 : EACCES; return (error); } /* * Read wrapper for special devices. */ static int nfsspec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * Set access flag. 
*/ np->n_flag |= NACC; getnanotime(&np->n_atim); return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for special devices. */ static int nfsspec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ np->n_flag |= NUPD; getnanotime(&np->n_mtim); return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for special devices. * * Update the times on the nfsnode then do device close. */ static int nfsspec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Read wrapper for fifos. */ static int nfsfifo_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * Set access flag. */ np->n_flag |= NACC; getnanotime(&np->n_atim); return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for fifos. */ static int nfsfifo_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ np->n_flag |= NUPD; getnanotime(&np->n_mtim); return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct vattr vattr; struct timespec ts; if (np->n_flag & (NACC | NUPD)) { getnanotime(&ts); if (np->n_flag & NACC) np->n_atim = ts; if (np->n_flag & NUPD) np->n_mtim = ts; np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); } Index: head/sys/nfsclient/nfs_subs.c =================================================================== --- head/sys/nfsclient/nfs_subs.c (revision 49534) +++ head/sys/nfsclient/nfs_subs.c (revision 49535) @@ -1,2281 +1,2280 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 - * $Id: nfs_subs.c,v 1.78 1999/06/27 11:44:19 peter Exp $ + * $Id: nfs_subs.c,v 1.79 1999/07/17 18:43:47 phk Exp $ */ /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include #include #ifdef ISO #include #endif /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_int32_t nfs_xdrneg1; u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_auth_kerb; u_int32_t nfs_prog, nqnfs_prog, nfs_true, nfs_false; /* And other global data */ static u_int32_t nfs_xid = 0; static enum vtype nv2tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv3tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; int nfs_ticks; int nfs_pbuf_freecnt = -1; /* start out unlimited */ struct nfs_reqq nfs_reqq; struct nfssvc_sockhead nfssvc_sockhead; int nfssvc_sockhead_flag; struct nfsd_head nfsd_head; int nfsd_head_flag; struct nfs_bufq nfs_bufq; struct nqtimerhead nqtimerhead; struct nqfhhashhead *nqfhhashtbl; u_long nqfhhash; static void (*nfs_prev_lease_updatetime) __P((int)); static int nfs_prev_nfssvc_sy_narg; static sy_call_t *nfs_prev_nfssvc_sy_call; #ifndef NFS_NOSERVER static vop_t *nfs_prev_vop_lease_check; static int nfs_prev_getfh_sy_narg; static sy_call_t *nfs_prev_getfh_sy_call; /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. 
*/ int nfsv3_procid[NFS_NPROCS] = { NFSPROC_NULL, NFSPROC_GETATTR, NFSPROC_SETATTR, NFSPROC_NOOP, NFSPROC_LOOKUP, NFSPROC_READLINK, NFSPROC_READ, NFSPROC_NOOP, NFSPROC_WRITE, NFSPROC_CREATE, NFSPROC_REMOVE, NFSPROC_RENAME, NFSPROC_LINK, NFSPROC_SYMLINK, NFSPROC_MKDIR, NFSPROC_RMDIR, NFSPROC_READDIR, NFSPROC_FSSTAT, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP }; #endif /* NFS_NOSERVER */ /* * and the reverse mapping from generic to Version 2 procedure numbers */ int nfsv2_procid[NFS_NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; #ifndef NFS_NOSERVER /* * Maps errno values to nfs error numbers. * Use NFSERR_IO as the catch all for ones not specifically defined in * RFC 1094. */ static u_char nfsrv_v2errmap[ELAST] = { NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO /* << Last is 86 */ }; /* * Maps errno values to nfs error numbers. * Although it is not obvious whether or not NFS clients really care if * a returned error value is in the specified list for the procedure, the * safest thing to do is filter them appropriately. For Version 2, the * X/Open XNFS document is the only specification that defines error values * for each RPC (The RFC simply lists all possible error values for all RPCs), * so I have decided to not do this for Version 2. * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. 
*/ static short nfsv3err_null[] = { 0, 0, }; static short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_PERM, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOT_SYNC, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_create[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mknod[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_BADTYPE, 0, }; static short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rename[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_link[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_NOTSUPP, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsstat[] = { 
NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, nfsv3err_lookup, nfsv3err_access, nfsv3err_readlink, nfsv3err_read, nfsv3err_write, nfsv3err_create, nfsv3err_mkdir, nfsv3err_symlink, nfsv3err_mknod, nfsv3err_remove, nfsv3err_rmdir, nfsv3err_rename, nfsv3err_link, nfsv3err_readdir, nfsv3err_readdirplus, nfsv3err_fsstat, nfsv3err_fsinfo, nfsv3err_pathconf, nfsv3err_commit, }; #endif /* NFS_NOSERVER */ extern struct nfsrtt nfsrtt; extern time_t nqnfsstarttime; extern int nqsrv_clockskew; extern int nqsrv_writeslack; extern int nqsrv_maxlease; extern struct nfsstats nfsstats; extern int nqnfs_piggy[NFS_NPROCS]; extern nfstype nfsv2_type[9]; extern nfstype nfsv3_type[9]; extern struct nfsnodehashhead *nfsnodehashtbl; extern u_long nfsnodehash; struct getfh_args; extern int getfh(struct proc *, struct getfh_args *, int *); struct nfssvc_args; extern int nfssvc(struct proc *, struct nfssvc_args *, int *); LIST_HEAD(nfsnodehashhead, nfsnode); int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *)); u_quad_t nfs_curusec() { struct timeval tv; getmicrotime(&tv); return ((u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec); } /* * Create the header for an rpc request packet * The hsiz is the size of the rest of the nfs request header. * (just used to decide if a cluster is a good idea) */ struct mbuf * nfsm_reqh(vp, procid, hsiz, bposp) struct vnode *vp; u_long procid; int hsiz; caddr_t *bposp; { register struct mbuf *mb; register u_int32_t *tl; register caddr_t bpos; struct mbuf *mb2; struct nfsmount *nmp; int nqflag; MGET(mb, M_WAIT, MT_DATA); if (hsiz >= MINCLSIZE) MCLGET(mb, M_WAIT); mb->m_len = 0; bpos = mtod(mb, caddr_t); /* * For NQNFS, add lease request. */ if (vp) { nmp = VFSTONFS(vp->v_mount); if (nmp->nm_flag & NFSMNT_NQNFS) { nqflag = NQNFS_NEEDLEASE(vp, procid); if (nqflag) { nfsm_build(tl, u_int32_t *, 2*NFSX_UNSIGNED); *tl++ = txdr_unsigned(nqflag); *tl = txdr_unsigned(nmp->nm_leaseterm); } else { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = 0; } } } /* Finally, return values */ *bposp = bpos; return (mb); } /* * Build the RPC header and fill in the authorization info. * The authorization string argument is only used when the credentials * come from outside of the kernel. * Returns the head of the mbuf list. */ struct mbuf * nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, mbp, xidp) register struct ucred *cr; int nmflag; int procid; int auth_type; int auth_len; char *auth_str; int verf_len; char *verf_str; struct mbuf *mrest; int mrest_len; struct mbuf **mbp; u_int32_t *xidp; { register struct mbuf *mb; register u_int32_t *tl; register caddr_t bpos; register int i; struct mbuf *mreq, *mb2; int siz, grpsiz, authsiz; authsiz = nfsm_rndup(auth_len); MGETHDR(mb, M_WAIT, MT_DATA); if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { MCLGET(mb, M_WAIT); } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); } else { MH_ALIGN(mb, 8 * NFSX_UNSIGNED); } mb->m_len = 0; mreq = mb; bpos = mtod(mb, caddr_t); /* * First the RPC header. 
*/ nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED); /* Get a pretty random xid to start with */ if (!nfs_xid) nfs_xid = random(); /* * Skip zero xid if it should ever happen. */ if (++nfs_xid == 0) nfs_xid++; *tl++ = *xidp = txdr_unsigned(nfs_xid); *tl++ = rpc_call; *tl++ = rpc_vers; if (nmflag & NFSMNT_NQNFS) { *tl++ = txdr_unsigned(NQNFS_PROG); *tl++ = txdr_unsigned(NQNFS_VER3); } else { *tl++ = txdr_unsigned(NFS_PROG); if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(NFS_VER3); else *tl++ = txdr_unsigned(NFS_VER2); } if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(procid); else *tl++ = txdr_unsigned(nfsv2_procid[procid]); /* * And then the authorization cred. */ *tl++ = txdr_unsigned(auth_type); *tl = txdr_unsigned(authsiz); switch (auth_type) { case RPCAUTH_UNIX: nfsm_build(tl, u_int32_t *, auth_len); *tl++ = 0; /* stamp ?? */ *tl++ = 0; /* NULL hostname */ *tl++ = txdr_unsigned(cr->cr_uid); *tl++ = txdr_unsigned(cr->cr_groups[0]); grpsiz = (auth_len >> 2) - 5; *tl++ = txdr_unsigned(grpsiz); for (i = 1; i <= grpsiz; i++) *tl++ = txdr_unsigned(cr->cr_groups[i]); break; case RPCAUTH_KERB4: siz = auth_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(auth_str, bpos, i); mb->m_len += i; auth_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } break; }; /* * And the verifier... */ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (verf_str) { *tl++ = txdr_unsigned(RPCAUTH_KERB4); *tl = txdr_unsigned(verf_len); siz = verf_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(verf_str, bpos, i); mb->m_len += i; verf_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } } else { *tl++ = txdr_unsigned(RPCAUTH_NULL); *tl = 0; } mb->m_next = mrest; mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; mreq->m_pkthdr.rcvif = (struct ifnet *)0; *mbp = mb; return (mreq); } /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbuftouio(mrep, uiop, siz, dpos) struct mbuf **mrep; register struct uio *uiop; int siz; caddr_t *dpos; { register char *mbufcp, *uiocp; register int xfer, left, len; register struct mbuf *mp; long uiosiz, rem; int error = 0; mp = *mrep; mbufcp = *dpos; len = mtod(mp, caddr_t)+mp->m_len-mbufcp; rem = nfsm_rndup(siz)-siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) return (EFBIG); left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mp->m_next; if (mp == NULL) return (EBADRPC); mbufcp = mtod(mp, caddr_t); len = mp->m_len; } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. 
*/ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(mbufcp, uiocp, xfer); else copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } *dpos = mbufcp; *mrep = mp; if (rem > 0) { if (len < rem) error = nfs_adv(mrep, dpos, rem, len); else *dpos += rem; } return (error); } /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can only handle iovcnt == 1 */ int nfsm_uiotombuf(uiop, mq, siz, bpos) register struct uio *uiop; struct mbuf **mq; int siz; caddr_t *bpos; { register char *uiocp; register struct mbuf *mp, *mp2; register int xfer, left, mlen; int uiosiz, clflg, rem; char *cp; #ifdef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfsm_uiotombuf: iovcnt != 1"); #endif if (siz > MLEN) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = nfsm_rndup(siz)-siz; mp = mp2 = *mq; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { MGET(mp, M_WAIT, MT_DATA); if (clflg) MCLGET(mp, M_WAIT); mp->m_len = 0; mp2->m_next = mp; mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); mp->m_len += xfer; left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { MGET(mp, M_WAIT, MT_DATA); mp->m_len = 0; mp2->m_next = mp; } cp = mtod(mp, caddr_t)+mp->m_len; for (left = 0; left < rem; left++) *cp++ = '\0'; mp->m_len += rem; *bpos = cp; } else *bpos = mtod(mp, caddr_t)+mp->m_len; *mq = mp; return (0); } /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macros nfsm_dissect and nfsm_dissecton for tough * cases. (The macros use the vars. dpos and dpos2) */ int nfsm_disct(mdp, dposp, siz, left, cp2) struct mbuf **mdp; caddr_t *dposp; int siz; int left; caddr_t *cp2; { register struct mbuf *mp, *mp2; register int siz2, xfer; register caddr_t p; mp = *mdp; while (left == 0) { *mdp = mp = mp->m_next; if (mp == NULL) return (EBADRPC); left = mp->m_len; *dposp = mtod(mp, caddr_t); } if (left >= siz) { *cp2 = *dposp; *dposp += siz; } else if (mp->m_next == NULL) { return (EBADRPC); } else if (siz > MHLEN) { panic("nfs S too big"); } else { MGET(mp2, M_WAIT, MT_DATA); mp2->m_next = mp->m_next; mp->m_next = mp2; mp->m_len -= left; mp = mp2; *cp2 = p = mtod(mp, caddr_t); bcopy(*dposp, p, left); /* Copy what was left */ siz2 = siz-left; p += left; mp2 = mp->m_next; /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (EBADRPC); xfer = (siz2 > mp2->m_len) ?
mp2->m_len : siz2; if (xfer > 0) { bcopy(mtod(mp2, caddr_t), p, xfer); NFSMADV(mp2, xfer); mp2->m_len -= xfer; p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mp2->m_next; } mp->m_len = siz; *mdp = mp2; *dposp = mtod(mp2, caddr_t); } return (0); } /* * Advance the position in the mbuf chain. */ int nfs_adv(mdp, dposp, offs, left) struct mbuf **mdp; caddr_t *dposp; int offs; int left; { register struct mbuf *m; register int s; m = *mdp; s = left; while (s < offs) { offs -= s; m = m->m_next; if (m == NULL) return (EBADRPC); s = m->m_len; } *mdp = m; *dposp = mtod(m, caddr_t)+offs; return (0); } /* * Copy a string into mbufs for the hard cases... */ int nfsm_strtmbuf(mb, bpos, cp, siz) struct mbuf **mb; char **bpos; const char *cp; long siz; { register struct mbuf *m1 = NULL, *m2; long left, xfer, len, tlen; u_int32_t *tl; int putsize; putsize = 1; m2 = *mb; left = M_TRAILINGSPACE(m2); if (left > 0) { tl = ((u_int32_t *)(*bpos)); *tl++ = txdr_unsigned(siz); putsize = 0; left -= NFSX_UNSIGNED; m2->m_len += NFSX_UNSIGNED; if (left > 0) { bcopy(cp, (caddr_t) tl, left); siz -= left; cp += left; m2->m_len += left; left = 0; } } /* Loop around adding mbufs */ while (siz > 0) { MGET(m1, M_WAIT, MT_DATA); if (siz > MLEN) MCLGET(m1, M_WAIT); m1->m_len = NFSMSIZ(m1); m2->m_next = m1; m2 = m1; tl = mtod(m1, u_int32_t *); tlen = 0; if (putsize) { *tl++ = txdr_unsigned(siz); m1->m_len -= NFSX_UNSIGNED; tlen = NFSX_UNSIGNED; putsize = 0; } if (siz < m1->m_len) { len = nfsm_rndup(siz); xfer = siz; if (xfer < len) *(tl+(xfer>>2)) = 0; } else { xfer = len = m1->m_len; } bcopy(cp, (caddr_t) tl, xfer); m1->m_len = len+tlen; siz -= xfer; cp += xfer; } *mb = m1; *bpos = mtod(m1, caddr_t)+m1->m_len; return (0); } /* * Called once to initialize data structures... */ int nfs_init(vfsp) struct vfsconf *vfsp; { register int i; nfsmount_zone = zinit("NFSMOUNT", sizeof(struct nfsmount), 0, 0, 1); /* * Check to see if major data structures haven't bloated. */ if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) { printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC); printf("Try reducing NFS_UIDHASHSIZ\n"); } if (sizeof (struct nfsuid) > NFS_UIDALLOC) { printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); printf("Try unionizing the nu_nickname and nu_flag fields\n"); } nfs_mount_type = vfsp->vfc_typenum; nfsrtt.pos = 0; rpc_vers = txdr_unsigned(RPC_VER2); rpc_call = txdr_unsigned(RPC_CALL); rpc_reply = txdr_unsigned(RPC_REPLY); rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); rpc_mismatch = txdr_unsigned(RPC_MISMATCH); rpc_autherr = txdr_unsigned(RPC_AUTHERR); rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); nfs_prog = txdr_unsigned(NFS_PROG); nqnfs_prog = txdr_unsigned(NQNFS_PROG); nfs_true = txdr_unsigned(TRUE); nfs_false = txdr_unsigned(FALSE); nfs_xdrneg1 = txdr_unsigned(-1); nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { nfs_iodwant[i] = (struct proc *)0; nfs_iodmount[i] = (struct nfsmount *)0; } nfs_nhinit(); /* Init the nfsnode table */ #ifndef NFS_NOSERVER nfsrv_init(0); /* Init server data structures */ nfsrv_initcache(); /* Init the server request cache */ #endif /* * Initialize the nqnfs server stuff. 
*/ if (nqnfsstarttime == 0) { nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease + nqsrv_clockskew + nqsrv_writeslack; NQLOADNOVRAM(nqnfsstarttime); CIRCLEQ_INIT(&nqtimerhead); nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); } /* * Initialize reply list and start timer */ TAILQ_INIT(&nfs_reqq); nfs_timer(0); /* * Set up lease_check and lease_updatetime so that other parts * of the system can call us, if we are loadable. */ #ifndef NFS_NOSERVER nfs_prev_vop_lease_check = default_vnodeop_p[VOFFSET(vop_lease)]; default_vnodeop_p[VOFFSET(vop_lease)] = (vop_t *)nqnfs_vop_lease_check; #endif nfs_prev_lease_updatetime = lease_updatetime; lease_updatetime = nfs_lease_updatetime; nfs_prev_nfssvc_sy_narg = sysent[SYS_nfssvc].sy_narg; sysent[SYS_nfssvc].sy_narg = 2; nfs_prev_nfssvc_sy_call = sysent[SYS_nfssvc].sy_call; sysent[SYS_nfssvc].sy_call = (sy_call_t *)nfssvc; #ifndef NFS_NOSERVER nfs_prev_getfh_sy_narg = sysent[SYS_getfh].sy_narg; sysent[SYS_getfh].sy_narg = 2; nfs_prev_getfh_sy_call = sysent[SYS_getfh].sy_call; sysent[SYS_getfh].sy_call = (sy_call_t *)getfh; #endif nfs_pbuf_freecnt = nswbuf / 2 + 1; return (0); } int nfs_uninit(vfsp) struct vfsconf *vfsp; { untimeout(nfs_timer, (void *)NULL, nfs_timer_handle); nfs_mount_type = -1; #ifndef NFS_NOSERVER default_vnodeop_p[VOFFSET(vop_lease)] = nfs_prev_vop_lease_check; #endif lease_updatetime = nfs_prev_lease_updatetime; sysent[SYS_nfssvc].sy_narg = nfs_prev_nfssvc_sy_narg; sysent[SYS_nfssvc].sy_call = nfs_prev_nfssvc_sy_call; #ifndef NFS_NOSERVER sysent[SYS_getfh].sy_narg = nfs_prev_getfh_sy_narg; sysent[SYS_getfh].sy_call = nfs_prev_getfh_sy_call; #endif return (0); } /* * Attribute cache routines. * nfs_loadattrcache() - loads or updates the cache contents from attributes * that are on the mbuf list * nfs_getattrcache() - returns valid attributes if found in cache, returns * error otherwise */ /* * Load the attribute cache (that lives in the nfsnode entry) with * the values on the mbuf list and * Iff vap not NULL * copy the attributes to *vaper */ int nfs_loadattrcache(vpp, mdp, dposp, vaper) struct vnode **vpp; struct mbuf **mdp; caddr_t *dposp; struct vattr *vaper; { register struct vnode *vp = *vpp; register struct vattr *vap; register struct nfs_fattr *fp; register struct nfsnode *np; register int32_t t1; caddr_t cp2; int error = 0, rdev; struct mbuf *md; enum vtype vtyp; u_short vmode; struct timespec mtime; struct vnode *nvp; int v3 = NFS_ISV3(vp); md = *mdp; t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) != 0) return (error); fp = (struct nfs_fattr *)cp2; if (v3) { vtyp = nfsv3tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); rdev = makeudev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), fxdr_unsigned(int, fp->fa3_rdev.specdata2)); fxdr_nfsv3time(&fp->fa3_mtime, &mtime); } else { vtyp = nfsv2tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); /* * XXX * * The duplicate information returned in fa_type and fa_mode * is an ambiguity in the NFS version 2 protocol. * * VREG should be taken literally as a regular file. If a * server intends to return some type information differently * in the upper bits of the mode field (e.g. for sockets, or * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we * leave the examination of the mode bits even in the VREG * case to avoid breakage for bogus servers, but we make sure * that there are actually type bits set in the upper part of * fa_mode (and failing that, trust the va_type field).
* * NFSv3 cleared the issue, and requires fa_mode to not * contain any type information (while also introducing sockets * and FIFOs for fa_type). */ if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) vtyp = IFTOVT(vmode); rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); fxdr_nfsv2time(&fp->fa2_mtime, &mtime); /* * Really ugly NFSv2 kludge. */ if (vtyp == VCHR && rdev == 0xffffffff) vtyp = VFIFO; } /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); if (vp->v_type != vtyp) { vp->v_type = vtyp; if (vp->v_type == VFIFO) { vp->v_op = fifo_nfsv2nodeop_p; } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; nvp = checkalias(vp, rdev, vp->v_mount); if (nvp) { /* * Discard unneeded vnode, but save its nfsnode. * Since the nfsnode does not have a lock, its * vnode lock has to be carried over. */ nvp->v_vnlock = vp->v_vnlock; vp->v_vnlock = NULL; nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased node. */ np->n_vnode = nvp; *vpp = vp = nvp; } } np->n_mtime = mtime.tv_sec; } vap = &np->n_vattr; vap->va_type = vtyp; vap->va_mode = (vmode & 07777); vap->va_rdev = rdev; vap->va_mtime = mtime; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; if (v3) { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_hyper(&fp->fa3_size); vap->va_blocksize = NFS_FABLKSIZE; vap->va_bytes = fxdr_hyper(&fp->fa3_used); vap->va_fileid = fxdr_unsigned(int32_t, fp->fa3_fileid.nfsuquad[1]); fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); vap->va_flags = 0; vap->va_filerev = 0; } else { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) * NFS_FABLKSIZE; vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); vap->va_flags = 0; vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_sec); vap->va_ctime.tv_nsec = 0; vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); vap->va_filerev = 0; } if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, np->n_size); } else np->n_size = vap->va_size; } np->n_attrstamp = time_second; if (vaper != NULL) { bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } return (0); } #ifdef NFS_ACDEBUG #include SYSCTL_DECL(_vfs_nfs); static int nfs_acdebug; SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, ""); #endif /* * Check the time stamp * If the cache is valid, copy contents to *vap and return 0 * otherwise return an error */ int nfs_getattrcache(vp, vaper) register struct vnode *vp; struct vattr *vaper; { register struct nfsnode *np;
register struct vattr *vap; struct nfsmount *nmp; int timeo; np = VTONFS(vp); vap = &np->n_vattr; nmp = VFSTONFS(vp->v_mount); /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */ timeo = (time_second - np->n_mtime) / 10; #ifdef NFS_ACDEBUG if (nfs_acdebug>1) printf("nfs_getattrcache: initial timeo = %d\n", timeo); #endif if (vap->va_type == VDIR) { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin) timeo = nmp->nm_acdirmin; else if (timeo > nmp->nm_acdirmax) timeo = nmp->nm_acdirmax; } else { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin) timeo = nmp->nm_acregmin; else if (timeo > nmp->nm_acregmax) timeo = nmp->nm_acregmax; } #ifdef NFS_ACDEBUG if (nfs_acdebug > 2) printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n", nmp->nm_acregmin, nmp->nm_acregmax, nmp->nm_acdirmin, nmp->nm_acdirmax); if (nfs_acdebug) printf("nfs_getattrcache: age = %d; final timeo = %d\n", (time_second - np->n_attrstamp), timeo); #endif if ((time_second - np->n_attrstamp) >= timeo) { nfsstats.attrcache_misses++; return (ENOENT); } nfsstats.attrcache_hits++; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, np->n_size); } else np->n_size = vap->va_size; } bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } return (0); } #ifndef NFS_NOSERVER /* * Set up nameidata for a lookup() call and do it. * * If pubflag is set, this call is done for a lookup operation on the * public filehandle. In that case we allow crossing mountpoints and * absolute pathnames. However, the caller is expected to check that * the lookup result is within the public fs, and deny access if * it is not. * * nfs_namei() clears out garbage fields that namei() might leave garbage. * This is mainly ni_vp and ni_dvp when an error occurs, and ni_dvp when no * error occurs but the parent was not requested. * * dirp may be set whether an error is returned or not, and must be * released by the caller. */ int nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag) register struct nameidata *ndp; fhandle_t *fhp; int len; struct nfssvc_sock *slp; struct sockaddr *nam; struct mbuf **mdp; caddr_t *dposp; struct vnode **retdirp; struct proc *p; int kerbflag, pubflag; { register int i, rem; register struct mbuf *md; register char *fromcp, *tocp, *cp; struct iovec aiov; struct uio auio; struct vnode *dp; int error, rdonly, linklen; struct componentname *cnp = &ndp->ni_cnd; *retdirp = (struct vnode *)0; cnp->cn_pnbuf = zalloc(namei_zone); /* * Copy the name from the mbuf list to ndp->ni_pnbuf * and set the various ndp fields appropriately. */ fromcp = *dposp; tocp = cnp->cn_pnbuf; md = *mdp; rem = mtod(md, caddr_t) + md->m_len - fromcp; cnp->cn_hash = 0; for (i = 0; i < len; i++) { while (rem == 0) { md = md->m_next; if (md == NULL) { error = EBADRPC; goto out; } fromcp = mtod(md, caddr_t); rem = md->m_len; } if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) { error = EACCES; goto out; } cnp->cn_hash += (unsigned char)*fromcp; *tocp++ = *fromcp++; rem--; } *tocp = '\0'; *mdp = md; *dposp = fromcp; len = nfsm_rndup(len)-len; if (len > 0) { if (rem >= len) *dposp += len; else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0) goto out; } /* * Extract and set starting directory. 
*/ error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, nam, &rdonly, kerbflag, pubflag); if (error) goto out; if (dp->v_type != VDIR) { vrele(dp); error = ENOTDIR; goto out; } if (rdonly) cnp->cn_flags |= RDONLY; /* * Set return directory. Reference to dp is implicitly transferred * to the returned pointer */ *retdirp = dp; if (pubflag) { /* * Oh joy. For WebNFS, handle those pesky '%' escapes, * and the 'native path' indicator. */ cp = zalloc(namei_zone); fromcp = cnp->cn_pnbuf; tocp = cp; if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) { switch ((unsigned char)*fromcp) { case WEBNFS_NATIVE_CHAR: /* * 'Native' path for us is the same * as a path according to the NFS spec, * just skip the escape char. */ fromcp++; break; /* * More may be added in the future, range 0x80-0xff */ default: error = EIO; zfree(namei_zone, cp); goto out; } } /* * Translate the '%' escapes, URL-style. */ while (*fromcp != '\0') { if (*fromcp == WEBNFS_ESC_CHAR) { if (fromcp[1] != '\0' && fromcp[2] != '\0') { fromcp++; *tocp++ = HEXSTRTOI(fromcp); fromcp += 2; continue; } else { error = ENOENT; zfree(namei_zone, cp); goto out; } } else *tocp++ = *fromcp++; } *tocp = '\0'; zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1; ndp->ni_segflg = UIO_SYSSPACE; if (pubflag) { ndp->ni_rootdir = rootvnode; ndp->ni_loopcnt = 0; if (cnp->cn_pnbuf[0] == '/') dp = rootvnode; } else { cnp->cn_flags |= NOCROSSMOUNT; } /* * Initialize for scan, set ni_startdir and bump ref on dp again * because lookup() will dereference ni_startdir. */ cnp->cn_proc = p; VREF(dp); ndp->ni_startdir = dp; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* * Call lookup() to do the real work. If an error occurs, * ndp->ni_vp and ni_dvp are left uninitialized or NULL and * we do not have to dereference anything before returning. * In either case ni_startdir will be dereferenced and NULLed * out. */ error = lookup(ndp); if (error) break; /* * Check for encountering a symbolic link. Trivial * termination occurs if no symlink encountered. * Note: zfree is safe because error is 0, so we will * not zfree it again when we break.
*/ if ((cnp->cn_flags & ISSYMLINK) == 0) { nfsrv_object_create(ndp->ni_vp); if (cnp->cn_flags & (SAVENAME | SAVESTART)) cnp->cn_flags |= HASBUF; else zfree(namei_zone, cnp->cn_pnbuf); break; } /* * Validate symlink */ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) VOP_UNLOCK(ndp->ni_dvp, 0, p); if (!pubflag) { error = EINVAL; goto badlink2; } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; goto badlink2; } if (ndp->ni_pathlen > 1) cp = zalloc(namei_zone); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = (struct proc *)0; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink1: if (ndp->ni_pathlen > 1) zfree(namei_zone, cp); badlink2: vrele(ndp->ni_dvp); vput(ndp->ni_vp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink1; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink1; } /* * Adjust or replace path */ if (ndp->ni_pathlen > 1) { bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; /* * Cleanup refs for next loop and check if root directory * should replace current directory. Normally ni_dvp * becomes the new base directory and is cleaned up when * we loop. Explicitly null pointers after invalidation * to clarify operation. */ vput(ndp->ni_vp); ndp->ni_vp = NULL; if (cnp->cn_pnbuf[0] == '/') { vrele(ndp->ni_dvp); ndp->ni_dvp = ndp->ni_rootdir; VREF(ndp->ni_dvp); } ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } /* * nfs_namei() guarantees that fields will not contain garbage * whether an error occurs or not. This allows the caller to track * cleanup state trivially. */ out: if (error) { zfree(namei_zone, cnp->cn_pnbuf); ndp->ni_vp = NULL; ndp->ni_dvp = NULL; ndp->ni_startdir = NULL; cnp->cn_flags &= ~HASBUF; } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } return (error); } /* * A fiddled version of m_adj() that ensures null fill to a long * boundary and only trims off the back end */ void nfsm_adj(mp, len, nul) struct mbuf *mp; register int len; int nul; { register struct mbuf *m; register int count, i; register char *cp; /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ count = 0; m = mp; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len > len) { m->m_len -= len; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ for (m = mp; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } break; } count -= m->m_len; } for (m = m->m_next;m;m = m->m_next) m->m_len = 0; } /* * Make these functions instead of macros, so that the kernel text size * doesn't get too big...
*/ void nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int before_ret; register struct vattr *before_vap; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_int32_t *tl; if (before_ret) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(before_vap->va_size, tl); tl += 2; txdr_nfsv3time(&(before_vap->va_mtime), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_ctime), tl); } *bposp = bpos; *mbp = mb; nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); } void nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_int32_t *tl; register struct nfs_fattr *fp; if (after_ret) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; nfsm_srvfattr(nfsd, after_vap, fp); } *mbp = mb; *bposp = bpos; } void nfsm_srvfattr(nfsd, vap, fp) register struct nfsrv_descript *nfsd; register struct vattr *vap; register struct nfs_fattr *fp; { fp->fa_nlink = txdr_unsigned(vap->va_nlink); fp->fa_uid = txdr_unsigned(vap->va_uid); fp->fa_gid = txdr_unsigned(vap->va_gid); if (nfsd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv3_type(vap->va_type); fp->fa_mode = vtonfsv3_mode(vap->va_mode); txdr_hyper(vap->va_size, &fp->fa3_size); txdr_hyper(vap->va_bytes, &fp->fa3_used); fp->fa3_rdev.specdata1 = txdr_unsigned(umajor(vap->va_rdev)); fp->fa3_rdev.specdata2 = txdr_unsigned(uminor(vap->va_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); fp->fa3_fileid.nfsuquad[0] = 0; fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(vap->va_type); fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); fp->fa2_size = txdr_unsigned(vap->va_size); fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); if (vap->va_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(vap->va_rdev); fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(vap->va_fsid); fp->fa2_fileid = txdr_unsigned(vap->va_fileid); txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); } } /* * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling VFS_FHTOVP() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * - if not lockflag unlock it with VOP_UNLOCK() */ int nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag, pubflag) fhandle_t *fhp; int lockflag; struct vnode **vpp; struct ucred *cred; struct nfssvc_sock *slp; struct sockaddr *nam; int *rdonlyp; int kerbflag; int pubflag; { struct proc *p = curproc; /* XXX */ register struct mount *mp; register int i; struct ucred *credanon; int error, exflags; #ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */ struct sockaddr_int *saddr; #endif 
*vpp = (struct vnode *)0; if (nfs_ispublicfh(fhp)) { if (!pubflag || !nfs_pub.np_valid) return (ESTALE); fhp = &nfs_pub.np_handle; } mp = vfs_getvfs(&fhp->fh_fsid); if (!mp) return (ESTALE); error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon); if (error) return (error); #ifdef MNT_EXNORESPORT if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) { saddr = (struct sockaddr_in *)nam; if (saddr->sin_family == AF_INET && ntohs(saddr->sin_port) >= IPPORT_RESERVED) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } #endif /* * Check/setup credentials. */ if (exflags & MNT_EXKERB) { if (!kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } else if (kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { cred->cr_uid = credanon->cr_uid; for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) cred->cr_groups[i] = credanon->cr_groups[i]; cred->cr_ngroups = i; } if (exflags & MNT_EXRDONLY) *rdonlyp = 1; else *rdonlyp = 0; nfsrv_object_create(*vpp); if (!lockflag) VOP_UNLOCK(*vpp, 0, p); return (0); } /* * WebNFS: check if a filehandle is a public filehandle. For v3, this * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has * transformed this to all zeroes in both cases, so check for it. */ int nfs_ispublicfh(fhp) fhandle_t *fhp; { char *cp = (char *)fhp; int i; for (i = 0; i < NFSX_V3FH; i++) if (*cp++ != 0) return (FALSE); return (TRUE); } #endif /* NFS_NOSERVER */ /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ int netaddr_match(family, haddr, nam) int family; union nethostaddr *haddr; struct sockaddr *nam; { register struct sockaddr_in *inetaddr; switch (family) { case AF_INET: inetaddr = (struct sockaddr_in *)nam; if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inetaddr) return (1); break; #ifdef ISO case AF_ISO: { register struct sockaddr_iso *isoaddr1, *isoaddr2; isoaddr1 = (struct sockaddr_iso *)nam; isoaddr2 = (struct sockaddr_iso *)haddr->had_nam; if (isoaddr1->siso_family == AF_ISO && isoaddr1->siso_nlen > 0 && isoaddr1->siso_nlen == isoaddr2->siso_nlen && SAME_ISOADDR(isoaddr1, isoaddr2)) return (1); break; } #endif /* ISO */ default: break; }; return (0); } static nfsuint64 nfs_nullcookie = { { 0, 0 } }; /* * This function finds the directory cookie that corresponds to the * logical byte offset given. 
*/ nfsuint64 * nfs_getcookie(np, off, add) register struct nfsnode *np; off_t off; int add; { register struct nfsdmap *dp, *dp2; register int pos; pos = (uoff_t)off / NFS_DIRBLKSIZ; if (pos == 0 || off < 0) { #ifdef DIAGNOSTIC if (add) panic("nfs getcookie add at <= 0"); #endif return (&nfs_nullcookie); } pos--; dp = np->n_cookies.lh_first; if (!dp) { if (add) { MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return ((nfsuint64 *)0); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (dp->ndm_list.le_next) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return ((nfsuint64 *)0); dp = dp->ndm_list.le_next; } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return ((nfsuint64 *)0); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return ((nfsuint64 *)0); } return (&dp->ndm_cookies[pos]); } /* * Invalidate cached directory information, except for the actual directory * blocks (which are invalidated separately). * Done mainly to avoid the use of stale offset cookies. */ void nfs_invaldir(vp) register struct vnode *vp; { register struct nfsnode *np = VTONFS(vp); #ifdef DIAGNOSTIC if (vp->v_type != VDIR) panic("nfs: invaldir not dir"); #endif np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; if (np->n_cookies.lh_first) np->n_cookies.lh_first->ndm_eocookie = 0; } /* * The write verifier has changed (probably due to a server reboot), so all * B_NEEDCOMMIT blocks will have to be written again. Since they are on the * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT * flag. Once done the new write verifier can be set for the mount point. */ void nfs_clearcommit(mp) struct mount *mp; { register struct vnode *vp, *nvp; register struct buf *bp, *nbp; int s; s = splbio(); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ goto loop; nvp = vp->v_mntvnodes.le_next; for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_REFCNT(bp) == 0 && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bp->b_flags &= ~B_NEEDCOMMIT; } } splx(s); } #ifndef NFS_NOSERVER /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. */ int nfsrv_errmap(nd, err) struct nfsrv_descript *nd; register int err; { register short *defaulterrp, *errp; if (nd->nd_flag & ND_NFSV3) { if (nd->nd_procnum <= NFSPROC_COMMIT) { errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; while (*++errp) { if (*errp == err) return (err); else if (*errp > err) break; } return ((int)*defaulterrp); } else return (err & 0xffff); } if (err <= ELAST) return ((int)nfsrv_v2errmap[err - 1]); return (NFSERR_IO); } int nfsrv_object_create(vp) struct vnode *vp; { if (vp == NULL || vp->v_type != VREG) return (1); return (vfs_object_create(vp, curproc, curproc ? curproc->p_ucred : NULL)); } /* * Sort the group list in increasing numerical order. * (Insertion sort by Chris Torek, who was grossed out by the bubble sort * that used to be here.) */ void nfsrvw_sort(list, num) register gid_t *list; register int num; { register int i, j; gid_t v; /* Insertion sort. 
*/ for (i = 1; i < num; i++) { v = list[i]; /* find correct slot for value v, moving others up */ for (j = i; --j >= 0 && v < list[j];) list[j + 1] = list[j]; list[j + 1] = v; } } /* * copy credentials making sure that the result can be compared with bcmp(). */ void nfsrv_setcred(incred, outcred) register struct ucred *incred, *outcred; { register int i; bzero((caddr_t)outcred, sizeof (struct ucred)); outcred->cr_ref = 1; outcred->cr_uid = incred->cr_uid; outcred->cr_ngroups = incred->cr_ngroups; for (i = 0; i < incred->cr_ngroups; i++) outcred->cr_groups[i] = incred->cr_groups[i]; nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups); } #endif /* NFS_NOSERVER */ Index: head/sys/nfsclient/nfs_vnops.c =================================================================== --- head/sys/nfsclient/nfs_vnops.c (revision 49534) +++ head/sys/nfsclient/nfs_vnops.c (revision 49535) @@ -1,3372 +1,3372 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 - * $Id: nfs_vnops.c,v 1.137 1999/07/30 04:51:35 wpaul Exp $ + * $Id: nfs_vnops.c,v 1.138 1999/07/31 01:51:58 msmith Exp $ */ /* * vnode op calls for Sun NFS version 2 and 3 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include -#include #include #include #include #include #include #include #include #include #include #include #include /* Defs */ #define TRUE 1 #define FALSE 0 /* * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. 
*/ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static int nfsspec_read __P((struct vop_read_args *)); static int nfsspec_write __P((struct vop_write_args *)); static int nfsfifo_read __P((struct vop_read_args *)); static int nfsfifo_write __P((struct vop_write_args *)); static int nfsspec_close __P((struct vop_close_args *)); static int nfsfifo_close __P((struct vop_close_args *)); #define nfs_poll vop_nopoll static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int)); static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *)); static int nfs_lookup __P((struct vop_lookup_args *)); static int nfs_create __P((struct vop_create_args *)); static int nfs_mknod __P((struct vop_mknod_args *)); static int nfs_open __P((struct vop_open_args *)); static int nfs_close __P((struct vop_close_args *)); static int nfs_access __P((struct vop_access_args *)); static int nfs_getattr __P((struct vop_getattr_args *)); static int nfs_setattr __P((struct vop_setattr_args *)); static int nfs_read __P((struct vop_read_args *)); static int nfs_mmap __P((struct vop_mmap_args *)); static int nfs_fsync __P((struct vop_fsync_args *)); static int nfs_remove __P((struct vop_remove_args *)); static int nfs_link __P((struct vop_link_args *)); static int nfs_rename __P((struct vop_rename_args *)); static int nfs_mkdir __P((struct vop_mkdir_args *)); static int nfs_rmdir __P((struct vop_rmdir_args *)); static int nfs_symlink __P((struct vop_symlink_args *)); static int nfs_readdir __P((struct vop_readdir_args *)); static int nfs_bmap __P((struct vop_bmap_args *)); static int nfs_strategy __P((struct vop_strategy_args *)); static int nfs_lookitup __P((struct vnode *, const char *, int, struct ucred *, struct proc *, struct nfsnode **)); static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *)); static int nfsspec_access __P((struct vop_access_args *)); static int nfs_readlink __P((struct vop_readlink_args *)); static int nfs_print __P((struct vop_print_args *)); static int nfs_advlock __P((struct vop_advlock_args *)); static int nfs_bwrite __P((struct vop_bwrite_args *)); /* * Global vfs data structures for nfs */ vop_t **nfsv2_vnodeop_p; static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) nfs_abortop }, { &vop_access_desc, (vop_t *) nfs_access }, { &vop_advlock_desc, (vop_t *) nfs_advlock }, { &vop_bmap_desc, (vop_t *) nfs_bmap }, { &vop_bwrite_desc, (vop_t *) nfs_bwrite }, { &vop_close_desc, (vop_t *) nfs_close }, { &vop_create_desc, (vop_t *) nfs_create }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_getpages_desc, (vop_t *) nfs_getpages }, { &vop_putpages_desc, (vop_t *) nfs_putpages }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) nfs_link }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_lookup_desc, (vop_t *) nfs_lookup }, { &vop_mkdir_desc, (vop_t *) nfs_mkdir }, { &vop_mknod_desc, (vop_t *) nfs_mknod }, { &vop_mmap_desc, (vop_t *) nfs_mmap }, { &vop_open_desc, (vop_t *) nfs_open }, { &vop_poll_desc, (vop_t *) nfs_poll }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfs_read }, { &vop_readdir_desc, (vop_t *) nfs_readdir }, { &vop_readlink_desc, (vop_t *) nfs_readlink }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_remove_desc, (vop_t *) nfs_remove }, { &vop_rename_desc, (vop_t *) 
nfs_rename }, { &vop_rmdir_desc, (vop_t *) nfs_rmdir }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_strategy_desc, (vop_t *) nfs_strategy }, { &vop_symlink_desc, (vop_t *) nfs_symlink }, { &vop_write_desc, (vop_t *) nfs_write }, { NULL, NULL } }; static struct vnodeopv_desc nfsv2_vnodeop_opv_desc = { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; VNODEOP_SET(nfsv2_vnodeop_opv_desc); /* * Special device vnode ops */ vop_t **spec_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_specop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsspec_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsspec_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsspec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = { &spec_nfsv2nodeop_p, nfsv2_specop_entries }; VNODEOP_SET(spec_nfsv2nodeop_opv_desc); vop_t **fifo_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = { { &vop_default_desc, (vop_t *) fifo_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsfifo_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsfifo_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsfifo_write }, { NULL, NULL } }; static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = { &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries }; VNODEOP_SET(fifo_nfsv2nodeop_opv_desc); static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct proc *procp)); static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap)); static int nfs_removerpc __P((struct vnode *dvp, const char *name, int namelen, struct ucred *cred, struct proc *proc)); static int nfs_renamerpc __P((struct vnode *fdvp, const char *fnameptr, int fnamelen, struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred, struct proc *proc)); static int nfs_renameit __P((struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp)); /* * Global variables */ extern u_int32_t nfs_true, nfs_false; extern u_int32_t nfs_xdrneg1; extern struct nfsstats nfsstats; extern nfstype nfsv3_type[9]; struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; int nfs_numasync = 0; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) SYSCTL_DECL(_vfs_nfs); static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); static int nfsaccess_cache_hits; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, &nfsaccess_cache_hits, 0, "NFS ACCESS cache hit count"); static int nfsaccess_cache_misses; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, &nfsaccess_cache_misses, 0, "NFS ACCESS cache miss count"); #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | 
NFSV3ACCESS_MODIFY \ | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \ | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP) static int nfs3_access_otw(struct vnode *vp, int wmode, struct proc *p, struct ucred *cred) { const int v3 = 1; u_int32_t *tl; int error = 0, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; caddr_t bpos, dpos, cp2; register int32_t t1, t2; register caddr_t cp; u_int32_t rmode; struct nfsnode *np = VTONFS(vp); nfsstats.rpccnt[NFSPROC_ACCESS]++; nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); nfsm_fhtom(vp, v3); nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(wmode); nfsm_request(vp, NFSPROC_ACCESS, p, cred); nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_int32_t, *tl); np->n_mode = rmode; np->n_modeuid = cred->cr_uid; np->n_modestamp = time_second; } nfsm_reqdone; return error; } /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. */ static int nfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; int error = 0; u_int32_t mode, wmode; int v3 = NFS_ISV3(vp); struct nfsnode *np = VTONFS(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } /* * For nfs v3, check to see if we have done this recently, and if * so return our cached result instead of making an ACCESS call. * If not, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about. */ if (v3) { if (ap->a_mode & VREAD) mode = NFSV3ACCESS_READ; else mode = 0; if (vp->v_type != VDIR) { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_EXECUTE; } else { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_LOOKUP; } /* XXX safety belt, only make blanket request if caching */ if (nfsaccess_cache_timeout > 0) { wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP; } else { wmode = mode; } /* * Does our cached result allow us to give a definite yes to * this request? */ if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) && (ap->a_cred->cr_uid == np->n_modeuid) && ((np->n_mode & mode) == mode)) { nfsaccess_cache_hits++; } else { /* * Either a no, or a don't know. Go to the wire. */ nfsaccess_cache_misses++; error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred); if (!error) { if ((np->n_mode & mode) != mode) { error = EACCES; } } } return (error); } else { if ((error = nfsspec_access(ap)) != 0) return (error); /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. * After calling nfsspec_access, we should have the correct * file size cached. 
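nfs_access() above answers from a small per-node cache when the stored ACCESS result is recent enough, was obtained for the same uid, and already grants every requested bit; otherwise it goes to the wire. A hypothetical userland sketch of that check (structure and names are illustrative, not the kernel's):

#include <stdio.h>
#include <time.h>

/* cached result of one over-the-wire ACCESS reply */
struct access_cache {
	time_t		stamp;		/* when the reply was received */
	unsigned int	uid;		/* credential it was obtained for */
	unsigned int	granted;	/* bitmap of rights the server granted */
};

/* 1 = definite yes from cache, 0 = unknown or no, must ask the server */
static int
access_cache_hit(const struct access_cache *c, unsigned int uid,
    unsigned int wanted, int timeout)
{
	if (timeout <= 0)
		return (0);				/* caching disabled */
	if (time(NULL) >= c->stamp + timeout)
		return (0);				/* entry too old */
	if (uid != c->uid)
		return (0);				/* different credential */
	return ((c->granted & wanted) == wanted);	/* every wanted bit must be cached */
}

int
main(void)
{
	struct access_cache c = { time(NULL), 100, 0x1 | 0x2 };

	printf("%d\n", access_cache_hit(&c, 100, 0x1, 60));	/* 1: cached yes */
	printf("%d\n", access_cache_hit(&c, 100, 0x4, 60));	/* 0: must go to the wire */
	return (0);
}
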
*/ if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_procp = ap->a_p; if (vp->v_type == VREG) error = nfs_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = nfs_readdirrpc(vp, &auio, ap->a_cred); free(bp, M_TEMP); } else if (vp->v_type == VLNK) error = nfs_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } return (error); } } /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. */ /* ARGSUSED */ static int nfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct vattr vattr; int error; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { #ifdef DIAGNOSTIC printf("open eacces vtyp=%d\n",vp->v_type); #endif return (EACCES); } /* * Get a valid lease. If cached data is stale, flush it. */ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, ap->a_cred, ap->a_p); } while (error == NQNFS_EXPIRED); if (error) return (error); if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); np->n_brev = np->n_lrev; } } } else { if (np->n_flag & NMODIFIED) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) return (error); if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) np->n_direofoffset = 0; if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } } } if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) np->n_attrstamp = 0; /* For Open/Close consistency */ return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) 
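The comment here (continued just below) explains why the client pushes delayed writes at close time; together with the modify-time check nfs_open() performs above, this gives the usual close-to-open consistency model. A compact userland sketch of the two halves, using illustrative names rather than the real nfsnode fields:

#include <stdio.h>
#include <time.h>

/* per-file client state (illustrative, not struct nfsnode) */
struct rfile {
	int	dirty;		/* delayed writes not yet pushed to the server */
	time_t	cached_mtime;	/* server mtime recorded at last validation */
};

/* close: push dirty data so other clients' opens will see a new mtime */
static void
remote_close(struct rfile *f, time_t *server_mtime)
{
	if (f->dirty) {
		f->dirty = 0;
		*server_mtime = time(NULL);	/* the flush bumps the server's mtime */
	}
}

/* open: revalidate; returns 1 if cached data must be thrown away */
static int
remote_open(struct rfile *f, time_t server_mtime)
{
	if (f->cached_mtime != server_mtime) {
		f->cached_mtime = server_mtime;
		return (1);		/* someone else changed the file */
	}
	return (0);
}

int
main(void)
{
	time_t srv = 1000;
	struct rfile writer = { 1, 1000 }, reader = { 0, 1000 };

	remote_close(&writer, &srv);	/* writer flushes at close */
	printf("reader invalidates: %d\n", remote_open(&reader, srv));	/* 1 */
	return (0);
}
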
* * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to nfs_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NQNFS - do nothing now, since 2 is dealt with via leases and * 1 should be dealt with via an fsync() system call for * cases where write errors are important. */ /* ARGSUSED */ static int nfs_close(ap) struct vop_close_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); int error = 0; if (vp->v_type == VREG) { if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && (np->n_flag & NMODIFIED)) { if (NFS_ISV3(vp)) { error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0); np->n_flag &= ~NMODIFIED; } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); np->n_attrstamp = 0; } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } } return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register caddr_t cp; register u_int32_t *tl; register int32_t t1, t2; caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); /* * Update local times for special files. */ if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; /* * First look in the cache. */ if (nfs_getattrcache(vp, ap->a_vap) == 0) return (0); if (v3 && nfsaccess_cache_timeout > 0) { nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred); if (nfs_getattrcache(vp, ap->a_vap) == 0) return (0); } nfsstats.rpccnt[NFSPROC_GETATTR]++; nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred); if (!error) { nfsm_loadattr(vp, ap->a_vap); } nfsm_reqdone; return (error); } /* * nfs setattr call. */ static int nfs_setattr(ap) struct vop_setattr_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Setting of flags is not supported. */ if (vap->va_flags != VNOVAL) return (EOPNOTSUPP); /* * Disallow write attempts if the filesystem is mounted read-only. 
*/ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); vnode_pager_setsize(vp, vap->va_size); if (np->n_flag & NMODIFIED) { if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 1); else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); if (error) { vnode_pager_setsize(vp, np->n_size); return (error); } } tsize = np->n_size; np->n_size = np->n_vattr.va_size = vap->va_size; }; } else if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG && (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); if (error && vap->va_size != VNOVAL) { np->n_size = np->n_vattr.va_size = tsize; vnode_pager_setsize(vp, np->n_size); } return (error); } /* * Do an nfs setattr rpc. */ static int nfs_setattrrpc(vp, vap, cred, procp) register struct vnode *vp; register struct vattr *vap; struct ucred *cred; struct proc *procp; { register struct nfsv2_sattr *sp; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; u_int32_t *tl; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_SETATTR]++; nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); nfsm_fhtom(vp, v3); if (v3) { nfsm_v3attrbuild(vap, TRUE); nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (mode_t)VNOVAL) sp->sa_mode = nfs_xdrneg1; else sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = nfs_xdrneg1; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = nfs_xdrneg1; else sp->sa_gid = txdr_unsigned(vap->va_gid); sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(vp, NFSPROC_SETATTR, procp, cred); if (v3) { nfsm_wcc_data(vp, wccflag); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_reqdone; return (error); } /* * nfs lookup call, one step at a time... 
* First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { struct componentname *cnp = ap->a_cnp; struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; int flags = cnp->cn_flags; struct vnode *newvp; u_int32_t *tl; caddr_t cp; int32_t t1, t2; struct nfsmount *nmp; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; long len; nfsfh_t *fhp; struct nfsnode *np; int lockparent, wantparent, error = 0, attrflag, fhsize; int v3 = NFS_ISV3(dvp); struct proc *p = cnp->cn_proc; *vpp = NULLVP; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); if (dvp->v_type != VDIR) return (ENOTDIR); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { struct vattr vattr; int vpid; if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) != 0) { *vpp = NULLVP; return (error); } newvp = *vpp; vpid = newvp->v_id; /* * See the comment starting `Step through' in ufs/ufs_lookup.c * for an explanation of the locking protocol */ if (dvp == newvp) { VREF(newvp); error = 0; } else if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = vget(newvp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) error = vn_lock(dvp, LK_EXCLUSIVE, p); } else { error = vget(newvp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } if (!error) { if (vpid == newvp->v_id) { if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p) && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { nfsstats.lookupcache_hits++; if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); } vput(newvp); if (lockparent && dvp != newvp && (flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } error = vn_lock(dvp, LK_EXCLUSIVE, p); *vpp = NULLVP; if (error) return (error); } error = 0; newvp = NULLVP; nfsstats.lookupcache_misses++; nfsstats.rpccnt[NFSPROC_LOOKUP]++; len = cnp->cn_namelen; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); if (error) { nfsm_postop_attr(dvp, attrflag); m_freem(mrep); goto nfsmout; } nfsm_getfh(fhp, fhsize, v3); /* * Handle RENAME case... 
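The cache hit path above only trusts a name-cache entry after a fresh VOP_GETATTR confirms the file's change time still matches the value recorded when the entry was made; on a mismatch the entry is purged and the lookup goes over the wire. A small sketch of that validation step (names here are illustrative, not the kernel namecache API):

#include <stdio.h>
#include <time.h>

/* a cached name->file translation (illustrative) */
struct name_entry {
	time_t	ctime_at_enter;	/* change time recorded when the entry was made */
};

static void
name_entry_enter(struct name_entry *e, time_t ctime_now)
{
	e->ctime_at_enter = ctime_now;
}

/* A hit counts only if the file's ctime is unchanged since the entry was made. */
static int
name_entry_valid(const struct name_entry *e, time_t ctime_now)
{
	return (e->ctime_at_enter == ctime_now);
}

int
main(void)
{
	struct name_entry e;

	name_entry_enter(&e, 5000);
	printf("%d\n", name_entry_valid(&e, 5000));	/* 1: use the cached vnode */
	printf("%d\n", name_entry_valid(&e, 5042));	/* 0: purge, redo the LOOKUP RPC */
	return (0);
}
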
*/ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { if (NFS_CMPFH(np, fhp, fhsize)) { m_freem(mrep); return (EISDIR); } error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); *vpp = newvp; m_freem(mrep); cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(dvp, 0, p); return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } newvp = NFSTOV(np); if (lockparent && (flags & ISLASTCN) && (error = vn_lock(dvp, LK_EXCLUSIVE, p))) { vput(newvp); return (error); } } else if (NFS_CMPFH(np, fhp, fhsize)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { m_freem(mrep); return (error); } if (!lockparent || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(dvp, newvp, cnp); } *vpp = newvp; nfsm_reqdone; if (error) { if (newvp != NULLVP) { vrele(newvp); *vpp = NULLVP; } if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN) && error == ENOENT) { if (!lockparent) VOP_UNLOCK(dvp, 0, p); if (dvp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else error = EJUSTRETURN; } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; } return (error); } /* * nfs read call. * Just call nfs_bioread() to do the work. */ static int nfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VREG) return (EPERM); return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); } /* * nfs readlink call */ static int nfs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EINVAL); return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred)); } /* * Do a readlink rpc. * Called by nfs_doio() from below the buffer cache. 
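nfs_readrpc() below issues a sequence of READ RPCs, each at most the mount's rsize, and stops when the residual count is consumed, the server reports EOF (v3), or a short reply comes back (v2). The chunking logic, reduced to a userland sketch with a fake transfer function (all names here are made up for illustration):

#include <stdio.h>

#define RSIZE	8192	/* stand-in for the mount's nm_rsize */

/*
 * Fake "READ RPC": pretend the file is file_size bytes long and return how
 * many bytes the server sent back, setting *eof when the end was reached.
 */
static int
fake_read_rpc(long offset, int len, long file_size, int *eof)
{
	long left = file_size - offset;
	int got = left <= 0 ? 0 : (left < len ? (int)left : len);

	*eof = (offset + got >= file_size);
	return (got);
}

/* Returns total bytes transferred for a request of 'resid' bytes at 'offset'. */
static long
chunked_read(long offset, long resid, long file_size)
{
	long total = 0;
	int eof = 0;

	while (resid > 0) {
		int len = resid > RSIZE ? RSIZE : (int)resid;
		int got = fake_read_rpc(offset, len, file_size, &eof);

		total += got;
		offset += got;
		resid -= got;
		if (eof || got == 0)	/* v3 eof flag, or nothing more came back */
			break;
	}
	return (total);
}

int
main(void)
{
	/* ask for 20000 bytes of a 12000-byte file: two RPCs, 12000 back */
	printf("%ld\n", chunked_read(0, 20000, 12000));
	return (0);
}
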
*/ int nfs_readlinkrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int error = 0, len, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_READLINK]++; nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred); if (v3) nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); if (len == NFS_MAXPATHLEN) { struct nfsnode *np = VTONFS(vp); if (np->n_size && np->n_size < NFS_MAXPATHLEN) len = np->n_size; } nfsm_mtouio(uiop, len); } nfsm_reqdone; return (error); } /* * nfs read rpc call * Ditto above */ int nfs_readrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz; nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); nfsm_fhtom(vp, v3); nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED * 3); if (v3) { txdr_hyper(uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } else { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_strsiz(retlen, nmp->nm_rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); tsiz -= retlen; if (v3) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } nfsmout: return (error); } /* * nfs write call */ int nfs_writerpc(vp, uiop, cred, iomode, must_commit) register struct vnode *vp; register struct uio *uiop; struct ucred *cred; int *iomode, *must_commit; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2, backup; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz; nfsm_reqhead(vp, NFSPROC_WRITE, NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else { register u_int32_t x; nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED); /* Set both "begin" and "current" to non-garbage. 
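In the v3 branch of nfs_writerpc() below, the client remembers the weakest commitment level any individual WRITE reply reported and, when the server's write verifier changes (typically after a server reboot), flags that all uncommitted buffers must be rewritten. A compact userland sketch of that bookkeeping; the enum values mirror the weakest-to-strongest ordering of UNSTABLE, DATASYNC and FILESYNC, and the names are illustrative:

#include <stdio.h>
#include <string.h>

/* ordered weakest to strongest */
enum commit_level { WR_UNSTABLE = 0, WR_DATASYNC = 1, WR_FILESYNC = 2 };

#define VERF_LEN 8

struct write_state {
	enum commit_level committed;	/* weakest level seen so far */
	int	have_verf;
	unsigned char verf[VERF_LEN];	/* server's write verifier */
	int	must_commit;		/* set when the verifier changed */
};

/* Fold one WRITE reply into the running state. */
static void
note_write_reply(struct write_state *ws, enum commit_level this_commit,
    const unsigned char *this_verf)
{
	if (this_commit < ws->committed)	/* keep the weakest guarantee */
		ws->committed = this_commit;
	if (!ws->have_verf) {
		memcpy(ws->verf, this_verf, VERF_LEN);
		ws->have_verf = 1;
	} else if (memcmp(ws->verf, this_verf, VERF_LEN) != 0) {
		/* server rebooted: everything written unstably must be redone */
		ws->must_commit = 1;
		memcpy(ws->verf, this_verf, VERF_LEN);
	}
}

int
main(void)
{
	unsigned char v1[VERF_LEN] = "AAAAAAA", v2[VERF_LEN] = "BBBBBBB";
	struct write_state ws = { WR_FILESYNC, 0, { 0 }, 0 };

	note_write_reply(&ws, WR_UNSTABLE, v1);
	note_write_reply(&ws, WR_DATASYNC, v2);	/* verifier changed */
	printf("committed=%d must_commit=%d\n", ws.committed, ws.must_commit);
	return (0);
}
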
*/ x = txdr_unsigned((u_int32_t)uiop->uio_offset); *tl++ = x; /* "begin offset" */ *tl++ = x; /* "current offset" */ x = txdr_unsigned(len); *tl++ = x; /* total to this offset */ *tl = x; /* size of this write */ } nfsm_uiotom(uiop, len); nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred); if (v3) { wccflag = NFSV3_WCCCHK; nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; m_freem(mrep); break; } else if (rlen < len) { backup = len - rlen; uiop->uio_iov->iov_base -= backup; uiop->uio_iov->iov_len += backup; uiop->uio_offset -= backup; uiop->uio_resid += backup; len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest committment level * obtained by any of the RPCs. */ if (committed == NFSV3WRITE_FILESYNC) committed = commit; else if (committed == NFSV3WRITE_DATASYNC && commit == NFSV3WRITE_UNSTABLE) committed = commit; if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){ bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); nmp->nm_state |= NFSSTA_HASWRITEVERF; } else if (bcmp((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { *must_commit = 1; bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); } } } else nfsm_loadattr(vp, (struct vattr *)0); if (wccflag) VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; m_freem(mrep); if (error) break; tsiz -= len; } nfsmout: if (vp->v_mount->mnt_flag & MNT_ASYNC) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) uiop->uio_resid = tsiz; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ static int nfs_mknodrpc(dvp, vpp, cnp, vap) register struct vnode *dvp; register struct vnode **vpp; register struct componentname *cnp; register struct vattr *vap; { register struct nfsv2_sattr *sp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np = (struct nfsnode *)0; struct vattr vattr; char *cp2; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_int32_t rdev; int v3 = NFS_ISV3(dvp); if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = txdr_unsigned(vap->va_rdev); else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = nfs_xdrneg1; else { VOP_ABORTOP(dvp, cnp); return (EOPNOTSUPP); } if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) { VOP_ABORTOP(dvp, cnp); return (error); } nfsstats.rpccnt[NFSPROC_MKNOD]++; nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl++ = vtonfsv3_type(vap->va_type); nfsm_v3attrbuild(vap, FALSE); if (vap->va_type == VCHR || vap->va_type == VBLK) { nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(umajor(vap->va_rdev)); *tl = txdr_unsigned(uminor(vap->va_rdev)); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = rdev; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred); if (!error) { 
nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (newvp) vput(newvp); } else { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. */ /* ARGSUSED */ static int nfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *newvp; int error; error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap); if (!error) vput(newvp); return (error); } static u_long create_verf; /* * nfs file create call */ static int nfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) { VOP_ABORTOP(dvp, cnp); return (error); } if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; again: nfsstats.rpccnt[NFSPROC_CREATE]++; nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); nfsm_build(tl, u_int32_t *, NFSX_V3CREATEVERF); #ifdef INET if (!TAILQ_EMPTY(&in_ifaddrhead)) *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr; else #endif *tl++ = create_verf; *tl = ++create_verf; } else { *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); nfsm_v3attrbuild(vap, FALSE); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = 0; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } if (newvp) vput(newvp); } else if (v3 && (fmode & O_EXCL)) error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc); if (!error) { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } if (error || (cnp->cn_flags & SAVESTART) == 0) 
zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. * - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(ap) struct vop_remove_args /* { struct vnodeop_desc *a_desc; struct vnode * a_dvp; struct vnode * a_vp; struct componentname * a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; #ifndef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("nfs_remove: no name"); if (vp->v_usecount < 1) panic("nfs_remove: bad v_usecount"); #endif if (vp->v_type == VDIR) error = EPERM; else if (vp->v_usecount == 1 || (np->n_sillyrename && VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1); /* Do the rpc */ if (error != EINTR) error = nfs_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); zfree(namei_zone, cnp->cn_pnbuf); np->n_attrstamp = 0; return (error); } /* * nfs file remove rpc called from nfs_inactive */ int nfs_removeit(sp) register struct sillyrename *sp; { return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, (struct proc *)0)); } /* * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). 
*/ static int nfs_removerpc(dvp, name, namelen, cred, proc) register struct vnode *dvp; const char *name; int namelen; struct ucred *cred; struct proc *proc; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_REMOVE]++; nfsm_reqhead(dvp, NFSPROC_REMOVE, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_REMOVE, proc, cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs file rename call */ static int nfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { register struct vnode *fvp = ap->a_fvp; register struct vnode *tvp = ap->a_tvp; register struct vnode *fdvp = ap->a_fdvp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *tcnp = ap->a_tcnp; register struct componentname *fcnp = ap->a_fcnp; int error; #ifndef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("nfs_rename: no name"); #endif /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } /* * We have to flush B_DELWRI data prior to renaming * the file. If we don't, the delayed-write buffers * can be flushed out later after the file has gone stale * under NFSV3. NFSV2 does not have this problem because * ( as far as I can tell ) it flushes dirty buffers more * often. */ VOP_FSYNC(fvp, fcnp->cn_cred, MNT_WAIT, fcnp->cn_proc); if (tvp) VOP_FSYNC(tvp, tcnp->cn_cred, MNT_WAIT, tcnp->cn_proc); /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_proc); if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: VOP_ABORTOP(tdvp, tcnp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(sdvp, scnp, sp) struct vnode *sdvp; struct componentname *scnp; register struct sillyrename *sp; { return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
*/ static int nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) register struct vnode *fdvp; const char *fnameptr; int fnamelen; register struct vnode *tdvp; const char *tnameptr; int tnamelen; struct ucred *cred; struct proc *proc; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(fdvp); nfsstats.rpccnt[NFSPROC_RENAME]++; nfsm_reqhead(fdvp, NFSPROC_RENAME, (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen)); nfsm_fhtom(fdvp, v3); nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); nfsm_fhtom(tdvp, v3); nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); nfsm_request(fdvp, NFSPROC_RENAME, proc, cred); if (v3) { nfsm_wcc_data(fdvp, fwccflag); nfsm_wcc_data(tdvp, twccflag); } nfsm_reqdone; VTONFS(fdvp)->n_flag |= NMODIFIED; VTONFS(tdvp)->n_flag |= NMODIFIED; if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs hard link create call */ static int nfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *cnp = ap->a_cnp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3; if (vp->v_mount != tdvp->v_mount) { VOP_ABORTOP(tdvp, cnp); return (EXDEV); } /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc); v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_LINK]++; nfsm_reqhead(vp, NFSPROC_LINK, NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred); if (v3) { nfsm_postop_attr(vp, attrflag); nfsm_wcc_data(tdvp, wccflag); } nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(tdvp)->n_flag |= NMODIFIED; if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) VTONFS(tdvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vnode *newvp = (struct vnode *)0; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_v3attrbuild(vap, FALSE); } nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); if (!v3) { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = nfs_xdrneg1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred); if (v3) { if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); nfsm_wcc_data(dvp, wccflag); } nfsm_reqdone; if (newvp) vput(newvp); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. */ if (error == EEXIST) error = 0; /* * cnp's buffer expected to be freed if SAVESTART not set or * if an error was returned. 
*/ if (error || (cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); return (error); } /* * nfs make dir call */ static int nfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; register int len; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; int gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) { VOP_ABORTOP(dvp, cnp); return (error); } len = cnp->cn_namelen; nfsstats.rpccnt[NFSPROC_MKDIR]++; nfsm_reqhead(dvp, NFSPROC_MKDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); if (v3) { nfsm_v3attrbuild(vap, FALSE); } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = nfs_xdrneg1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred); if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry * if we can succeed in looking up the directory. */ if (error == EEXIST || (!error && !gotvp)) { if (newvp) { vrele(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, cnp->cn_proc, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vrele(newvp); } else *ap->a_vpp = newvp; if (error || (cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); return (error); } /* * nfs remove directory call */ static int nfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); if (dvp == vp) return (EINVAL); nfsstats.rpccnt[NFSPROC_RMDIR]++; nfsm_reqhead(dvp, NFSPROC_RMDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; cache_purge(dvp); cache_purge(vp); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
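Several routines above and below map ENOENT (for REMOVE, RMDIR and RENAME) or EEXIST (for LINK, SYMLINK and MKDIR) to success, on the assumption that the first reply was lost and the error is just the retransmitted request observing that the earlier attempt already took effect. The surrounding comments call this a kludge because the client cannot distinguish that case from a genuine failure. The pattern, factored into a tiny helper purely for illustration (this helper does not exist in the kernel):

#include <errno.h>
#include <stdio.h>

/*
 * Map the error from a possibly-retransmitted, non-idempotent RPC.
 * 'benign' is the error a duplicate of an already-successful request
 * would produce: ENOENT for remove-style ops, EEXIST for create-style ops.
 */
static int
map_retry_error(int error, int benign)
{
	return (error == benign ? 0 : error);
}

int
main(void)
{
	printf("%d\n", map_retry_error(ENOENT, ENOENT));	/* 0: treat as success */
	printf("%d\n", map_retry_error(EACCES, ENOENT));	/* EACCES: real failure */
	return (0);
}
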
*/ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct uio *uio = ap->a_uio; int tresid, error; struct vattr vattr; if (vp->v_type != VDIR) return (EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKCACHABLE(vp, ND_READ)) { nfsstats.direofcache_hits++; return (0); } } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 && np->n_mtime == vattr.va_mtime.tv_sec) { nfsstats.direofcache_hits++; return (0); } } /* * Call nfs_bioread() to do the real work. */ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred); if (!error && uio->uio_resid == tresid) nfsstats.direofcache_misses++; return (error); } /* * Readdir rpc call. * Called from below the buffer cache by nfs_doio(). */ int nfs_readdirrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp = NULL; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp); u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag; int v3 = NFS_ISV3(vp); #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirrpc bad uio"); #endif /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. 
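The readdir loop below converts each wire entry into a struct dirent, rounding the name up to a 4-byte boundary (leaving room for a terminating NUL) and padding the final record so every block it fills is exactly DIRBLKSIZ bytes. The size arithmetic on its own, as a hedged sketch; the block and header sizes here are illustrative constants, not the kernel's:

#include <stdio.h>

#define XDIRBLKSIZ	512	/* illustrative directory block size */
#define XDIRHDSIZ	12	/* illustrative fixed dirent header size */

/* round a name length up to a 4-byte boundary, as nfsm_rndup() does */
static int
rndup4(int len)
{
	return ((len + 3) & ~3);
}

/* record length for one entry, always leaving space for a NUL terminator */
static int
entry_reclen(int namelen)
{
	int tlen = rndup4(namelen);

	if (tlen == namelen)	/* name exactly filled the slot: add a word for '\0' */
		tlen += 4;
	return (tlen + XDIRHDSIZ);
}

int
main(void)
{
	int used = 0, reclen;

	reclen = entry_reclen(5);		/* e.g. "hello" */
	printf("reclen=%d\n", reclen);		/* 8 + 12 = 20 */
	used += reclen;
	reclen = entry_reclen(8);		/* name already a multiple of 4 */
	printf("reclen=%d\n", reclen);		/* 12 + 12 = 24 */
	used += reclen;
	/* the last record in the block absorbs the leftover space */
	printf("padding added to last record: %d\n", XDIRBLKSIZ - used);
	return (0);
}
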
*/ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIR]++; nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + NFSX_READDIR(v3)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; } else { nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmp->nm_readdirsize); nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; } else { m_freem(mrep); goto nfsmout; } } nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (v3) { nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); fileno = fxdr_hyper(tl); len = fxdr_unsigned(int, *(tl + 2)); } else { nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); fileno = fxdr_unsigned(u_quad_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; /* null terminate */ uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); if (v3) { nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); } else { nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED); } if (bigenough) { cookie.nfsuquad[0] = *tl++; if (v3) cookie.nfsuquad[1] = *tl++; } else if (v3) tl += 2; else tl++; more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: return (error); } /* * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). 
*/ int nfs_readdirplusrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; register struct vnode *newvp; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2, dpossav1, dpossav2; struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp), *np; nfsfh_t *fhp; u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize; #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirplusrpc bad uio"); #endif ndp->ni_dvp = vp; newvp = NULLVP; /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; nfsm_reqhead(vp, NFSPROC_READDIRPLUS, NFSX_FH(1) + 6 * NFSX_UNSIGNED); nfsm_fhtom(vp, 1); nfsm_build(tl, u_int32_t *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred); nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); fileno = fxdr_hyper(tl); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination*/ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; cnp->cn_nameptr = uiop->uio_iov->iov_base; cnp->cn_namelen = len; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED); if (bigenough) { cookie.nfsuquad[0] = *tl++; cookie.nfsuquad[1] = *tl++; } else tl += 2; /* * Since the attributes are before the file handle * (sigh), we must skip over the attributes and then * 
come back and get them. */ attrflag = fxdr_unsigned(int, *tl); if (attrflag) { dpossav1 = dpos; mdsav1 = md; nfsm_adv(NFSX_V3FATTR); nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); doit = fxdr_unsigned(int, *tl); if (doit) { nfsm_getfh(fhp, fhsize, 1); if (NFS_CMPFH(dnp, fhp, fhsize)) { VREF(vp); newvp = vp; np = dnp; } else { error = nfs_nget(vp->v_mount, fhp, fhsize, &np); if (error) doit = 0; else newvp = NFSTOV(np); } } if (doit && bigenough) { dpossav2 = dpos; dpos = dpossav1; mdsav2 = md; md = mdsav1; nfsm_loadattr(newvp, (struct vattr *)0); dpos = dpossav2; md = mdsav2; dp->d_type = IFTODT(VTTOIF(np->n_vattr.va_type)); ndp->ni_vp = newvp; cnp->cn_hash = 0; for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++) cnp->cn_hash += (unsigned char)*cp; cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); } } else { /* Just skip over the file handle */ nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(i)); } if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirplusrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... 
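 * The funny name is of the form ".nfsAxxxx4.4", where xxxx is the pid of
 * the removing process in hex (e.g. ".nfsA1a2b4.4" for pid 0x1a2b).  If
 * that name is already in use, the 'A' is bumped one character at a time
 * until nfs_lookitup() reports the name free, giving up past 'z'.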
*/ static int nfs_sillyrename(dvp, vp, cnp) struct vnode *dvp, *vp; struct componentname *cnp; { register struct sillyrename *sp; struct nfsnode *np; int error; short pid; cache_purge(dvp); np = VTONFS(vp); #ifndef DIAGNOSTIC if (vp->v_type == VDIR) panic("nfs: sillyrename dir"); #endif MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NFSREQ, M_WAITOK); sp->s_cred = crdup(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* Fudge together a funny name */ pid = cnp->cn_proc->p_pid; sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid); /* Try lookitups until we get one that isn't there */ while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, (struct nfsnode **)0) == 0) { sp->s_name[4]++; if (sp->s_name[4] > 'z') { error = EINVAL; goto bad; } } error = nfs_renameit(dvp, cnp, sp); if (error) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. * npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(dvp, name, len, cred, procp, npp) register struct vnode *dvp; const char *name; int len; struct ucred *cred; struct proc *procp; struct nfsnode **npp; { register u_int32_t *tl; register caddr_t cp; register int32_t t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos, cp2; int error = 0, fhlen, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsfh_t *nfhp; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_LOOKUP]++; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred); if (npp && !error) { nfsm_getfh(nfhp, fhlen, v3); if (*npp) { np = *npp; if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { free((caddr_t)np->n_fhp, M_NFSBIGFH); np->n_fhp = &np->n_fh; } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK); bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } } else nfsm_loadattr(newvp, (struct vattr *)0); } nfsm_reqdone; if (npp && *npp == NULL) { if (error) { if (newvp) { if (newvp == dvp) vrele(newvp); else vput(newvp); } } else *npp = np; } return (error); } /* * Nfs Version 3 commit rpc */ static int nfs_commit(vp, offset, cnt, cred, procp) register struct vnode *vp; u_quad_t offset; int cnt; struct ucred *cred; struct proc *procp; { register caddr_t cp; register u_int32_t *tl; register int32_t t1, t2; register struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) return (0); nfsstats.rpccnt[NFSPROC_COMMIT]++; nfsm_reqhead(vp, NFSPROC_COMMIT, 
NFSX_FH(1)); nfsm_fhtom(vp, 1); nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nfsm_request(vp, NFSPROC_COMMIT, procp, cred); nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_int32_t *, NFSX_V3WRITEVERF); if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, NFSX_V3WRITEVERF)) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); error = NFSERR_STALEWRITEVERF; } } nfsm_reqdone; return (error); } /* * Kludge City.. * - make nfs_bmap() essentially a no-op that does no translation * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc * (Maybe I could use the process's page mapping, but I was concerned that * Kernel Write might not be enabled and also figured copyout() would do * a lot more work than bcopy() and also it currently happens in the * context of the swapper process (2). */ static int nfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { register struct vnode *vp = ap->a_vp; if (ap->a_vpp != NULL) *ap->a_vpp = vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(), otherwise just all nfs_doio() to do the * request. */ static int nfs_strategy(ap) struct vop_strategy_args *ap; { register struct buf *bp = ap->a_bp; struct ucred *cr; struct proc *p; int error = 0; KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp)); if (bp->b_flags & B_PHYS) panic("nfs physio"); if (bp->b_flags & B_ASYNC) p = (struct proc *)0; else p = curproc; /* XXX */ if (bp->b_flags & B_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting * queue the request, wake it up and wait for completion * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(bp, NOCRED, p)) error = nfs_doio(bp, cr, p); return (error); } /* * Mmap a file * * NB Currently unsupported. */ /* ARGSUSED */ static int nfs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } /* * fsync vnode op. Just call nfs_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(ap) struct vop_fsync_args /* { struct vnodeop_desc *a_desc; struct vnode * a_vp; struct ucred * a_cred; int a_waitfor; struct proc * a_p; } */ *ap; { return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. 
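 * For NFSv3 with commit set, buffers that have already been written to
 * the server but not yet committed (B_DELWRI | B_NEEDCOMMIT) are covered
 * by COMMIT rpcs first; the remaining dirty buffers are then pushed via
 * VOP_BWRITE(), and with MNT_WAIT the routine sleeps on v_numoutput
 * until the writes have drained.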
*/ static int nfs_flush(vp, cred, waitfor, p, commit) register struct vnode *vp; struct ucred *cred; int waitfor; struct proc *p; int commit; { register struct nfsnode *np = VTONFS(vp); register struct buf *bp; register int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but nas not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, nfs_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV3(vp) && commit) { s = splbio(); /* * Count up how many buffers waiting for a commit. */ bveccount = 0; for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_REFCNT(bp) == 0 && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. */ if (bveccount > NFS_COMMITBVECSIZ) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (bvecpos >= bvecsize) break; if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT) || BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) continue; bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. * * NOTE: we are not clearing B_DONE here, so we have * to do it later on in this routine if we intend to * initiate I/O on the bp. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; bp->b_flags |= B_WRITEINPROG; vfs_busy_pages(bp, 1); /* * bp is protected by being locked, but nbp is not * and vfs_busy_pages() may sleep. We have to * recalculate nbp. */ nbp = TAILQ_NEXT(bp, b_vnbufs); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } splx(s); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. 
*/ if (wcred != NOCRED) retv = nfs_commit(vp, off, (int)(endoff - off), wcred, p); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = nfs_commit(vp, off, (int)size, bp->b_wcred, p); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG); if (retv) { /* * Error, leave B_DELWRI intact */ vfs_unbusy_pages(bp); brelse(bp); } else { /* * Success, remove B_DELWRI ( bundirty() ). * * b_dirtyoff/b_dirtyend seem to be NFS * specific. We should probably move that * into bundirty(). XXX */ s = splbio(); vp->v_numoutput++; bp->b_flags |= B_ASYNC; bundirty(bp); bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); bp->b_dirtyoff = bp->b_dirtyend = 0; splx(s); biodone(bp); } } } /* * Start/do any write(s) that are required. */ loop: s = splbio(); for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { if (waitfor != MNT_WAIT || passone) continue; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL, "nfsfsync", slpflag, slptimeo); splx(s); if (error == 0) panic("nfs_fsync: inconsistent lock"); if (error == ENOLCK) goto loop; if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { BUF_UNLOCK(bp); continue; } bremfree(bp); if (passone || !commit) bp->b_flags |= B_ASYNC; else bp->b_flags |= (B_ASYNC | B_WRITEINPROG | B_NEEDCOMMIT); splx(s); VOP_BWRITE(bp->b_vp, bp); goto loop; } splx(s); if (passone) { passone = 0; goto again; } if (waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } } if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) && commit) { goto loop; } } if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); return (error); } /* * NFS advisory byte-level locks. * Currently unsupported. */ static int nfs_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * The following kludge is to allow diskless support to work * until a real NFS lockd is implemented. Basically, just pretend * that this is a local lock. */ return (lf_advlock(ap, &(np->n_lockf), np->n_size)); } /* * Print out the contents of an nfsnode. */ static int nfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); printf("tag VT_NFS, fileid %ld fsid 0x%x", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Just call nfs_writebp() with the force argument set to 1. * * NOTE: B_DONE may or may not be set in a_bp on call. 
*/ static int nfs_bwrite(ap) struct vop_bwrite_args /* { struct vnode *a_bp; } */ *ap; { return (nfs_writebp(ap->a_bp, 1, curproc)); } /* * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless * the force flag is one and it also handles the B_NEEDCOMMIT flag. We set * B_CACHE if this is a VMIO buffer. */ int nfs_writebp(bp, force, procp) register struct buf *bp; int force; struct proc *procp; { int s; int oldflags = bp->b_flags; int retv = 1; off_t off; if (BUF_REFCNT(bp) == 0) panic("bwrite: buffer is not locked???"); if (bp->b_flags & B_INVAL) { brelse(bp); return(0); } bp->b_flags |= B_CACHE; /* * Undirty the bp. We will redirty it later if the I/O fails. */ s = splbio(); bundirty(bp); bp->b_flags &= ~(B_READ|B_DONE|B_ERROR); bp->b_vp->v_numoutput++; curproc->p_stats->p_ru.ru_oublock++; splx(s); /* * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not * an actual write will have to be scheduled via. VOP_STRATEGY(). * If B_WRITEINPROG is already set, then push it with a write anyhow. */ vfs_busy_pages(bp, 1); if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) { off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; bp->b_flags |= B_WRITEINPROG; retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff, bp->b_wcred, procp); bp->b_flags &= ~B_WRITEINPROG; if (!retv) { bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_flags &= ~B_NEEDCOMMIT; biodone(bp); } else if (retv == NFSERR_STALEWRITEVERF) { nfs_clearcommit(bp->b_vp->v_mount); } } if (retv) { if (force) bp->b_flags |= B_WRITEINPROG; BUF_KERNPROC(bp); VOP_STRATEGY(bp->b_vp, bp); } if( (oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); if (oldflags & B_DELWRI) { s = splbio(); reassignbuf(bp, bp->b_vp); splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vattr *vap; register gid_t *gp; register struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; mode_t mode = ap->a_mode; struct vattr vattr; register int i; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } /* * If you're the super-user, * you always get access. */ if (cred->cr_uid == 0) return (0); vap = &vattr; error = VOP_GETATTR(vp, vap, cred, ap->a_p); if (error) return (error); /* * Access check is based on only one of owner, group, public. * If not owner, then check group. If not a member of the * group, then check public access. */ if (cred->cr_uid != vap->va_uid) { mode >>= 3; gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) if (vap->va_gid == *gp) goto found; mode >>= 3; found: ; } error = (vap->va_mode & mode) == mode ? 0 : EACCES; return (error); } /* * Read wrapper for special devices. */ static int nfsspec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * Set access flag. 
*/ np->n_flag |= NACC; getnanotime(&np->n_atim); return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for special devices. */ static int nfsspec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ np->n_flag |= NUPD; getnanotime(&np->n_mtim); return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for special devices. * * Update the times on the nfsnode then do device close. */ static int nfsspec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Read wrapper for fifos. */ static int nfsfifo_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * Set access flag. */ np->n_flag |= NACC; getnanotime(&np->n_atim); return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for fifos. */ static int nfsfifo_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ np->n_flag |= NUPD; getnanotime(&np->n_mtim); return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct vattr vattr; struct timespec ts; if (np->n_flag & (NACC | NUPD)) { getnanotime(&ts); if (np->n_flag & NACC) np->n_atim = ts; if (np->n_flag & NUPD) np->n_mtim = ts; np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); } Index: head/sys/nfsserver/nfs_srvsubs.c =================================================================== --- head/sys/nfsserver/nfs_srvsubs.c (revision 49534) +++ head/sys/nfsserver/nfs_srvsubs.c (revision 49535) @@ -1,2281 +1,2280 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 - * $Id: nfs_subs.c,v 1.78 1999/06/27 11:44:19 peter Exp $ + * $Id: nfs_subs.c,v 1.79 1999/07/17 18:43:47 phk Exp $ */ /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include #include #ifdef ISO #include #endif /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_int32_t nfs_xdrneg1; u_int32_t rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr, rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_auth_kerb; u_int32_t nfs_prog, nqnfs_prog, nfs_true, nfs_false; /* And other global data */ static u_int32_t nfs_xid = 0; static enum vtype nv2tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON, VNON }; enum vtype nv3tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; int nfs_ticks; int nfs_pbuf_freecnt = -1; /* start out unlimited */ struct nfs_reqq nfs_reqq; struct nfssvc_sockhead nfssvc_sockhead; int nfssvc_sockhead_flag; struct nfsd_head nfsd_head; int nfsd_head_flag; struct nfs_bufq nfs_bufq; struct nqtimerhead nqtimerhead; struct nqfhhashhead *nqfhhashtbl; u_long nqfhhash; static void (*nfs_prev_lease_updatetime) __P((int)); static int nfs_prev_nfssvc_sy_narg; static sy_call_t *nfs_prev_nfssvc_sy_call; #ifndef NFS_NOSERVER static vop_t *nfs_prev_vop_lease_check; static int nfs_prev_getfh_sy_narg; static sy_call_t *nfs_prev_getfh_sy_call; /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. 
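 * The table is indexed by the Version 2 procedure number; procedures
 * with no generic equivalent map to NFSPROC_NOOP.  For example the V2
 * STATFS procedure (17) maps to NFSPROC_FSSTAT.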
*/ int nfsv3_procid[NFS_NPROCS] = { NFSPROC_NULL, NFSPROC_GETATTR, NFSPROC_SETATTR, NFSPROC_NOOP, NFSPROC_LOOKUP, NFSPROC_READLINK, NFSPROC_READ, NFSPROC_NOOP, NFSPROC_WRITE, NFSPROC_CREATE, NFSPROC_REMOVE, NFSPROC_RENAME, NFSPROC_LINK, NFSPROC_SYMLINK, NFSPROC_MKDIR, NFSPROC_RMDIR, NFSPROC_READDIR, NFSPROC_FSSTAT, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP }; #endif /* NFS_NOSERVER */ /* * and the reverse mapping from generic to Version 2 procedure numbers */ int nfsv2_procid[NFS_NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; #ifndef NFS_NOSERVER /* * Maps errno values to nfs error numbers. * Use NFSERR_IO as the catch all for ones not specifically defined in * RFC 1094. */ static u_char nfsrv_v2errmap[ELAST] = { NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO /* << Last is 86 */ }; /* * Maps errno values to nfs error numbers. * Although it is not obvious whether or not NFS clients really care if * a returned error value is in the specified list for the procedure, the * safest thing to do is filter them appropriately. For Version 2, the * X/Open XNFS document is the only specification that defines error values * for each RPC (The RFC simply lists all possible error values for all RPCs), * so I have decided to not do this for Version 2. * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. 
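 * Each list is terminated by a zero entry; an error that is not found
 * in the list for the current procedure is replaced by the first
 * (default) entry.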
*/ static short nfsv3err_null[] = { 0, 0, }; static short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_PERM, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOT_SYNC, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_create[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_mknod[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_BADTYPE, 0, }; static short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_rename[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_link[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_NOTSUPP, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsstat[] = { 
NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, nfsv3err_lookup, nfsv3err_access, nfsv3err_readlink, nfsv3err_read, nfsv3err_write, nfsv3err_create, nfsv3err_mkdir, nfsv3err_symlink, nfsv3err_mknod, nfsv3err_remove, nfsv3err_rmdir, nfsv3err_rename, nfsv3err_link, nfsv3err_readdir, nfsv3err_readdirplus, nfsv3err_fsstat, nfsv3err_fsinfo, nfsv3err_pathconf, nfsv3err_commit, }; #endif /* NFS_NOSERVER */ extern struct nfsrtt nfsrtt; extern time_t nqnfsstarttime; extern int nqsrv_clockskew; extern int nqsrv_writeslack; extern int nqsrv_maxlease; extern struct nfsstats nfsstats; extern int nqnfs_piggy[NFS_NPROCS]; extern nfstype nfsv2_type[9]; extern nfstype nfsv3_type[9]; extern struct nfsnodehashhead *nfsnodehashtbl; extern u_long nfsnodehash; struct getfh_args; extern int getfh(struct proc *, struct getfh_args *, int *); struct nfssvc_args; extern int nfssvc(struct proc *, struct nfssvc_args *, int *); LIST_HEAD(nfsnodehashhead, nfsnode); int nfs_webnamei __P((struct nameidata *, struct vnode *, struct proc *)); u_quad_t nfs_curusec() { struct timeval tv; getmicrotime(&tv); return ((u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec); } /* * Create the header for an rpc request packet * The hsiz is the size of the rest of the nfs request header. * (just used to decide if a cluster is a good idea) */ struct mbuf * nfsm_reqh(vp, procid, hsiz, bposp) struct vnode *vp; u_long procid; int hsiz; caddr_t *bposp; { register struct mbuf *mb; register u_int32_t *tl; register caddr_t bpos; struct mbuf *mb2; struct nfsmount *nmp; int nqflag; MGET(mb, M_WAIT, MT_DATA); if (hsiz >= MINCLSIZE) MCLGET(mb, M_WAIT); mb->m_len = 0; bpos = mtod(mb, caddr_t); /* * For NQNFS, add lease request. */ if (vp) { nmp = VFSTONFS(vp->v_mount); if (nmp->nm_flag & NFSMNT_NQNFS) { nqflag = NQNFS_NEEDLEASE(vp, procid); if (nqflag) { nfsm_build(tl, u_int32_t *, 2*NFSX_UNSIGNED); *tl++ = txdr_unsigned(nqflag); *tl = txdr_unsigned(nmp->nm_leaseterm); } else { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = 0; } } } /* Finally, return values */ *bposp = bpos; return (mb); } /* * Build the RPC header and fill in the authorization info. * The authorization string argument is only used when the credentials * come from outside of the kernel. * Returns the head of the mbuf list. */ struct mbuf * nfsm_rpchead(cr, nmflag, procid, auth_type, auth_len, auth_str, verf_len, verf_str, mrest, mrest_len, mbp, xidp) register struct ucred *cr; int nmflag; int procid; int auth_type; int auth_len; char *auth_str; int verf_len; char *verf_str; struct mbuf *mrest; int mrest_len; struct mbuf **mbp; u_int32_t *xidp; { register struct mbuf *mb; register u_int32_t *tl; register caddr_t bpos; register int i; struct mbuf *mreq, *mb2; int siz, grpsiz, authsiz; authsiz = nfsm_rndup(auth_len); MGETHDR(mb, M_WAIT, MT_DATA); if ((authsiz + 10 * NFSX_UNSIGNED) >= MINCLSIZE) { MCLGET(mb, M_WAIT); } else if ((authsiz + 10 * NFSX_UNSIGNED) < MHLEN) { MH_ALIGN(mb, authsiz + 10 * NFSX_UNSIGNED); } else { MH_ALIGN(mb, 8 * NFSX_UNSIGNED); } mb->m_len = 0; mreq = mb; bpos = mtod(mb, caddr_t); /* * First the RPC header. 
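 * Eight words: xid, CALL, RPC version 2, program number, program
 * version, procedure number, and then the start of the credential
 * (auth flavor and length).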
*/ nfsm_build(tl, u_int32_t *, 8 * NFSX_UNSIGNED); /* Get a pretty random xid to start with */ if (!nfs_xid) nfs_xid = random(); /* * Skip zero xid if it should ever happen. */ if (++nfs_xid == 0) nfs_xid++; *tl++ = *xidp = txdr_unsigned(nfs_xid); *tl++ = rpc_call; *tl++ = rpc_vers; if (nmflag & NFSMNT_NQNFS) { *tl++ = txdr_unsigned(NQNFS_PROG); *tl++ = txdr_unsigned(NQNFS_VER3); } else { *tl++ = txdr_unsigned(NFS_PROG); if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(NFS_VER3); else *tl++ = txdr_unsigned(NFS_VER2); } if (nmflag & NFSMNT_NFSV3) *tl++ = txdr_unsigned(procid); else *tl++ = txdr_unsigned(nfsv2_procid[procid]); /* * And then the authorization cred. */ *tl++ = txdr_unsigned(auth_type); *tl = txdr_unsigned(authsiz); switch (auth_type) { case RPCAUTH_UNIX: nfsm_build(tl, u_int32_t *, auth_len); *tl++ = 0; /* stamp ?? */ *tl++ = 0; /* NULL hostname */ *tl++ = txdr_unsigned(cr->cr_uid); *tl++ = txdr_unsigned(cr->cr_groups[0]); grpsiz = (auth_len >> 2) - 5; *tl++ = txdr_unsigned(grpsiz); for (i = 1; i <= grpsiz; i++) *tl++ = txdr_unsigned(cr->cr_groups[i]); break; case RPCAUTH_KERB4: siz = auth_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(auth_str, bpos, i); mb->m_len += i; auth_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } break; }; /* * And the verifier... */ nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED); if (verf_str) { *tl++ = txdr_unsigned(RPCAUTH_KERB4); *tl = txdr_unsigned(verf_len); siz = verf_len; while (siz > 0) { if (M_TRAILINGSPACE(mb) == 0) { MGET(mb2, M_WAIT, MT_DATA); if (siz >= MINCLSIZE) MCLGET(mb2, M_WAIT); mb->m_next = mb2; mb = mb2; mb->m_len = 0; bpos = mtod(mb, caddr_t); } i = min(siz, M_TRAILINGSPACE(mb)); bcopy(verf_str, bpos, i); mb->m_len += i; verf_str += i; bpos += i; siz -= i; } if ((siz = (nfsm_rndup(verf_len) - verf_len)) > 0) { for (i = 0; i < siz; i++) *bpos++ = '\0'; mb->m_len += siz; } } else { *tl++ = txdr_unsigned(RPCAUTH_NULL); *tl = 0; } mb->m_next = mrest; mreq->m_pkthdr.len = authsiz + 10 * NFSX_UNSIGNED + mrest_len; mreq->m_pkthdr.rcvif = (struct ifnet *)0; *mbp = mb; return (mreq); } /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbuftouio(mrep, uiop, siz, dpos) struct mbuf **mrep; register struct uio *uiop; int siz; caddr_t *dpos; { register char *mbufcp, *uiocp; register int xfer, left, len; register struct mbuf *mp; long uiosiz, rem; int error = 0; mp = *mrep; mbufcp = *dpos; len = mtod(mp, caddr_t)+mp->m_len-mbufcp; rem = nfsm_rndup(siz)-siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) return (EFBIG); left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mp->m_next; if (mp == NULL) return (EBADRPC); mbufcp = mtod(mp, caddr_t); len = mp->m_len; } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. 
*/ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(mbufcp, uiocp, xfer); else copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } *dpos = mbufcp; *mrep = mp; if (rem > 0) { if (len < rem) error = nfs_adv(mrep, dpos, rem, len); else *dpos += rem; } return (error); } /* * copies a uio scatter/gather list to an mbuf chain. * NOTE: can ony handle iovcnt == 1 */ int nfsm_uiotombuf(uiop, mq, siz, bpos) register struct uio *uiop; struct mbuf **mq; int siz; caddr_t *bpos; { register char *uiocp; register struct mbuf *mp, *mp2; register int xfer, left, mlen; int uiosiz, clflg, rem; char *cp; #ifdef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfsm_uiotombuf: iovcnt != 1"); #endif if (siz > MLEN) /* or should it >= MCLBYTES ?? */ clflg = 1; else clflg = 0; rem = nfsm_rndup(siz)-siz; mp = mp2 = *mq; while (siz > 0) { left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { mlen = M_TRAILINGSPACE(mp); if (mlen == 0) { MGET(mp, M_WAIT, MT_DATA); if (clflg) MCLGET(mp, M_WAIT); mp->m_len = 0; mp2->m_next = mp; mp2 = mp; mlen = M_TRAILINGSPACE(mp); } xfer = (left > mlen) ? mlen : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else #endif if (uiop->uio_segflg == UIO_SYSSPACE) bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); else copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer); mp->m_len += xfer; left -= xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } uiop->uio_iov->iov_base += uiosiz; uiop->uio_iov->iov_len -= uiosiz; siz -= uiosiz; } if (rem > 0) { if (rem > M_TRAILINGSPACE(mp)) { MGET(mp, M_WAIT, MT_DATA); mp->m_len = 0; mp2->m_next = mp; } cp = mtod(mp, caddr_t)+mp->m_len; for (left = 0; left < rem; left++) *cp++ = '\0'; mp->m_len += rem; *bpos = cp; } else *bpos = mtod(mp, caddr_t)+mp->m_len; *mq = mp; return (0); } /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macros nfsm_dissect and nfsm_dissecton for tough * cases. (The macros use the vars. dpos and dpos2) */ int nfsm_disct(mdp, dposp, siz, left, cp2) struct mbuf **mdp; caddr_t *dposp; int siz; int left; caddr_t *cp2; { register struct mbuf *mp, *mp2; register int siz2, xfer; register caddr_t p; mp = *mdp; while (left == 0) { *mdp = mp = mp->m_next; if (mp == NULL) return (EBADRPC); left = mp->m_len; *dposp = mtod(mp, caddr_t); } if (left >= siz) { *cp2 = *dposp; *dposp += siz; } else if (mp->m_next == NULL) { return (EBADRPC); } else if (siz > MHLEN) { panic("nfs S too big"); } else { MGET(mp2, M_WAIT, MT_DATA); mp2->m_next = mp->m_next; mp->m_next = mp2; mp->m_len -= left; mp = mp2; *cp2 = p = mtod(mp, caddr_t); bcopy(*dposp, p, left); /* Copy what was left */ siz2 = siz-left; p += left; mp2 = mp->m_next; /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return (EBADRPC); xfer = (siz2 > mp2->m_len) ? 
mp2->m_len : siz2; if (xfer > 0) { bcopy(mtod(mp2, caddr_t), p, xfer); NFSMADV(mp2, xfer); mp2->m_len -= xfer; p += xfer; siz2 -= xfer; } if (siz2 > 0) mp2 = mp2->m_next; } mp->m_len = siz; *mdp = mp2; *dposp = mtod(mp2, caddr_t); } return (0); } /* * Advance the position in the mbuf chain. */ int nfs_adv(mdp, dposp, offs, left) struct mbuf **mdp; caddr_t *dposp; int offs; int left; { register struct mbuf *m; register int s; m = *mdp; s = left; while (s < offs) { offs -= s; m = m->m_next; if (m == NULL) return (EBADRPC); s = m->m_len; } *mdp = m; *dposp = mtod(m, caddr_t)+offs; return (0); } /* * Copy a string into mbufs for the hard cases... */ int nfsm_strtmbuf(mb, bpos, cp, siz) struct mbuf **mb; char **bpos; const char *cp; long siz; { register struct mbuf *m1 = NULL, *m2; long left, xfer, len, tlen; u_int32_t *tl; int putsize; putsize = 1; m2 = *mb; left = M_TRAILINGSPACE(m2); if (left > 0) { tl = ((u_int32_t *)(*bpos)); *tl++ = txdr_unsigned(siz); putsize = 0; left -= NFSX_UNSIGNED; m2->m_len += NFSX_UNSIGNED; if (left > 0) { bcopy(cp, (caddr_t) tl, left); siz -= left; cp += left; m2->m_len += left; left = 0; } } /* Loop around adding mbufs */ while (siz > 0) { MGET(m1, M_WAIT, MT_DATA); if (siz > MLEN) MCLGET(m1, M_WAIT); m1->m_len = NFSMSIZ(m1); m2->m_next = m1; m2 = m1; tl = mtod(m1, u_int32_t *); tlen = 0; if (putsize) { *tl++ = txdr_unsigned(siz); m1->m_len -= NFSX_UNSIGNED; tlen = NFSX_UNSIGNED; putsize = 0; } if (siz < m1->m_len) { len = nfsm_rndup(siz); xfer = siz; if (xfer < len) *(tl+(xfer>>2)) = 0; } else { xfer = len = m1->m_len; } bcopy(cp, (caddr_t) tl, xfer); m1->m_len = len+tlen; siz -= xfer; cp += xfer; } *mb = m1; *bpos = mtod(m1, caddr_t)+m1->m_len; return (0); } /* * Called once to initialize data structures... */ int nfs_init(vfsp) struct vfsconf *vfsp; { register int i; nfsmount_zone = zinit("NFSMOUNT", sizeof(struct nfsmount), 0, 0, 1); /* * Check to see if major data structures haven't bloated. */ if (sizeof (struct nfssvc_sock) > NFS_SVCALLOC) { printf("struct nfssvc_sock bloated (> %dbytes)\n",NFS_SVCALLOC); printf("Try reducing NFS_UIDHASHSIZ\n"); } if (sizeof (struct nfsuid) > NFS_UIDALLOC) { printf("struct nfsuid bloated (> %dbytes)\n",NFS_UIDALLOC); printf("Try unionizing the nu_nickname and nu_flag fields\n"); } nfs_mount_type = vfsp->vfc_typenum; nfsrtt.pos = 0; rpc_vers = txdr_unsigned(RPC_VER2); rpc_call = txdr_unsigned(RPC_CALL); rpc_reply = txdr_unsigned(RPC_REPLY); rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); rpc_mismatch = txdr_unsigned(RPC_MISMATCH); rpc_autherr = txdr_unsigned(RPC_AUTHERR); rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); rpc_auth_kerb = txdr_unsigned(RPCAUTH_KERB4); nfs_prog = txdr_unsigned(NFS_PROG); nqnfs_prog = txdr_unsigned(NQNFS_PROG); nfs_true = txdr_unsigned(TRUE); nfs_false = txdr_unsigned(FALSE); nfs_xdrneg1 = txdr_unsigned(-1); nfs_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfs_ticks < 1) nfs_ticks = 1; /* Ensure async daemons disabled */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) { nfs_iodwant[i] = (struct proc *)0; nfs_iodmount[i] = (struct nfsmount *)0; } nfs_nhinit(); /* Init the nfsnode table */ #ifndef NFS_NOSERVER nfsrv_init(0); /* Init server data structures */ nfsrv_initcache(); /* Init the server request cache */ #endif /* * Initialize the nqnfs server stuff. 
*/ if (nqnfsstarttime == 0) { nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease + nqsrv_clockskew + nqsrv_writeslack; NQLOADNOVRAM(nqnfsstarttime); CIRCLEQ_INIT(&nqtimerhead); nqfhhashtbl = hashinit(NQLCHSZ, M_NQLEASE, &nqfhhash); } /* * Initialize reply list and start timer */ TAILQ_INIT(&nfs_reqq); nfs_timer(0); /* * Set up lease_check and lease_updatetime so that other parts * of the system can call us, if we are loadable. */ #ifndef NFS_NOSERVER nfs_prev_vop_lease_check = default_vnodeop_p[VOFFSET(vop_lease)]; default_vnodeop_p[VOFFSET(vop_lease)] = (vop_t *)nqnfs_vop_lease_check; #endif nfs_prev_lease_updatetime = lease_updatetime; lease_updatetime = nfs_lease_updatetime; nfs_prev_nfssvc_sy_narg = sysent[SYS_nfssvc].sy_narg; sysent[SYS_nfssvc].sy_narg = 2; nfs_prev_nfssvc_sy_call = sysent[SYS_nfssvc].sy_call; sysent[SYS_nfssvc].sy_call = (sy_call_t *)nfssvc; #ifndef NFS_NOSERVER nfs_prev_getfh_sy_narg = sysent[SYS_getfh].sy_narg; sysent[SYS_getfh].sy_narg = 2; nfs_prev_getfh_sy_call = sysent[SYS_getfh].sy_call; sysent[SYS_getfh].sy_call = (sy_call_t *)getfh; #endif nfs_pbuf_freecnt = nswbuf / 2 + 1; return (0); } int nfs_uninit(vfsp) struct vfsconf *vfsp; { untimeout(nfs_timer, (void *)NULL, nfs_timer_handle); nfs_mount_type = -1; #ifndef NFS_NOSERVER default_vnodeop_p[VOFFSET(vop_lease)] = nfs_prev_vop_lease_check; #endif lease_updatetime = nfs_prev_lease_updatetime; sysent[SYS_nfssvc].sy_narg = nfs_prev_nfssvc_sy_narg; sysent[SYS_nfssvc].sy_call = nfs_prev_nfssvc_sy_call; #ifndef NFS_NOSERVER sysent[SYS_getfh].sy_narg = nfs_prev_getfh_sy_narg; sysent[SYS_getfh].sy_call = nfs_prev_getfh_sy_call; #endif return (0); } /* * Attribute cache routines. * nfs_loadattrcache() - loads or updates the cache contents from attributes * that are on the mbuf list * nfs_getattrcache() - returns valid attributes if found in cache, returns * error otherwise */ /* * Load the attribute cache (that lives in the nfsnode entry) with * the values on the mbuf list and * Iff vap not NULL * copy the attributes to *vaper */ int nfs_loadattrcache(vpp, mdp, dposp, vaper) struct vnode **vpp; struct mbuf **mdp; caddr_t *dposp; struct vattr *vaper; { register struct vnode *vp = *vpp; register struct vattr *vap; register struct nfs_fattr *fp; register struct nfsnode *np; register int32_t t1; caddr_t cp2; int error = 0, rdev; struct mbuf *md; enum vtype vtyp; u_short vmode; struct timespec mtime; struct vnode *nvp; int v3 = NFS_ISV3(vp); md = *mdp; t1 = (mtod(md, caddr_t) + md->m_len) - *dposp; if ((error = nfsm_disct(mdp, dposp, NFSX_FATTR(v3), t1, &cp2)) != 0) return (error); fp = (struct nfs_fattr *)cp2; if (v3) { vtyp = nfsv3tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); rdev = makeudev(fxdr_unsigned(int, fp->fa3_rdev.specdata1), fxdr_unsigned(int, fp->fa3_rdev.specdata2)); fxdr_nfsv3time(&fp->fa3_mtime, &mtime); } else { vtyp = nfsv2tov_type(fp->fa_type); vmode = fxdr_unsigned(u_short, fp->fa_mode); /* * XXX * * The duplicate information returned in fa_type and fa_mode * is an ambiguity in the NFS version 2 protocol. * * VREG should be taken literally as a regular file. If a * server intents to return some type information differently * in the upper bits of the mode field (e.g. for sockets, or * FIFOs), NFSv2 mandates fa_type to be VNON. Anyway, we * leave the examination of the mode bits even in the VREG * case to avoid breakage for bogus servers, but we make sure * that there are actually type bits set in the upper part of * fa_mode (and failing that, trust the va_type field). 
* * NFSv3 cleared the issue, and requires fa_mode to not * contain any type information (while also introduing sockets * and FIFOs for fa_type). */ if (vtyp == VNON || (vtyp == VREG && (vmode & S_IFMT) != 0)) vtyp = IFTOVT(vmode); rdev = fxdr_unsigned(int32_t, fp->fa2_rdev); fxdr_nfsv2time(&fp->fa2_mtime, &mtime); /* * Really ugly NFSv2 kludge. */ if (vtyp == VCHR && rdev == 0xffffffff) vtyp = VFIFO; } /* * If v_type == VNON it is a new node, so fill in the v_type, * n_mtime fields. Check to see if it represents a special * device, and if so, check for a possible alias. Once the * correct vnode has been obtained, fill in the rest of the * information. */ np = VTONFS(vp); if (vp->v_type != vtyp) { vp->v_type = vtyp; if (vp->v_type == VFIFO) { vp->v_op = fifo_nfsv2nodeop_p; } if (vp->v_type == VCHR || vp->v_type == VBLK) { vp->v_op = spec_nfsv2nodeop_p; nvp = checkalias(vp, rdev, vp->v_mount); if (nvp) { /* * Discard unneeded vnode, but save its nfsnode. * Since the nfsnode does not have a lock, its * vnode lock has to be carried over. */ nvp->v_vnlock = vp->v_vnlock; vp->v_vnlock = NULL; nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased node. */ np->n_vnode = nvp; *vpp = vp = nvp; } } np->n_mtime = mtime.tv_sec; } vap = &np->n_vattr; vap->va_type = vtyp; vap->va_mode = (vmode & 07777); vap->va_rdev = rdev; vap->va_mtime = mtime; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; if (v3) { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_hyper(&fp->fa3_size); vap->va_blocksize = NFS_FABLKSIZE; vap->va_bytes = fxdr_hyper(&fp->fa3_used); vap->va_fileid = fxdr_unsigned(int32_t, fp->fa3_fileid.nfsuquad[1]); fxdr_nfsv3time(&fp->fa3_atime, &vap->va_atime); fxdr_nfsv3time(&fp->fa3_ctime, &vap->va_ctime); vap->va_flags = 0; vap->va_filerev = 0; } else { vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink); vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid); vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid); vap->va_size = fxdr_unsigned(u_int32_t, fp->fa2_size); vap->va_blocksize = fxdr_unsigned(int32_t, fp->fa2_blocksize); vap->va_bytes = (u_quad_t)fxdr_unsigned(int32_t, fp->fa2_blocks) * NFS_FABLKSIZE; vap->va_fileid = fxdr_unsigned(int32_t, fp->fa2_fileid); fxdr_nfsv2time(&fp->fa2_atime, &vap->va_atime); vap->va_flags = 0; vap->va_ctime.tv_sec = fxdr_unsigned(u_int32_t, fp->fa2_ctime.nfsv2_sec); vap->va_ctime.tv_nsec = 0; vap->va_gen = fxdr_unsigned(u_int32_t,fp->fa2_ctime.nfsv2_usec); vap->va_filerev = 0; } if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, np->n_size); } else np->n_size = vap->va_size; } np->n_attrstamp = time_second; if (vaper != NULL) { bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } } return (0); } #ifdef NFS_ACDEBUG #include SYSCTL_DECL(_vfs_nfs); static int nfs_acdebug; SYSCTL_INT(_vfs_nfs, OID_AUTO, acdebug, CTLFLAG_RW, &nfs_acdebug, 0, ""); #endif /* * Check the time stamp * If the cache is valid, copy contents to *vap and return 0 * otherwise return an error */ int nfs_getattrcache(vp, vaper) register struct vnode *vp; struct vattr *vaper; { register struct nfsnode *np; 
register struct vattr *vap; struct nfsmount *nmp; int timeo; np = VTONFS(vp); vap = &np->n_vattr; nmp = VFSTONFS(vp->v_mount); /* XXX n_mtime doesn't seem to be updated on a miss-and-reload */ timeo = (time_second - np->n_mtime) / 10; #ifdef NFS_ACDEBUG if (nfs_acdebug>1) printf("nfs_getattrcache: initial timeo = %d\n", timeo); #endif if (vap->va_type == VDIR) { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acdirmin) timeo = nmp->nm_acdirmin; else if (timeo > nmp->nm_acdirmax) timeo = nmp->nm_acdirmax; } else { if ((np->n_flag & NMODIFIED) || timeo < nmp->nm_acregmin) timeo = nmp->nm_acregmin; else if (timeo > nmp->nm_acregmax) timeo = nmp->nm_acregmax; } #ifdef NFS_ACDEBUG if (nfs_acdebug > 2) printf("acregmin %d; acregmax %d; acdirmin %d; acdirmax %d\n", nmp->nm_acregmin, nmp->nm_acregmax, nmp->nm_acdirmin, nmp->nm_acdirmax); if (nfs_acdebug) printf("nfs_getattrcache: age = %d; final timeo = %d\n", (time_second - np->n_attrstamp), timeo); #endif if ((time_second - np->n_attrstamp) >= timeo) { nfsstats.attrcache_misses++; return (ENOENT); } nfsstats.attrcache_hits++; if (vap->va_size != np->n_size) { if (vap->va_type == VREG) { if (np->n_flag & NMODIFIED) { if (vap->va_size < np->n_size) vap->va_size = np->n_size; else np->n_size = vap->va_size; } else np->n_size = vap->va_size; vnode_pager_setsize(vp, np->n_size); } else np->n_size = vap->va_size; } bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(struct vattr)); if (np->n_flag & NCHG) { if (np->n_flag & NACC) vaper->va_atime = np->n_atim; if (np->n_flag & NUPD) vaper->va_mtime = np->n_mtim; } return (0); } #ifndef NFS_NOSERVER /* * Set up nameidata for a lookup() call and do it. * * If pubflag is set, this call is done for a lookup operation on the * public filehandle. In that case we allow crossing mountpoints and * absolute pathnames. However, the caller is expected to check that * the lookup result is within the public fs, and deny access if * it is not. * * nfs_namei() clears out garbage fields that namei() might leave garbage. * This is mainly ni_vp and ni_dvp when an error occurs, and ni_dvp when no * error occurs but the parent was not requested. * * dirp may be set whether an error is returned or not, and must be * released by the caller. */ int nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, retdirp, p, kerbflag, pubflag) register struct nameidata *ndp; fhandle_t *fhp; int len; struct nfssvc_sock *slp; struct sockaddr *nam; struct mbuf **mdp; caddr_t *dposp; struct vnode **retdirp; struct proc *p; int kerbflag, pubflag; { register int i, rem; register struct mbuf *md; register char *fromcp, *tocp, *cp; struct iovec aiov; struct uio auio; struct vnode *dp; int error, rdonly, linklen; struct componentname *cnp = &ndp->ni_cnd; *retdirp = (struct vnode *)0; cnp->cn_pnbuf = zalloc(namei_zone); /* * Copy the name from the mbuf list to ndp->ni_pnbuf * and set the various ndp fields appropriately. */ fromcp = *dposp; tocp = cnp->cn_pnbuf; md = *mdp; rem = mtod(md, caddr_t) + md->m_len - fromcp; cnp->cn_hash = 0; for (i = 0; i < len; i++) { while (rem == 0) { md = md->m_next; if (md == NULL) { error = EBADRPC; goto out; } fromcp = mtod(md, caddr_t); rem = md->m_len; } if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) { error = EACCES; goto out; } cnp->cn_hash += (unsigned char)*fromcp; *tocp++ = *fromcp++; rem--; } *tocp = '\0'; *mdp = md; *dposp = fromcp; len = nfsm_rndup(len)-len; if (len > 0) { if (rem >= len) *dposp += len; else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0) goto out; } /* * Extract and set starting directory. 
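 * nfsrv_fhtovp() translates the client-supplied file handle into a
 * vnode and checks it against the export list; the result must be a
 * directory or the lookup below cannot proceed (ENOTDIR).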
*/ error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp, nam, &rdonly, kerbflag, pubflag); if (error) goto out; if (dp->v_type != VDIR) { vrele(dp); error = ENOTDIR; goto out; } if (rdonly) cnp->cn_flags |= RDONLY; /* * Set return directory. Reference to dp is implicitly transfered * to the returned pointer */ *retdirp = dp; if (pubflag) { /* * Oh joy. For WebNFS, handle those pesky '%' escapes, * and the 'native path' indicator. */ cp = zalloc(namei_zone); fromcp = cnp->cn_pnbuf; tocp = cp; if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) { switch ((unsigned char)*fromcp) { case WEBNFS_NATIVE_CHAR: /* * 'Native' path for us is the same * as a path according to the NFS spec, * just skip the escape char. */ fromcp++; break; /* * More may be added in the future, range 0x80-0xff */ default: error = EIO; zfree(namei_zone, cp); goto out; } } /* * Translate the '%' escapes, URL-style. */ while (*fromcp != '\0') { if (*fromcp == WEBNFS_ESC_CHAR) { if (fromcp[1] != '\0' && fromcp[2] != '\0') { fromcp++; *tocp++ = HEXSTRTOI(fromcp); fromcp += 2; continue; } else { error = ENOENT; zfree(namei_zone, cp); goto out; } } else *tocp++ = *fromcp++; } *tocp = '\0'; zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1; ndp->ni_segflg = UIO_SYSSPACE; if (pubflag) { ndp->ni_rootdir = rootvnode; ndp->ni_loopcnt = 0; if (cnp->cn_pnbuf[0] == '/') dp = rootvnode; } else { cnp->cn_flags |= NOCROSSMOUNT; } /* * Initialize for scan, set ni_startdir and bump ref on dp again * becuase lookup() will dereference ni_startdir. */ cnp->cn_proc = p; VREF(dp); ndp->ni_startdir = dp; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* * Call lookup() to do the real work. If an error occurs, * ndp->ni_vp and ni_dvp are left uninitialized or NULL and * we do not have to dereference anything before returning. * In either case ni_startdir will be dereferenced and NULLed * out. */ error = lookup(ndp); if (error) break; /* * Check for encountering a symbolic link. Trivial * termination occurs if no symlink encountered. * Note: zfree is safe because error is 0, so we will * not zfree it again when we break. 
*/ if ((cnp->cn_flags & ISSYMLINK) == 0) { nfsrv_object_create(ndp->ni_vp); if (cnp->cn_flags & (SAVENAME | SAVESTART)) cnp->cn_flags |= HASBUF; else zfree(namei_zone, cnp->cn_pnbuf); break; } /* * Validate symlink */ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) VOP_UNLOCK(ndp->ni_dvp, 0, p); if (!pubflag) { error = EINVAL; goto badlink2; } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; goto badlink2; } if (ndp->ni_pathlen > 1) cp = zalloc(namei_zone); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_procp = (struct proc *)0; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink1: if (ndp->ni_pathlen > 1) zfree(namei_zone, cp); badlink2: vrele(ndp->ni_dvp); vput(ndp->ni_vp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink1; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink1; } /* * Adjust or replace path */ if (ndp->ni_pathlen > 1) { bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; /* * Cleanup refs for next loop and check if root directory * should replace current directory. Normally ni_dvp * becomes the new base directory and is cleaned up when * we loop. Explicitly null pointers after invalidation * to clarify operation. */ vput(ndp->ni_vp); ndp->ni_vp = NULL; if (cnp->cn_pnbuf[0] == '/') { vrele(ndp->ni_dvp); ndp->ni_dvp = ndp->ni_rootdir; VREF(ndp->ni_dvp); } ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } /* * nfs_namei() guarentees that fields will not contain garbage * whether an error occurs or not. This allows the caller to track * cleanup state trivially. */ out: if (error) { zfree(namei_zone, cnp->cn_pnbuf); ndp->ni_vp = NULL; ndp->ni_dvp = NULL; ndp->ni_startdir = NULL; cnp->cn_flags &= ~HASBUF; } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } return (error); } /* * A fiddled version of m_adj() that ensures null fill to a long * boundary and only trims off the back end */ void nfsm_adj(mp, len, nul) struct mbuf *mp; register int len; int nul; { register struct mbuf *m; register int count, i; register char *cp; /* * Trim from tail. Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ count = 0; m = mp; for (;;) { count += m->m_len; if (m->m_next == (struct mbuf *)0) break; m = m->m_next; } if (m->m_len > len) { m->m_len -= len; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ for (m = mp; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } break; } count -= m->m_len; } for (m = m->m_next;m;m = m->m_next) m->m_len = 0; } /* * Make these functions instead of macros, so that the kernel text size * doesn't get too big... 
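 *
 * For orientation, a sketch of the NFSv3 wire layout produced below (not
 * normative): nfsm_srvwcc() emits the weak cache consistency block -- a
 * boolean follows-flag and, when before_ret == 0, the pre-operation size
 * as a 64-bit hyper plus mtime and ctime, 7 32-bit words in all -- and
 * then chains to nfsm_srvpostopattr() for the post-operation attributes.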
*/ void nfsm_srvwcc(nfsd, before_ret, before_vap, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int before_ret; register struct vattr *before_vap; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_int32_t *tl; if (before_ret) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_int32_t *, 7 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(before_vap->va_size, tl); tl += 2; txdr_nfsv3time(&(before_vap->va_mtime), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_ctime), tl); } *bposp = bpos; *mbp = mb; nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); } void nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp) struct nfsrv_descript *nfsd; int after_ret; struct vattr *after_vap; struct mbuf **mbp; char **bposp; { register struct mbuf *mb = *mbp, *mb2; register char *bpos = *bposp; register u_int32_t *tl; register struct nfs_fattr *fp; if (after_ret) { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR); *tl++ = nfs_true; fp = (struct nfs_fattr *)tl; nfsm_srvfattr(nfsd, after_vap, fp); } *mbp = mb; *bposp = bpos; } void nfsm_srvfattr(nfsd, vap, fp) register struct nfsrv_descript *nfsd; register struct vattr *vap; register struct nfs_fattr *fp; { fp->fa_nlink = txdr_unsigned(vap->va_nlink); fp->fa_uid = txdr_unsigned(vap->va_uid); fp->fa_gid = txdr_unsigned(vap->va_gid); if (nfsd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv3_type(vap->va_type); fp->fa_mode = vtonfsv3_mode(vap->va_mode); txdr_hyper(vap->va_size, &fp->fa3_size); txdr_hyper(vap->va_bytes, &fp->fa3_used); fp->fa3_rdev.specdata1 = txdr_unsigned(umajor(vap->va_rdev)); fp->fa3_rdev.specdata2 = txdr_unsigned(uminor(vap->va_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); fp->fa3_fileid.nfsuquad[0] = 0; fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(vap->va_type); fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); fp->fa2_size = txdr_unsigned(vap->va_size); fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); if (vap->va_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(vap->va_rdev); fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(vap->va_fsid); fp->fa2_fileid = txdr_unsigned(vap->va_fileid); txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); } } /* * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) * - look up fsid in mount list (if not found ret error) * - get vp and export rights by calling VFS_FHTOVP() * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon * - if not lockflag unlock it with VOP_UNLOCK() */ int nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp, kerbflag, pubflag) fhandle_t *fhp; int lockflag; struct vnode **vpp; struct ucred *cred; struct nfssvc_sock *slp; struct sockaddr *nam; int *rdonlyp; int kerbflag; int pubflag; { struct proc *p = curproc; /* XXX */ register struct mount *mp; register int i; struct ucred *credanon; int error, exflags; #ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */ struct sockaddr_int *saddr; #endif 
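	/*
	 * Typical use, as an illustrative sketch only (the real callers
	 * live in the server RPC handlers, e.g. nfs_serv.c):
	 *
	 *	error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, slp, nam,
	 *	    &rdonly, kerbflag, pubflag);
	 *	if (error)
	 *		reply with nfsrv_errmap(nd, error);
	 *	... operate on the locked vp ...
	 *	vput(vp);
	 *
	 * With lockflag == FALSE the vnode comes back referenced but
	 * unlocked, and vrele() is the matching release.
	 */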
*vpp = (struct vnode *)0; if (nfs_ispublicfh(fhp)) { if (!pubflag || !nfs_pub.np_valid) return (ESTALE); fhp = &nfs_pub.np_handle; } mp = vfs_getvfs(&fhp->fh_fsid); if (!mp) return (ESTALE); error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon); if (error) return (error); #ifdef MNT_EXNORESPORT if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) { saddr = (struct sockaddr_in *)nam; if (saddr->sin_family == AF_INET && ntohs(saddr->sin_port) >= IPPORT_RESERVED) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } #endif /* * Check/setup credentials. */ if (exflags & MNT_EXKERB) { if (!kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } } else if (kerbflag) { vput(*vpp); return (NFSERR_AUTHERR | AUTH_TOOWEAK); } else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { cred->cr_uid = credanon->cr_uid; for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) cred->cr_groups[i] = credanon->cr_groups[i]; cred->cr_ngroups = i; } if (exflags & MNT_EXRDONLY) *rdonlyp = 1; else *rdonlyp = 0; nfsrv_object_create(*vpp); if (!lockflag) VOP_UNLOCK(*vpp, 0, p); return (0); } /* * WebNFS: check if a filehandle is a public filehandle. For v3, this * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has * transformed this to all zeroes in both cases, so check for it. */ int nfs_ispublicfh(fhp) fhandle_t *fhp; { char *cp = (char *)fhp; int i; for (i = 0; i < NFSX_V3FH; i++) if (*cp++ != 0) return (FALSE); return (TRUE); } #endif /* NFS_NOSERVER */ /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE. * The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ int netaddr_match(family, haddr, nam) int family; union nethostaddr *haddr; struct sockaddr *nam; { register struct sockaddr_in *inetaddr; switch (family) { case AF_INET: inetaddr = (struct sockaddr_in *)nam; if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inetaddr) return (1); break; #ifdef ISO case AF_ISO: { register struct sockaddr_iso *isoaddr1, *isoaddr2; isoaddr1 = (struct sockaddr_iso *)nam; isoaddr2 = (struct sockaddr_iso *)haddr->had_nam; if (isoaddr1->siso_family == AF_ISO && isoaddr1->siso_nlen > 0 && isoaddr1->siso_nlen == isoaddr2->siso_nlen && SAME_ISOADDR(isoaddr1, isoaddr2)) return (1); break; } #endif /* ISO */ default: break; }; return (0); } static nfsuint64 nfs_nullcookie = { { 0, 0 } }; /* * This function finds the directory cookie that corresponds to the * logical byte offset given. 
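 *
 * A quick worked example of the mapping done below (constants left
 * symbolic on purpose): an offset of 3 * NFS_DIRBLKSIZ gives pos = 3,
 * which is decremented to 2 because offset 0 always maps to the static
 * null cookie; slot 2 then lives in the first nfsdmap block as long as
 * NFSNUMCOOKIES > 2, otherwise the loop walks (or, when add is set,
 * allocates) further blocks of NFSNUMCOOKIES cookies each.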
*/ nfsuint64 * nfs_getcookie(np, off, add) register struct nfsnode *np; off_t off; int add; { register struct nfsdmap *dp, *dp2; register int pos; pos = (uoff_t)off / NFS_DIRBLKSIZ; if (pos == 0 || off < 0) { #ifdef DIAGNOSTIC if (add) panic("nfs getcookie add at <= 0"); #endif return (&nfs_nullcookie); } pos--; dp = np->n_cookies.lh_first; if (!dp) { if (add) { MALLOC(dp, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp->ndm_eocookie = 0; LIST_INSERT_HEAD(&np->n_cookies, dp, ndm_list); } else return ((nfsuint64 *)0); } while (pos >= NFSNUMCOOKIES) { pos -= NFSNUMCOOKIES; if (dp->ndm_list.le_next) { if (!add && dp->ndm_eocookie < NFSNUMCOOKIES && pos >= dp->ndm_eocookie) return ((nfsuint64 *)0); dp = dp->ndm_list.le_next; } else if (add) { MALLOC(dp2, struct nfsdmap *, sizeof (struct nfsdmap), M_NFSDIROFF, M_WAITOK); dp2->ndm_eocookie = 0; LIST_INSERT_AFTER(dp, dp2, ndm_list); dp = dp2; } else return ((nfsuint64 *)0); } if (pos >= dp->ndm_eocookie) { if (add) dp->ndm_eocookie = pos + 1; else return ((nfsuint64 *)0); } return (&dp->ndm_cookies[pos]); } /* * Invalidate cached directory information, except for the actual directory * blocks (which are invalidated separately). * Done mainly to avoid the use of stale offset cookies. */ void nfs_invaldir(vp) register struct vnode *vp; { register struct nfsnode *np = VTONFS(vp); #ifdef DIAGNOSTIC if (vp->v_type != VDIR) panic("nfs: invaldir not dir"); #endif np->n_direofoffset = 0; np->n_cookieverf.nfsuquad[0] = 0; np->n_cookieverf.nfsuquad[1] = 0; if (np->n_cookies.lh_first) np->n_cookies.lh_first->ndm_eocookie = 0; } /* * The write verifier has changed (probably due to a server reboot), so all * B_NEEDCOMMIT blocks will have to be written again. Since they are on the * dirty block list as B_DELWRI, all this takes is clearing the B_NEEDCOMMIT * flag. Once done the new write verifier can be set for the mount point. */ void nfs_clearcommit(mp) struct mount *mp; { register struct vnode *vp, *nvp; register struct buf *bp, *nbp; int s; s = splbio(); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { if (vp->v_mount != mp) /* Paranoia */ goto loop; nvp = vp->v_mntvnodes.le_next; for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); if (BUF_REFCNT(bp) == 0 && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bp->b_flags &= ~B_NEEDCOMMIT; } } splx(s); } #ifndef NFS_NOSERVER /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. */ int nfsrv_errmap(nd, err) struct nfsrv_descript *nd; register int err; { register short *defaulterrp, *errp; if (nd->nd_flag & ND_NFSV3) { if (nd->nd_procnum <= NFSPROC_COMMIT) { errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; while (*++errp) { if (*errp == err) return (err); else if (*errp > err) break; } return ((int)*defaulterrp); } else return (err & 0xffff); } if (err <= ELAST) return ((int)nfsrv_v2errmap[err - 1]); return (NFSERR_IO); } int nfsrv_object_create(vp) struct vnode *vp; { if (vp == NULL || vp->v_type != VREG) return (1); return (vfs_object_create(vp, curproc, curproc ? curproc->p_ucred : NULL)); } /* * Sort the group list in increasing numerical order. * (Insertion sort by Chris Torek, who was grossed out by the bubble sort * that used to be here.) */ void nfsrvw_sort(list, num) register gid_t *list; register int num; { register int i, j; gid_t v; /* Insertion sort. 
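 *
 * For example (illustrative values): sorting the group list {5, 3, 4}
 * proceeds as i = 1 takes v = 3 and shifts the 5 up, giving {3, 5, 4},
 * then i = 2 takes v = 4 and shifts only the 5, ending with {3, 4, 5};
 * the sorted order is what lets nfsrv_setcred() results be compared
 * with a plain bcmp().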
*/ for (i = 1; i < num; i++) { v = list[i]; /* find correct slot for value v, moving others up */ for (j = i; --j >= 0 && v < list[j];) list[j + 1] = list[j]; list[j + 1] = v; } } /* * copy credentials making sure that the result can be compared with bcmp(). */ void nfsrv_setcred(incred, outcred) register struct ucred *incred, *outcred; { register int i; bzero((caddr_t)outcred, sizeof (struct ucred)); outcred->cr_ref = 1; outcred->cr_uid = incred->cr_uid; outcred->cr_ngroups = incred->cr_ngroups; for (i = 0; i < incred->cr_ngroups; i++) outcred->cr_groups[i] = incred->cr_groups[i]; nfsrvw_sort(outcred->cr_groups, outcred->cr_ngroups); } #endif /* NFS_NOSERVER */ Index: head/sys/ntfs/ntfs_compr.c =================================================================== --- head/sys/ntfs/ntfs_compr.c (revision 49534) +++ head/sys/ntfs/ntfs_compr.c (revision 49535) @@ -1,120 +1,118 @@ /* $NetBSD: ntfs_compr.c,v 1.2 1999/05/06 15:43:18 christos Exp $ */ /*- * Copyright (c) 1998, 1999 Semen Ustimenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: ntfs_compr.c,v 1.3 1999/04/20 21:06:43 semenu Exp $ + * $Id: ntfs_compr.c,v 1.4 1999/05/12 09:42:54 semenu Exp $ */ #include #include #include #include #include #include #include #include #include #include #ifdef __FreeBSD__ #include #endif - -#include #include #include #define GET_UINT16(addr) (*((u_int16_t *)(addr))) int ntfs_uncompblock( u_int8_t * buf, u_int8_t * cbuf) { u_int32_t ctag; int len, dshift, lmask; int blen, boff; int i, j; int pos, cpos; len = GET_UINT16(cbuf) & 0xFFF; dprintf(("ntfs_uncompblock: block length: %d + 3, 0x%x,0x%04x\n", len, len, GET_UINT16(cbuf))); if (!(GET_UINT16(cbuf) & 0x8000)) { if ((len + 1) != NTFS_COMPBLOCK_SIZE) { dprintf(("ntfs_uncompblock: len: %x instead of %d\n", len, 0xfff)); } memcpy(buf, cbuf + 2, len + 1); bzero(buf + len + 1, NTFS_COMPBLOCK_SIZE - 1 - len); return len + 3; } cpos = 2; pos = 0; while ((cpos < len + 3) && (pos < NTFS_COMPBLOCK_SIZE)) { ctag = cbuf[cpos++]; for (i = 0; (i < 8) && (pos < NTFS_COMPBLOCK_SIZE); i++) { if (ctag & 1) { for (j = pos - 1, lmask = 0xFFF, dshift = 12; j >= 0x10; j >>= 1) { dshift--; lmask >>= 1; } boff = -1 - (GET_UINT16(cbuf + cpos) >> dshift); blen = 3 + (GET_UINT16(cbuf + cpos) & lmask); for (j = 0; (j < blen) && (pos < NTFS_COMPBLOCK_SIZE); j++) { buf[pos] = buf[pos + boff]; pos++; } cpos += 2; } else { buf[pos++] = cbuf[cpos++]; } ctag >>= 1; } } return len + 3; } int ntfs_uncompunit( struct ntfsmount * ntmp, u_int8_t * uup, u_int8_t * cup) { int i; int off = 0; int new; for (i = 0; i * NTFS_COMPBLOCK_SIZE < ntfs_cntob(NTFS_COMPUNIT_CL); i++) { new = ntfs_uncompblock(uup + i * NTFS_COMPBLOCK_SIZE, cup + off); if (new == 0) return (EINVAL); off += new; } return (0); } Index: head/sys/ntfs/ntfs_subr.c =================================================================== --- head/sys/ntfs/ntfs_subr.c (revision 49534) +++ head/sys/ntfs/ntfs_subr.c (revision 49535) @@ -1,1901 +1,1899 @@ /* $NetBSD: ntfs_subr.c,v 1.2 1999/05/06 15:43:19 christos Exp $ */ /*- * Copyright (c) 1998, 1999 Semen Ustimenko (semenu@FreeBSD.org) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: ntfs_subr.c,v 1.3 1999/04/20 21:06:43 semenu Exp $ + * $Id: ntfs_subr.c,v 1.4 1999/05/12 09:43:01 semenu Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) #include #endif - -#include /* #define NTFS_DEBUG 1 */ #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) MALLOC_DEFINE(M_NTFSNTVATTR, "NTFS vattr", "NTFS file attribute information"); MALLOC_DEFINE(M_NTFSRDATA, "NTFS res data", "NTFS resident data"); MALLOC_DEFINE(M_NTFSRUN, "NTFS vrun", "NTFS vrun storage"); MALLOC_DEFINE(M_NTFSDECOMP, "NTFS decomp", "NTFS decompression temporary"); #endif /* * */ int ntfs_ntvattrrele( struct ntvattr * vap) { dprintf(("ntfs_ntvattrrele: ino: %d, type: 0x%x\n", vap->va_ip->i_number, vap->va_type)); ntfs_ntrele(vap->va_ip); return (0); } /* * Search attribute specifed in ntnode (load ntnode if nessecary). * If not found but ATTR_A_ATTRLIST present, read it in and search throught. * VOP_VGET node needed, and lookup througth it's ntnode (load if nessesary). * * ntnode should be locked */ int ntfs_ntvattrget( struct ntfsmount * ntmp, struct ntnode * ip, u_int32_t type, char *name, cn_t vcn, struct ntvattr ** vapp) { int error; struct ntvattr *vap; struct ntvattr *lvap = NULL; struct attr_attrlist *aalp; struct attr_attrlist *nextaalp; caddr_t alpool; int len, namelen; *vapp = NULL; if (name) { dprintf(("ntfs_ntvattrget: " \ "ino: %d, type: 0x%x, name: %s, vcn: %d\n", \ ip->i_number, type, name, (u_int32_t) vcn)); namelen = strlen(name); } else { dprintf(("ntfs_ntvattrget: " \ "ino: %d, type: 0x%x, vcn: %d\n", \ ip->i_number, type, (u_int32_t) vcn)); name = ""; namelen = 0; } if((ip->i_flag & IN_LOADED) == 0) { dprintf(("ntfs_ntvattrget: node not loaded, ino: %d\n", ip->i_number)); error = ntfs_loadntnode(ntmp,ip); if(error) { printf("ntfs_ntvattrget: FAILED TO LOAD INO: %d\n", ip->i_number); return (error); } } for (vap = ip->i_valist.lh_first; vap; vap = vap->va_list.le_next) { ddprintf(("type: 0x%x, vcn: %d - %d\n", \ vap->va_type, (u_int32_t) vap->va_vcnstart, \ (u_int32_t) vap->va_vcnend)); if ((vap->va_type == type) && (vap->va_vcnstart <= vcn) && (vap->va_vcnend >= vcn) && (vap->va_namelen == namelen) && (!strncmp(name, vap->va_name, namelen))) { *vapp = vap; ntfs_ntref(vap->va_ip); return (0); } if (vap->va_type == NTFS_A_ATTRLIST) lvap = vap; } if (!lvap) { dprintf(("ntfs_ntvattrget: UNEXISTED ATTRIBUTE: " \ "ino: %d, type: 0x%x, name: %s, vcn: %d\n", \ ip->i_number, type, name, (u_int32_t) vcn)); return (ENOENT); } /* Scan $ATTRIBUTE_LIST for requested attribute */ len = lvap->va_datalen; MALLOC(alpool, caddr_t, len, M_TEMP, M_WAITOK); error = ntfs_readntvattr_plain(ntmp, ip, lvap, 0, len, alpool, &len); if (error) goto out; aalp = (struct attr_attrlist *) alpool; nextaalp = NULL; while (len > 0) { dprintf(("ntfs_ntvattrget: " \ "attrlist: ino: %d, attr: 0x%x, vcn: %d\n", \ aalp->al_inumber, aalp->al_type, \ (u_int32_t) aalp->al_vcnstart)); if (len > aalp->reclen) { nextaalp = NTFS_NEXTREC(aalp, struct attr_attrlist *); } else { nextaalp = NULL; } len -= aalp->reclen; #define AALPCMP(aalp,type,name,namelen) ( \ (aalp->al_type == type) && (aalp->al_namelen == namelen) && \ !uastrcmp(aalp->al_name,aalp->al_namelen,name,namelen) ) if (AALPCMP(aalp, type, name, namelen) && (!nextaalp || (nextaalp->al_vcnstart > vcn) || !AALPCMP(nextaalp, type, name, namelen))) { struct vnode *newvp; struct ntnode *newip; dprintf(("ntfs_ntvattrget: attrbute in ino: %d\n", 
aalp->al_inumber)); /* error = VFS_VGET(ntmp->ntm_mountp, aalp->al_inumber, &newvp); */ error = ntfs_vgetex(ntmp->ntm_mountp, aalp->al_inumber, NTFS_A_DATA, NULL, LK_EXCLUSIVE, VG_EXT, curproc, &newvp); if (error) { printf("ntfs_ntvattrget: CAN'T VGET INO: %d\n", aalp->al_inumber); goto out; } newip = VTONT(newvp); /* XXX have to lock ntnode */ if(~newip->i_flag & IN_LOADED) { dprintf(("ntfs_ntvattrget: node not loaded," \ " ino: %d\n", newip->i_number)); error = ntfs_loadntnode(ntmp,ip); if(error) { printf("ntfs_ntvattrget: CAN'T LOAD " \ "INO: %d\n", newip->i_number); vput(newvp); goto out; } } for (vap = newip->i_valist.lh_first; vap; vap = vap->va_list.le_next) { if ((vap->va_type == type) && (vap->va_vcnstart <= vcn) && (vap->va_vcnend >= vcn) && (vap->va_namelen == namelen) && (!strncmp(name, vap->va_name, namelen))) { *vapp = vap; ntfs_ntref(vap->va_ip); vput(newvp); error = 0; goto out; } if (vap->va_type == NTFS_A_ATTRLIST) lvap = vap; } printf("ntfs_ntvattrget: ATTRLIST ERROR.\n"); vput(newvp); break; } #undef AALPCMP aalp = nextaalp; } error = ENOENT; dprintf(("ntfs_ntvattrget: UNEXISTED ATTRIBUTE: " \ "ino: %d, type: 0x%x, name: %s, vcn: %d\n", \ ip->i_number, type, name, (u_int32_t) vcn)); out: FREE(alpool, M_TEMP); return (error); } /* * Read ntnode from disk, make ntvattr list. * * ntnode should be locked */ int ntfs_loadntnode( struct ntfsmount * ntmp, struct ntnode * ip) { struct filerec *mfrp; daddr_t bn; int error,off; struct attr *ap; struct ntvattr *nvap; dprintf(("ntfs_loadnode: loading ino: %d\n",ip->i_number)); MALLOC(mfrp, struct filerec *, ntfs_bntob(ntmp->ntm_bpmftrec), M_TEMP, M_WAITOK); if (ip->i_number < NTFS_SYSNODESNUM) { struct buf *bp; dprintf(("ntfs_loadnode: read system node\n")); bn = ntfs_cntobn(ntmp->ntm_mftcn) + ntmp->ntm_bpmftrec * ip->i_number; error = bread(ntmp->ntm_devvp, bn, ntfs_bntob(ntmp->ntm_bpmftrec), NOCRED, &bp); if (error) { printf("ntfs_loadnode: BREAD FAILED\n"); brelse(bp); goto out; } memcpy(mfrp, bp->b_data, ntfs_bntob(ntmp->ntm_bpmftrec)); bqrelse(bp); } else { struct vnode *vp; vp = ntmp->ntm_sysvn[NTFS_MFTINO]; error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, ip->i_number * ntfs_bntob(ntmp->ntm_bpmftrec), ntfs_bntob(ntmp->ntm_bpmftrec), mfrp); if (error) { printf("ntfs_loadnode: ntfs_readattr failed\n"); goto out; } } /* Check if magic and fixups are correct */ error = ntfs_procfixups(ntmp, NTFS_FILEMAGIC, (caddr_t)mfrp, ntfs_bntob(ntmp->ntm_bpmftrec)); if (error) { printf("ntfs_loadnode: BAD MFT RECORD %d\n", (u_int32_t) ip->i_number); goto out; } dprintf(("ntfs_loadnode: load attrs for ino: %d\n",ip->i_number)); off = mfrp->fr_attroff; ap = (struct attr *) ((caddr_t)mfrp + off); LIST_INIT(&ip->i_valist); while (ap->a_hdr.a_type != -1) { error = ntfs_attrtontvattr(ntmp, &nvap, ap); if (error) break; nvap->va_ip = ip; LIST_INSERT_HEAD(&ip->i_valist, nvap, va_list); off += ap->a_hdr.reclen; ap = (struct attr *) ((caddr_t)mfrp + off); } if (error) { printf("ntfs_loadnode: failed to load attr ino: %d\n", ip->i_number); goto out; } ip->i_mainrec = mfrp->fr_mainrec; ip->i_nlink = mfrp->fr_nlink; ip->i_frflag = mfrp->fr_flags; ip->i_flag |= IN_LOADED; out: FREE(mfrp, M_TEMP); return (error); } /* * Routine locks ntnode and increase usecount, just opposite of * ntfs_ntput. 
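 *
 * Usage pattern, as a sketch of the convention used throughout this file:
 *
 *	error = ntfs_ntget(ip);		bumps i_usecount, takes i_lock
 *	if (error)
 *		return (error);
 *	... work on the locked ntnode ...
 *	ntfs_ntput(ip);			drops i_lock, releases the usecount
 *
 * i_lock itself is a tiny hand-rolled sleep lock: a waiter sets it to -1
 * and tsleep()s on it, and the holder wakeup()s when it sees -1 at
 * release time.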
*/ int ntfs_ntget( struct ntnode *ip) { dprintf(("ntfs_ntget: get ntnode %d: %p, usecount: %d\n", ip->i_number, ip, ip->i_usecount)); ip->i_usecount++; restart: if (ip->i_lock) { while (ip->i_lock) { ip->i_lock = -1; tsleep(&ip->i_lock, PVM, "ntnode", 0); } goto restart; } ip->i_lock = 1; return 0; } /* * Routine search ntnode in hash, if found: lock, inc usecount and return. * If not in hash allocate structure for ntnode, prefill it, lock, * inc count and return. * * ntnode returned locked */ static int ntfs_ntnode_hash_lock; int ntfs_ntlookup( struct ntfsmount * ntmp, ino_t ino, struct ntnode ** ipp) { struct ntnode *ip; dprintf(("ntfs_ntlookup: for ntnode %d\n", ino)); *ipp = NULL; restart: ip = ntfs_nthashlookup(ntmp->ntm_dev, ino); /* XXX */ if (ip) { ntfs_ntget(ip); *ipp = ip; dprintf(("ntfs_ntlookup: ntnode %d: %p, usecount: %d\n", ino, ip, ip->i_usecount)); return (0); } if (ntfs_ntnode_hash_lock) { while(ntfs_ntnode_hash_lock) { ntfs_ntnode_hash_lock = -1; tsleep(&ntfs_ntnode_hash_lock, PVM, "ntfsntgt", 0); } goto restart; } ntfs_ntnode_hash_lock = 1; MALLOC(ip, struct ntnode *, sizeof(struct ntnode), M_NTFSNTNODE, M_WAITOK); ddprintf(("ntfs_ntlookup: allocating ntnode: %d: %p\n", ino, ip)); bzero((caddr_t) ip, sizeof(struct ntnode)); /* Generic initialization */ ip->i_number = ino; ip->i_mp = ntmp; ip->i_dev = ntmp->ntm_dev; ip->i_uid = ntmp->ntm_uid; ip->i_gid = ntmp->ntm_gid; ip->i_mode = ntmp->ntm_mode; ip->i_usecount++; ip->i_lock = 1; LIST_INIT(&ip->i_fnlist); ntfs_nthashins(ip); if (ntfs_ntnode_hash_lock < 0) wakeup(&ntfs_ntnode_hash_lock); ntfs_ntnode_hash_lock = 0; *ipp = ip; dprintf(("ntfs_ntlookup: ntnode %d: %p, usecount: %d\n", ino, ip, ip->i_usecount)); return (0); } /* * Decrement usecount of ntnode and unlock it, if usecount reach zero, * deallocate ntnode. * * ntnode should be locked on entry, and unlocked on return. */ void ntfs_ntput( struct ntnode *ip) { struct ntvattr *vap; if (!ip->i_lock) printf("ntfs_ntput: NOT LOCKED"); dprintf(("ntfs_ntput: rele ntnode %d: %p, usecount: %d\n", ip->i_number, ip, ip->i_usecount)); ip->i_usecount--; if (ip->i_usecount < 0) { panic("ntfs_ntput: ino: %d usecount: %d \n", ip->i_number,ip->i_usecount); } else if (ip->i_usecount == 0) { dprintf(("ntfs_ntput: deallocating ntnode: %d\n", ip->i_number)); if (ip->i_fnlist.lh_first) panic("ntfs_ntput: ntnode has fnodes\n"); ntfs_nthashrem(ip); while (ip->i_valist.lh_first != NULL) { vap = ip->i_valist.lh_first; LIST_REMOVE(vap,va_list); ntfs_freentvattr(vap); } FREE(ip, M_NTFSNTNODE); } else { if (ip->i_lock < 0) wakeup(&ip->i_lock); ip->i_lock = 0; } } /* * Decrement usecount of ntnode. */ void ntfs_ntrele( struct ntnode * ip) { dprintf(("ntfs_ntrele: rele ntnode %d: %p, usecount: %d\n", ip->i_number, ip, ip->i_usecount)); ip->i_usecount--; if (ip->i_usecount < 0) panic("ntfs_ntrele: ino: %d usecount: %d \n", ip->i_number,ip->i_usecount); } /* * Deallocate all memory allocated for ntvattr by call to * ntfs_attrtontvattr and some other functions. */ void ntfs_freentvattr( struct ntvattr * vap) { if (vap->va_flag & NTFS_AF_INRUN) { if (vap->va_vruncn) FREE(vap->va_vruncn, M_NTFSRUN); if (vap->va_vruncl) FREE(vap->va_vruncl, M_NTFSRUN); } else { if (vap->va_datap) FREE(vap->va_datap, M_NTFSRDATA); } FREE(vap, M_NTFSNTVATTR); } /* * Convert disk image of attribute into ntvattr structure, * runs are expanded also. 
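 *
 * The on-disk run list expanded here is worth a worked example (bytes
 * invented for illustration): the sequence 21 04 34 12 00 has a header
 * byte 0x21, i.e. one length byte and two offset bytes, so it describes
 * a run of 0x04 clusters whose start is the previous run's start plus
 * the signed delta 0x1234 (just 0x1234 for the first run); the trailing
 * 00 terminates the list.  ntfs_runtovrun() below turns such a list into
 * the parallel va_vruncn/va_vruncl arrays.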
*/ int ntfs_attrtontvattr( struct ntfsmount * ntmp, struct ntvattr ** rvapp, struct attr * rap) { int error, i; struct ntvattr *vap; error = 0; *rvapp = NULL; MALLOC(vap, struct ntvattr *, sizeof(struct ntvattr), M_NTFSNTVATTR, M_WAITOK); bzero(vap, sizeof(struct ntvattr)); vap->va_ip = NULL; vap->va_flag = rap->a_hdr.a_flag; vap->va_type = rap->a_hdr.a_type; vap->va_compression = rap->a_hdr.a_compression; vap->va_index = rap->a_hdr.a_index; ddprintf(("type: 0x%x, index: %d", vap->va_type, vap->va_index)); vap->va_namelen = rap->a_hdr.a_namelen; if (rap->a_hdr.a_namelen) { wchar *unp = (wchar *) ((caddr_t) rap + rap->a_hdr.a_nameoff); ddprintf((", name:[")); for (i = 0; i < vap->va_namelen; i++) { vap->va_name[i] = unp[i]; ddprintf(("%c", vap->va_name[i])); } ddprintf(("]")); } if (vap->va_flag & NTFS_AF_INRUN) { ddprintf((", nonres.")); vap->va_datalen = rap->a_nr.a_datalen; vap->va_allocated = rap->a_nr.a_allocated; vap->va_vcnstart = rap->a_nr.a_vcnstart; vap->va_vcnend = rap->a_nr.a_vcnend; vap->va_compressalg = rap->a_nr.a_compressalg; error = ntfs_runtovrun(&(vap->va_vruncn), &(vap->va_vruncl), &(vap->va_vruncnt), (caddr_t) rap + rap->a_nr.a_dataoff); } else { vap->va_compressalg = 0; ddprintf((", res.")); vap->va_datalen = rap->a_r.a_datalen; vap->va_allocated = rap->a_r.a_datalen; vap->va_vcnstart = 0; vap->va_vcnend = ntfs_btocn(vap->va_allocated); MALLOC(vap->va_datap, caddr_t, vap->va_datalen, M_NTFSRDATA, M_WAITOK); memcpy(vap->va_datap, (caddr_t) rap + rap->a_r.a_dataoff, rap->a_r.a_datalen); } ddprintf((", len: %d", vap->va_datalen)); if (error) FREE(vap, M_NTFSNTVATTR); else *rvapp = vap; ddprintf(("\n")); return (error); } /* * Expand run into more utilizable and more memory eating format. */ int ntfs_runtovrun( cn_t ** rcnp, cn_t ** rclp, u_long * rcntp, u_int8_t * run) { u_int32_t off; u_int32_t sz, i; cn_t *cn; cn_t *cl; u_long cnt; cn_t prev; cn_t tmp; off = 0; cnt = 0; i = 0; while (run[off]) { off += (run[off] & 0xF) + ((run[off] >> 4) & 0xF) + 1; cnt++; } MALLOC(cn, cn_t *, cnt * sizeof(cn_t), M_NTFSRUN, M_WAITOK); MALLOC(cl, cn_t *, cnt * sizeof(cn_t), M_NTFSRUN, M_WAITOK); off = 0; cnt = 0; prev = 0; while (run[off]) { sz = run[off++]; cl[cnt] = 0; for (i = 0; i < (sz & 0xF); i++) cl[cnt] += (u_int32_t) run[off++] << (i << 3); sz >>= 4; if (run[off + sz - 1] & 0x80) { tmp = ((u_int64_t) - 1) << (sz << 3); for (i = 0; i < sz; i++) tmp |= (u_int64_t) run[off++] << (i << 3); } else { tmp = 0; for (i = 0; i < sz; i++) tmp |= (u_int64_t) run[off++] << (i << 3); } if (tmp) prev = cn[cnt] = prev + tmp; else cn[cnt] = tmp; cnt++; } *rcnp = cn; *rclp = cl; *rcntp = cnt; return (0); } /* * Convert wchar to uppercase wchar, should be macros? */ wchar ntfs_toupper( struct ntfsmount * ntmp, wchar wc) { return (ntmp->ntm_upcase[wc & 0xFF]); } /* * Compare to unicode strings case insensible. */ int ntfs_uustricmp( struct ntfsmount * ntmp, wchar * str1, int str1len, wchar * str2, int str2len) { int i; int res; for (i = 0; i < str1len && i < str2len; i++) { res = (int) ntfs_toupper(ntmp, str1[i]) - (int) ntfs_toupper(ntmp, str2[i]); if (res) return res; } return (str1len - str2len); } /* * Compare unicode and ascii string case insens. 
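 *
 * For example (names purely illustrative): comparing the on-disk Unicode
 * name "Foo" against the ASCII lookup string "fOO" maps both sides
 * through ntfs_toupper(), which indexes the volume's uppercase table
 * ntm_upcase, so every position compares equal and the result is the
 * length difference, 0 here.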
*/ int ntfs_uastricmp( struct ntfsmount * ntmp, const wchar *str1, int str1len, const char *str2, int str2len) { int i; int res; for (i = 0; i < str1len && i < str2len; i++) { res = (int) ntfs_toupper(ntmp, str1[i]) - (int) ntfs_toupper(ntmp, (wchar) str2[i]); if (res) return res; } return (str1len - str2len); } /* * Compare unicode and ascii string case sens. */ int ntfs_uastrcmp( struct ntfsmount *ntmp, const wchar *str1, int str1len, const char *str2, int str2len) { int i; int res; for (i = 0; (i < str1len) && (i < str2len); i++) { res = ((int) str1[i]) - ((int) str2[i]); if (res) return res; } return (str1len - str2len); } /* * Search fnode in ntnode, if not found allocate and preinitialize. * * ntnode should be locked on entry. */ int ntfs_fget( struct ntfsmount *ntmp, struct ntnode *ip, int attrtype, char *attrname, struct fnode **fpp) { struct fnode *fp; dprintf(("ntfs_fget: ino: %d, attrtype: 0x%x, attrname: %s\n", ip->i_number,attrtype, attrname?attrname:"")); *fpp = NULL; for (fp = ip->i_fnlist.lh_first; fp != NULL; fp = fp->f_fnlist.le_next){ dprintf(("ntfs_fget: fnode: attrtype: %d, attrname: %s\n", fp->f_attrtype, fp->f_attrname?fp->f_attrname:"")); if ((attrtype == fp->f_attrtype) && ((!attrname && !fp->f_attrname) || (attrname && fp->f_attrname && !strcmp(attrname,fp->f_attrname)))){ dprintf(("ntfs_fget: found existed: %p\n",fp)); *fpp = fp; } } if (*fpp) return (0); MALLOC(fp, struct fnode *, sizeof(struct fnode), M_NTFSFNODE, M_WAITOK); bzero(fp, sizeof(struct fnode)); dprintf(("ntfs_fget: allocating fnode: %p\n",fp)); fp->f_devvp = ntmp->ntm_devvp; fp->f_dev = ntmp->ntm_dev; fp->f_mp = ntmp; fp->f_ip = ip; fp->f_attrname = attrname; if (fp->f_attrname) fp->f_flag |= FN_AATTRNAME; fp->f_attrtype = attrtype; ntfs_ntref(ip); LIST_INSERT_HEAD(&ip->i_fnlist, fp, f_fnlist); *fpp = fp; return (0); } /* * Deallocate fnode, remove it from ntnode's fnode list. * * ntnode should be locked. */ void ntfs_frele( struct fnode *fp) { struct ntnode *ip = FTONT(fp); dprintf(("ntfs_frele: fnode: %p for %d: %p\n", fp, ip->i_number, ip)); dprintf(("ntfs_frele: deallocating fnode\n")); LIST_REMOVE(fp,f_fnlist); if (fp->f_flag & FN_AATTRNAME) FREE(fp->f_attrname, M_TEMP); if (fp->f_dirblbuf) FREE(fp->f_dirblbuf, M_NTFSDIR); FREE(fp, M_NTFSFNODE); ntfs_ntrele(ip); } /* * Lookup attribute name in format: [[:$ATTR_TYPE]:$ATTR_NAME], * $ATTR_TYPE is searched in attrdefs read from $AttrDefs. * If $ATTR_TYPE nott specifed, ATTR_A_DATA assumed. */ int ntfs_ntlookupattr( struct ntfsmount * ntmp, const char * name, int namelen, int *attrtype, char **attrname) { const char *sys; size_t syslen, i; struct ntvattrdef *adp; if (namelen == 0) return (0); if (name[0] == '$') { sys = name; for (syslen = 0; syslen < namelen; syslen++) { if(sys[syslen] == ':') { name++; namelen--; break; } } name += syslen; namelen -= syslen; adp = ntmp->ntm_ad; for (i = 0; i < ntmp->ntm_adnum; i++){ if((syslen == adp->ad_namelen) && (!strncmp(sys,adp->ad_name,syslen))) { *attrtype = adp->ad_type; if(namelen) { MALLOC((*attrname), char *, namelen, M_TEMP, M_WAITOK); memcpy((*attrname), name, namelen); (*attrname)[namelen] = '\0'; } return (0); } adp++; } return (ENOENT); } if(namelen) { MALLOC((*attrname), char *, namelen, M_TEMP, M_WAITOK); memcpy((*attrname), name, namelen); (*attrname)[namelen] = '\0'; *attrtype = NTFS_A_DATA; } return (0); } /* * Lookup specifed node for filename, matching cnp, * return fnode filled. 
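 *
 * The names accepted here follow the "file[:attrspec]" convention parsed
 * by ntfs_ntlookupattr() above; for instance (file names invented), a
 * lookup of "readme.txt:alt" opens the named $DATA stream "alt" of
 * readme.txt, "readme.txt:$INDEX_ROOT" selects a different attribute
 * type altogether, and a bare "readme.txt" falls through to the unnamed
 * $DATA attribute.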
*/ int ntfs_ntlookupfile( struct ntfsmount * ntmp, struct vnode * vp, struct componentname * cnp, struct vnode ** vpp) { struct fnode *fp = VTOF(vp); struct ntnode *ip = FTONT(fp); struct ntvattr *vap; /* Root attribute */ cn_t cn; /* VCN in current attribute */ caddr_t rdbuf; /* Buffer to read directory's blocks */ u_int32_t blsize; u_int32_t rdsize; /* Length of data to read from current block */ struct attr_indexentry *iep; int error, res, anamelen, fnamelen; const char *fname,*aname; u_int32_t aoff; error = ntfs_ntget(ip); if (error) return (error); error = ntfs_ntvattrget(ntmp, ip, NTFS_A_INDXROOT, "$I30", 0, &vap); if (error || (vap->va_flag & NTFS_AF_INRUN)) return (ENOTDIR); blsize = vap->va_a_iroot->ir_size; rdsize = vap->va_datalen; /* * Divide file name into: foofilefoofilefoofile[:attrspec] * Store like this: fname:fnamelen [aname:anamelen] */ fname = cnp->cn_nameptr; aname = NULL; anamelen = 0; for (fnamelen = 0; fnamelen < cnp->cn_namelen; fnamelen++) if(fname[fnamelen] == ':') { aname = fname + fnamelen + 1; anamelen = cnp->cn_namelen - fnamelen - 1; dprintf(("ntfs_ntlookupfile: %s (%d), attr: %s (%d)\n", fname, fnamelen, aname, anamelen)); break; } dprintf(("ntfs_ntlookupfile: blksz: %d, rdsz: %d\n", blsize, rdsize)); MALLOC(rdbuf, caddr_t, blsize, M_TEMP, M_WAITOK); error = ntfs_readattr(ntmp, ip, NTFS_A_INDXROOT, "$I30", 0, rdsize, rdbuf); if (error) goto fail; aoff = sizeof(struct attr_indexroot); do { iep = (struct attr_indexentry *) (rdbuf + aoff); while (!(iep->ie_flag & NTFS_IEFLAG_LAST) && (rdsize > aoff)) { ddprintf(("scan: %d, %d\n", (u_int32_t) iep->ie_number, (u_int32_t) iep->ie_fnametype)); res = ntfs_uastricmp(ntmp, iep->ie_fname, iep->ie_fnamelen, fname, fnamelen); if (res == 0) { /* Matched something (case ins.) */ if (iep->ie_fnametype == 0 || !(ntmp->ntm_flag & NTFS_MFLAG_CASEINS)) res = ntfs_uastrcmp(ntmp, iep->ie_fname, iep->ie_fnamelen, fname, fnamelen); if (res == 0) { int attrtype = NTFS_A_DATA; char *attrname = NULL; struct fnode *nfp; struct vnode *nvp; if (aname) { error = ntfs_ntlookupattr(ntmp, aname, anamelen, &attrtype, &attrname); if (error) goto fail; } /* Check if we've found ourself */ if ((iep->ie_number == ip->i_number) && (attrtype == fp->f_attrtype) && ((!attrname && !fp->f_attrname) || (attrname && fp->f_attrname && !strcmp(attrname, fp->f_attrname)))) { VREF(vp); *vpp = vp; goto fail; } /* vget node, but don't load it */ error = ntfs_vgetex(ntmp->ntm_mountp, iep->ie_number, attrtype, attrname, LK_EXCLUSIVE, VG_DONTLOADIN | VG_DONTVALIDFN, curproc, &nvp); if(error) goto fail; nfp = VTOF(nvp); if (nfp->f_flag & FN_VALID) { *vpp = nvp; goto fail; } nfp->f_fflag = iep->ie_fflag; nfp->f_pnumber = iep->ie_fpnumber; nfp->f_times = iep->ie_ftimes; if((nfp->f_fflag & NTFS_FFLAG_DIR) && (nfp->f_attrtype == NTFS_A_DATA) && (nfp->f_attrname == NULL)) nfp->f_type = VDIR; else nfp->f_type = VREG; nvp->v_type = nfp->f_type; if ((nfp->f_attrtype == NTFS_A_DATA) && (nfp->f_attrname == NULL)) { /* Opening default attribute */ nfp->f_size = iep->ie_fsize; nfp->f_allocated = iep->ie_fallocated; nfp->f_flag |= FN_PRELOADED; } else { error = ntfs_filesize(ntmp, nfp, &nfp->f_size, &nfp->f_allocated); if (error) { vput(nvp); goto fail; } } nfp->f_flag &= ~FN_VALID; *vpp = nvp; goto fail; } } else if (res > 0) break; aoff += iep->reclen; iep = (struct attr_indexentry *) (rdbuf + aoff); } /* Dive if possible */ if (iep->ie_flag & NTFS_IEFLAG_SUBNODE) { dprintf(("ntfs_ntlookupfile: diving\n")); cn = *(cn_t *) (rdbuf + aoff + iep->reclen - sizeof(cn_t)); rdsize 
= blsize; error = ntfs_readattr(ntmp, ip, NTFS_A_INDX, "$I30", ntfs_cntob(cn), rdsize, rdbuf); if (error) goto fail; error = ntfs_procfixups(ntmp, NTFS_INDXMAGIC, rdbuf, rdsize); if (error) goto fail; aoff = (((struct attr_indexalloc *) rdbuf)->ia_hdrsize + 0x18); } else { dprintf(("ntfs_ntlookupfile: nowhere to dive :-(\n")); error = ENOENT; break; } } while (1); dprintf(("finish\n")); fail: ntfs_ntvattrrele(vap); ntfs_ntput(ip); FREE(rdbuf, M_TEMP); return (error); } /* * Check if name type is permitted to show. */ int ntfs_isnamepermitted( struct ntfsmount * ntmp, struct attr_indexentry * iep) { if (ntmp->ntm_flag & NTFS_MFLAG_ALLNAMES) return 1; switch (iep->ie_fnametype) { case 2: ddprintf(("ntfs_isnamepermitted: skiped DOS name\n")); return 0; case 0: case 1: case 3: return 1; default: printf("ntfs_isnamepermitted: " \ "WARNING! Unknown file name type: %d\n", iep->ie_fnametype); break; } return 0; } /* * Read ntfs dir like stream of attr_indexentry, not like btree of them. * This is done by scaning $BITMAP:$I30 for busy clusters and reading them. * Ofcouse $INDEX_ROOT:$I30 is read before. Last read values are stored in * fnode, so we can skip toward record number num almost immediatly. * Anyway this is rather slow routine. The problem is that we don't know * how many records are there in $INDEX_ALLOCATION:$I30 block. */ int ntfs_ntreaddir( struct ntfsmount * ntmp, struct fnode * fp, u_int32_t num, struct attr_indexentry ** riepp) { struct ntnode *ip = FTONT(fp); struct ntvattr *vap = NULL; /* IndexRoot attribute */ struct ntvattr *bmvap = NULL; /* BitMap attribute */ struct ntvattr *iavap = NULL; /* IndexAllocation attribute */ caddr_t rdbuf; /* Buffer to read directory's blocks */ u_char *bmp = NULL; /* Bitmap */ u_int32_t blsize; /* Index allocation size (2048) */ u_int32_t rdsize; /* Length of data to read */ u_int32_t attrnum; /* Current attribute type */ u_int32_t cpbl = 1; /* Clusters per directory block */ u_int32_t blnum; struct attr_indexentry *iep; int error = ENOENT; u_int32_t aoff, cnum; dprintf(("ntfs_ntreaddir: read ino: %d, num: %d\n", ip->i_number, num)); error = ntfs_ntget(ip); if (error) return (error); error = ntfs_ntvattrget(ntmp, ip, NTFS_A_INDXROOT, "$I30", 0, &vap); if (error) return (ENOTDIR); if (fp->f_dirblbuf == NULL) { fp->f_dirblsz = vap->va_a_iroot->ir_size; MALLOC(fp->f_dirblbuf, caddr_t, max(vap->va_datalen,fp->f_dirblsz), M_NTFSDIR, M_WAITOK); } blsize = fp->f_dirblsz; rdbuf = fp->f_dirblbuf; dprintf(("ntfs_ntreaddir: rdbuf: 0x%p, blsize: %d\n", rdbuf, blsize)); if (vap->va_a_iroot->ir_flag & NTFS_IRFLAG_INDXALLOC) { error = ntfs_ntvattrget(ntmp, ip, NTFS_A_INDXBITMAP, "$I30", 0, &bmvap); if (error) { error = ENOTDIR; goto fail; } MALLOC(bmp, u_char *, bmvap->va_datalen, M_TEMP, M_WAITOK); error = ntfs_readattr(ntmp, ip, NTFS_A_INDXBITMAP, "$I30", 0, bmvap->va_datalen, bmp); if (error) goto fail; error = ntfs_ntvattrget(ntmp, ip, NTFS_A_INDX, "$I30", 0, &iavap); if (error) { error = ENOTDIR; goto fail; } cpbl = ntfs_btocn(blsize + ntfs_cntob(1) - 1); dprintf(("ntfs_ntreaddir: indexalloc: %d, cpbl: %d\n", iavap->va_datalen, cpbl)); } else { dprintf(("ntfs_ntreadidir: w/o BitMap and IndexAllocation\n")); iavap = bmvap = NULL; bmp = NULL; } /* Try use previous values */ if ((fp->f_lastdnum < num) && (fp->f_lastdnum != 0)) { attrnum = fp->f_lastdattr; aoff = fp->f_lastdoff; blnum = fp->f_lastdblnum; cnum = fp->f_lastdnum; } else { attrnum = NTFS_A_INDXROOT; aoff = sizeof(struct attr_indexroot); blnum = 0; cnum = 0; } do { dprintf(("ntfs_ntreaddir: scan: 
0x%x, %d, %d, %d, %d\n", attrnum, (u_int32_t) blnum, cnum, num, aoff)); rdsize = (attrnum == NTFS_A_INDXROOT) ? vap->va_datalen : blsize; error = ntfs_readattr(ntmp, ip, attrnum, "$I30", ntfs_cntob(blnum * cpbl), rdsize, rdbuf); if (error) goto fail; if (attrnum == NTFS_A_INDX) { error = ntfs_procfixups(ntmp, NTFS_INDXMAGIC, rdbuf, rdsize); if (error) goto fail; } if (aoff == 0) aoff = (attrnum == NTFS_A_INDX) ? (0x18 + ((struct attr_indexalloc *) rdbuf)->ia_hdrsize) : sizeof(struct attr_indexroot); iep = (struct attr_indexentry *) (rdbuf + aoff); while (!(iep->ie_flag & NTFS_IEFLAG_LAST) && (rdsize > aoff)) { if (ntfs_isnamepermitted(ntmp, iep)) { if (cnum >= num) { fp->f_lastdnum = cnum; fp->f_lastdoff = aoff; fp->f_lastdblnum = blnum; fp->f_lastdattr = attrnum; *riepp = iep; error = 0; goto fail; } cnum++; } aoff += iep->reclen; iep = (struct attr_indexentry *) (rdbuf + aoff); } if (iavap) { if (attrnum == NTFS_A_INDXROOT) blnum = 0; else blnum++; while (ntfs_cntob(blnum * cpbl) < iavap->va_datalen) { if (bmp[blnum >> 3] & (1 << (blnum & 3))) break; blnum++; } attrnum = NTFS_A_INDX; aoff = 0; if (ntfs_cntob(blnum * cpbl) >= iavap->va_datalen) break; dprintf(("ntfs_ntreaddir: blnum: %d\n", (u_int32_t) blnum)); } } while (iavap); *riepp = NULL; fp->f_lastdnum = 0; fail: if (vap) ntfs_ntvattrrele(vap); if (bmvap) ntfs_ntvattrrele(bmvap); if (iavap) ntfs_ntvattrrele(iavap); if (bmp) FREE(bmp, M_TEMP); ntfs_ntput(ip); return (error); } /* * Convert NTFS times that are in 100 ns units and begins from * 1601 Jan 1 into unix times. */ struct timespec ntfs_nttimetounix( u_int64_t nt) { struct timespec t; /* WindowNT times are in 100 ns and from 1601 Jan 1 */ t.tv_nsec = (nt % (1000 * 1000 * 10)) * 100; t.tv_sec = nt / (1000 * 1000 * 10) - 369LL * 365LL * 24LL * 60LL * 60LL - 89LL * 1LL * 24LL * 60LL * 60LL; return (t); } /* * Get file times from NTFS_A_NAME attribute. */ int ntfs_times( struct ntfsmount * ntmp, struct ntnode * ip, ntfs_times_t * tm) { struct ntvattr *vap; int error; dprintf(("ntfs_times: ino: %d...\n", ip->i_number)); error = ntfs_ntget(ip); if (error) return (error); error = ntfs_ntvattrget(ntmp, ip, NTFS_A_NAME, NULL, 0, &vap); if (error) { ntfs_ntput(ip); return (error); } *tm = vap->va_a_name->n_times; ntfs_ntvattrrele(vap); ntfs_ntput(ip); return (0); } /* * Get file sizes from corresponding attribute. * * ntnode under fnode should be locked. */ int ntfs_filesize( struct ntfsmount * ntmp, struct fnode * fp, u_int64_t * size, u_int64_t * bytes) { struct ntvattr *vap; struct ntnode *ip = FTONT(fp); u_int64_t sz, bn; int error; dprintf(("ntfs_filesize: ino: %d\n", ip->i_number)); error = ntfs_ntvattrget(ntmp, ip, fp->f_attrtype, fp->f_attrname, 0, &vap); if (error) return (error); bn = vap->va_allocated; sz = vap->va_datalen; dprintf(("ntfs_filesize: %d bytes (%d bytes allocated)\n", (u_int32_t) sz, (u_int32_t) bn)); if (size) *size = sz; if (bytes) *bytes = bn; ntfs_ntvattrrele(vap); return (0); } /* * This is one of write routine. * * ntnode should be locked. 
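 *
 * (On the conversion in ntfs_nttimetounix() above: NT timestamps count
 * 100 ns units from 1601-01-01, so the code divides by 10,000,000 to get
 * seconds and subtracts the 1601..1970 gap -- 369 years of 365 days plus
 * 89 leap days, i.e. 11,644,473,600 seconds -- which means an NT value
 * of 116444736000000000 maps to the Unix epoch, 1970-01-01 00:00:00.)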
*/ int ntfs_writeattr_plain( struct ntfsmount * ntmp, struct ntnode * ip, u_int32_t attrnum, char *attrname, off_t roff, size_t rsize, void *rdata, size_t * initp) { size_t init; int error = 0; off_t off = roff, left = rsize, towrite; caddr_t data = rdata; struct ntvattr *vap; *initp = 0; while (left) { error = ntfs_ntvattrget(ntmp, ip, attrnum, attrname, ntfs_btocn(off), &vap); if (error) return (error); towrite = min(left, ntfs_cntob(vap->va_vcnend + 1) - off); ddprintf(("ntfs_writeattr_plain: o: %d, s: %d (%d - %d)\n", (u_int32_t) off, (u_int32_t) towrite, (u_int32_t) vap->va_vcnstart, (u_int32_t) vap->va_vcnend)); error = ntfs_writentvattr_plain(ntmp, ip, vap, off - ntfs_cntob(vap->va_vcnstart), towrite, data, &init); if (error) { printf("ntfs_writeattr_plain: " \ "ntfs_writentvattr_plain failed: o: %d, s: %d\n", (u_int32_t) off, (u_int32_t) towrite); printf("ntfs_writeattr_plain: attrib: %d - %d\n", (u_int32_t) vap->va_vcnstart, (u_int32_t) vap->va_vcnend); ntfs_ntvattrrele(vap); break; } ntfs_ntvattrrele(vap); left -= towrite; off += towrite; data = data + towrite; *initp += init; } return (error); } /* * This is one of write routine. * * ntnode should be locked. */ int ntfs_writentvattr_plain( struct ntfsmount * ntmp, struct ntnode * ip, struct ntvattr * vap, off_t roff, size_t rsize, void *rdata, size_t * initp) { int error = 0; int off; *initp = 0; if (vap->va_flag & NTFS_AF_INRUN) { int cnt; cn_t ccn, ccl, cn, left, cl; caddr_t data = rdata; struct buf *bp; size_t tocopy; ddprintf(("ntfs_writentvattr_plain: data in run: %d chains\n", vap->va_vruncnt)); off = roff; left = rsize; ccl = 0; ccn = 0; cnt = 0; while (left && (cnt < vap->va_vruncnt)) { ccn = vap->va_vruncn[cnt]; ccl = vap->va_vruncl[cnt]; ddprintf(("ntfs_writentvattr_plain: " \ "left %d, cn: 0x%x, cl: %d, off: %d\n", \ (u_int32_t) left, (u_int32_t) ccn, \ (u_int32_t) ccl, (u_int32_t) off)); if (ntfs_cntob(ccl) < off) { off -= ntfs_cntob(ccl); cnt++; continue; } if (ccn || ip->i_number == NTFS_BOOTINO) { /* XXX */ ccl -= ntfs_btocn(off); cn = ccn + ntfs_btocn(off); off = ntfs_btocnoff(off); while (left && ccl) { tocopy = min(left, min(ntfs_cntob(ccl) - off, MAXBSIZE - off)); cl = ntfs_btocl(tocopy + off); ddprintf(("ntfs_writentvattr_plain: " \ "write: cn: 0x%x cl: %d, " \ "off: %d len: %d, left: %d\n", (u_int32_t) cn, (u_int32_t) cl, (u_int32_t) off, (u_int32_t) tocopy, (u_int32_t) left)); if ((off == 0) && (tocopy == ntfs_cntob(cl))) { bp = getblk(ntmp->ntm_devvp, ntfs_cntobn(cn), ntfs_cntob(cl), 0, 0); clrbuf(bp); } else { error = bread(ntmp->ntm_devvp, ntfs_cntobn(cn), ntfs_cntob(cl), NOCRED, &bp); if (error) { brelse(bp); return (error); } } memcpy(bp->b_data + off, data, tocopy); bawrite(bp); data = data + tocopy; *initp += tocopy; off = 0; left -= tocopy; cn += cl; ccl -= cl; } } cnt++; } if (left) { printf("ntfs_writentvattr_plain: POSSIBLE RUN ERROR\n"); error = EINVAL; } } else { printf("ntfs_writevattr_plain: CAN'T WRITE RES. ATTRIBUTE\n"); error = ENOTTY; } return (error); } /* * This is one of read routines. * * ntnode should be locked. 
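 *
 * One subtlety of the run walk below: a run whose starting cluster is 0
 * denotes a sparse hole (except for the boot file, NTFS_BOOTINO, whose
 * data really does live at cluster 0), so such a run is satisfied by
 * bzero()ing the caller's buffer instead of reading the disk.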
*/ int ntfs_readntvattr_plain( struct ntfsmount * ntmp, struct ntnode * ip, struct ntvattr * vap, off_t roff, size_t rsize, void *rdata, size_t * initp) { int error = 0; int off; *initp = 0; if (vap->va_flag & NTFS_AF_INRUN) { int cnt; cn_t ccn, ccl, cn, left, cl; caddr_t data = rdata; struct buf *bp; size_t tocopy; ddprintf(("ntfs_readntvattr_plain: data in run: %d chains\n", vap->va_vruncnt)); off = roff; left = rsize; ccl = 0; ccn = 0; cnt = 0; while (left && (cnt < vap->va_vruncnt)) { ccn = vap->va_vruncn[cnt]; ccl = vap->va_vruncl[cnt]; ddprintf(("ntfs_readntvattr_plain: " \ "left %d, cn: 0x%x, cl: %d, off: %d\n", \ (u_int32_t) left, (u_int32_t) ccn, \ (u_int32_t) ccl, (u_int32_t) off)); if (ntfs_cntob(ccl) < off) { off -= ntfs_cntob(ccl); cnt++; continue; } if (ccn || ip->i_number == NTFS_BOOTINO) { ccl -= ntfs_btocn(off); cn = ccn + ntfs_btocn(off); off = ntfs_btocnoff(off); while (left && ccl) { tocopy = min(left, min(ntfs_cntob(ccl) - off, MAXBSIZE - off)); cl = ntfs_btocl(tocopy + off); ddprintf(("ntfs_readntvattr_plain: " \ "read: cn: 0x%x cl: %d, " \ "off: %d len: %d, left: %d\n", (u_int32_t) cn, (u_int32_t) cl, (u_int32_t) off, (u_int32_t) tocopy, (u_int32_t) left)); error = bread(ntmp->ntm_devvp, ntfs_cntobn(cn), ntfs_cntob(cl), NOCRED, &bp); if (error) { brelse(bp); return (error); } memcpy(data, bp->b_data + off, tocopy); brelse(bp); data = data + tocopy; *initp += tocopy; off = 0; left -= tocopy; cn += cl; ccl -= cl; } } else { tocopy = min(left, ntfs_cntob(ccl) - off); ddprintf(("ntfs_readntvattr_plain: " "sparce: ccn: 0x%x ccl: %d, off: %d, " \ " len: %d, left: %d\n", (u_int32_t) ccn, (u_int32_t) ccl, (u_int32_t) off, (u_int32_t) tocopy, (u_int32_t) left)); left -= tocopy; off = 0; bzero(data, tocopy); data = data + tocopy; } cnt++; } if (left) { printf("ntfs_readntvattr_plain: POSSIBLE RUN ERROR\n"); error = E2BIG; } } else { ddprintf(("ntfs_readnvattr_plain: data is in mft record\n")); memcpy(rdata, vap->va_datap + roff, rsize); *initp += rsize; } return (error); } /* * This is one of read routines. * * ntnode should be locked. */ int ntfs_readattr_plain( struct ntfsmount * ntmp, struct ntnode * ip, u_int32_t attrnum, char *attrname, off_t roff, size_t rsize, void *rdata, size_t * initp) { size_t init; int error = 0; off_t off = roff, left = rsize, toread; caddr_t data = rdata; struct ntvattr *vap; *initp = 0; while (left) { error = ntfs_ntvattrget(ntmp, ip, attrnum, attrname, ntfs_btocn(off), &vap); if (error) return (error); toread = min(left, ntfs_cntob(vap->va_vcnend + 1) - off); ddprintf(("ntfs_readattr_plain: o: %d, s: %d (%d - %d)\n", (u_int32_t) off, (u_int32_t) toread, (u_int32_t) vap->va_vcnstart, (u_int32_t) vap->va_vcnend)); error = ntfs_readntvattr_plain(ntmp, ip, vap, off - ntfs_cntob(vap->va_vcnstart), toread, data, &init); if (error) { printf("ntfs_readattr_plain: " \ "ntfs_readntvattr_plain failed: o: %d, s: %d\n", (u_int32_t) off, (u_int32_t) toread); printf("ntfs_readattr_plain: attrib: %d - %d\n", (u_int32_t) vap->va_vcnstart, (u_int32_t) vap->va_vcnend); ntfs_ntvattrrele(vap); break; } ntfs_ntvattrrele(vap); left -= toread; off += toread; data = data + toread; *initp += init; } return (error); } /* * This is one of read routines. * * ntnode should be locked. 
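 *
 * ntfs_readattr() below handles compressed attributes in whole
 * compression units of NTFS_COMPUNIT_CL clusters (16 in the usual NTFS
 * layout); as a sketch, a read starting in the middle of a unit first
 * rounds the cluster number down to a unit boundary, reads the whole
 * unit, and then copies from the in-unit offset: a fully backed unit
 * (init equal to the unit size) is stored raw, a unit with init == 0 is
 * sparse and read back as zeroes, and anything in between is passed to
 * ntfs_uncompunit().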
*/ int ntfs_readattr( struct ntfsmount * ntmp, struct ntnode * ip, u_int32_t attrnum, char *attrname, off_t roff, size_t rsize, void *rdata) { int error = 0; struct ntvattr *vap; size_t init; ddprintf(("ntfs_readattr: reading %d: 0x%x, from %d size %d bytes\n", ip->i_number, attrnum, (u_int32_t) roff, (u_int32_t) rsize)); error = ntfs_ntvattrget(ntmp, ip, attrnum, attrname, 0, &vap); if (error) return (error); if ((roff > vap->va_datalen) || (roff + rsize > vap->va_datalen)) { ddprintf(("ntfs_readattr: offset too big\n")); ntfs_ntvattrrele(vap); return (E2BIG); } if (vap->va_compression && vap->va_compressalg) { u_int8_t *cup; u_int8_t *uup; off_t off = roff, left = rsize, tocopy; caddr_t data = rdata; cn_t cn; ddprintf(("ntfs_ntreadattr: compression: %d\n", vap->va_compressalg)); MALLOC(cup, u_int8_t *, ntfs_cntob(NTFS_COMPUNIT_CL), M_NTFSDECOMP, M_WAITOK); MALLOC(uup, u_int8_t *, ntfs_cntob(NTFS_COMPUNIT_CL), M_NTFSDECOMP, M_WAITOK); cn = (ntfs_btocn(roff)) & (~(NTFS_COMPUNIT_CL - 1)); off = roff - ntfs_cntob(cn); while (left) { error = ntfs_readattr_plain(ntmp, ip, attrnum, attrname, ntfs_cntob(cn), ntfs_cntob(NTFS_COMPUNIT_CL), cup, &init); if (error) break; tocopy = min(left, ntfs_cntob(NTFS_COMPUNIT_CL) - off); if (init == ntfs_cntob(NTFS_COMPUNIT_CL)) { memcpy(data, cup + off, tocopy); } else if (init == 0) { bzero(data, tocopy); } else { error = ntfs_uncompunit(ntmp, uup, cup); if (error) break; memcpy(data, uup + off, tocopy); } left -= tocopy; data = data + tocopy; off += tocopy - ntfs_cntob(NTFS_COMPUNIT_CL); cn += NTFS_COMPUNIT_CL; } FREE(uup, M_NTFSDECOMP); FREE(cup, M_NTFSDECOMP); } else error = ntfs_readattr_plain(ntmp, ip, attrnum, attrname, roff, rsize, rdata, &init); ntfs_ntvattrrele(vap); return (error); } #if UNUSED_CODE int ntfs_parserun( cn_t * cn, cn_t * cl, u_int8_t * run, u_long len, u_long *off) { u_int8_t sz; int i; if (NULL == run) { printf("ntfs_parsetun: run == NULL\n"); return (EINVAL); } sz = run[(*off)++]; if (0 == sz) { printf("ntfs_parserun: trying to go out of run\n"); return (E2BIG); } *cl = 0; if ((sz & 0xF) > 8 || (*off) + (sz & 0xF) > len) { printf("ntfs_parserun: " \ "bad run: length too big: sz: 0x%02x (%ld < %ld + sz)\n", sz, len, *off); return (EINVAL); } for (i = 0; i < (sz & 0xF); i++) *cl += (u_int32_t) run[(*off)++] << (i << 3); sz >>= 4; if ((sz & 0xF) > 8 || (*off) + (sz & 0xF) > len) { printf("ntfs_parserun: " \ "bad run: length too big: sz: 0x%02x (%ld < %ld + sz)\n", sz, len, *off); return (EINVAL); } for (i = 0; i < (sz & 0xF); i++) *cn += (u_int32_t) run[(*off)++] << (i << 3); return (0); } #endif /* * Process fixup routine on given buffer. 
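 *
 * Rough sketch of what the fixup pass does (numbers invented): an MFT
 * record spanning two 512-byte sectors carries an update sequence value
 * plus one saved word per sector; on disk the last two bytes of each
 * sector hold the sequence value, and ntfs_procfixups() checks each of
 * those words against the value at fh_foff and, when they match, puts
 * the saved original word back in place -- a torn write shows up as a
 * mismatch and the record is rejected with EINVAL.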
*/ int ntfs_procfixups( struct ntfsmount * ntmp, u_int32_t magic, caddr_t buf, size_t len) { struct fixuphdr *fhp = (struct fixuphdr *) buf; int i; u_int16_t fixup; u_int16_t *fxp; u_int16_t *cfxp; if (fhp->fh_magic != magic) { printf("ntfs_procfixups: magic doesn't match: %08x != %08x\n", fhp->fh_magic, magic); return (EINVAL); } if ((fhp->fh_fnum - 1) * ntmp->ntm_bps != len) { printf("ntfs_procfixups: " \ "bad fixups number: %d for %d bytes block\n", fhp->fh_fnum, len); return (EINVAL); } if (fhp->fh_foff >= ntmp->ntm_spc * ntmp->ntm_mftrecsz * ntmp->ntm_bps) { printf("ntfs_procfixups: invalid offset: %x", fhp->fh_foff); return (EINVAL); } fxp = (u_int16_t *) (buf + fhp->fh_foff); cfxp = (u_int16_t *) (buf + ntmp->ntm_bps - 2); fixup = *fxp++; for (i = 1; i < fhp->fh_fnum; i++, fxp++) { if (*cfxp != fixup) { printf("ntfs_procfixups: fixup %d doesn't match\n", i); return (EINVAL); } *cfxp = *fxp; ((caddr_t) cfxp) += ntmp->ntm_bps; } return (0); } #if UNUSED_CODE int ntfs_runtocn( cn_t * cn, struct ntfsmount * ntmp, u_int8_t * run, u_long len, cn_t vcn) { cn_t ccn = 0; cn_t ccl = 0; u_long off = 0; int error = 0; #if NTFS_DEBUG int i; printf("ntfs_runtocn: run: 0x%p, %ld bytes, vcn:%ld\n", run, len, (u_long) vcn); printf("ntfs_runtocn: run: "); for (i = 0; i < len; i++) printf("0x%02x ", run[i]); printf("\n"); #endif if (NULL == run) { printf("ntfs_runtocn: run == NULL\n"); return (EINVAL); } do { if (run[off] == 0) { printf("ntfs_runtocn: vcn too big\n"); return (E2BIG); } vcn -= ccl; error = ntfs_parserun(&ccn, &ccl, run, len, &off); if (error) { printf("ntfs_runtocn: ntfs_parserun failed\n"); return (error); } } while (ccl <= vcn); *cn = ccn + vcn; return (0); } #endif Index: head/sys/ntfs/ntfs_vfsops.c =================================================================== --- head/sys/ntfs/ntfs_vfsops.c (revision 49534) +++ head/sys/ntfs/ntfs_vfsops.c (revision 49535) @@ -1,996 +1,994 @@ /* $NetBSD: ntfs_vfsops.c,v 1.2 1999/05/06 15:43:20 christos Exp $ */ /*- * Copyright (c) 1998, 1999 Semen Ustimenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: ntfs_vfsops.c,v 1.6 1999/05/12 09:43:04 semenu Exp $ + * $Id: ntfs_vfsops.c,v 1.7 1999/05/31 11:28:30 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include - -#include /*#define NTFS_DEBUG 1*/ #include #include #include #include #include #include #include #if defined(__FreeBSD__) MALLOC_DEFINE(M_NTFSMNT, "NTFS mount", "NTFS mount structure"); MALLOC_DEFINE(M_NTFSNTNODE,"NTFS ntnode", "NTFS ntnode information"); MALLOC_DEFINE(M_NTFSFNODE,"NTFS fnode", "NTFS fnode information"); MALLOC_DEFINE(M_NTFSDIR,"NTFS dir", "NTFS dir buffer"); #endif #if defined(__FreeBSD__) static int ntfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); #else static int ntfs_mount __P((struct mount *, const char *, void *, struct nameidata *, struct proc *)); #endif static int ntfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int ntfs_root __P((struct mount *, struct vnode **)); static int ntfs_start __P((struct mount *, int, struct proc *)); static int ntfs_statfs __P((struct mount *, struct statfs *, struct proc *)); static int ntfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int ntfs_unmount __P((struct mount *, int, struct proc *)); static int ntfs_vget __P((struct mount *mp, ino_t ino, struct vnode **vpp)); static int ntfs_mountfs __P((register struct vnode *, struct mount *, struct ntfs_args *, struct proc *)); static int ntfs_vptofh __P((struct vnode *, struct fid *)); #if defined(__FreeBSD__) static int ntfs_init __P((struct vfsconf *)); static int ntfs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); #elif defined(__NetBSD__) static void ntfs_init __P((void)); static int ntfs_fhtovp __P((struct mount *, struct fid *, struct vnode **)); static int ntfs_checkexp __P((struct mount *, struct mbuf *, int *, struct ucred **)); static int ntfs_mountroot __P((void)); static int ntfs_sysctl __P((int *, u_int, void *, size_t *, void *, size_t, struct proc *)); #else static int ntfs_init __P((void)); static int ntfs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, struct vnode **, int *, struct ucred **)); #endif #ifdef __NetBSD__ /*ARGSUSED*/ static int ntfs_checkexp(mp, nam, exflagsp, credanonp) register struct mount *mp; struct mbuf *nam; int *exflagsp; struct ucred **credanonp; { return (EINVAL); } /*ARGSUSED*/ static int ntfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) int *name; u_int namelen; void *oldp; size_t *oldlenp; void *newp; size_t newlen; struct proc *p; { return (EINVAL); } static int ntfs_mountroot() { return (EINVAL); } #endif #if defined(__FreeBSD__) static int ntfs_init ( struct vfsconf *vcp ) #elif defined(__NetBSD__) static void ntfs_init () #else static int ntfs_init () #endif { ntfs_nthashinit(); #if !defined(__NetBSD__) return 0; #endif } static int ntfs_mount ( struct mount *mp, #if defined(__FreeBSD__) char *path, caddr_t data, #else const char *path, void *data, #endif struct nameidata *ndp, struct proc *p ) { u_int size; int err = 0; struct vnode *devvp; struct ntfs_args args; /* * Use NULL path to flag a root mount */ if( path == NULL) { /* *** * Mounting root file system *** */ /* Get vnode for root device*/ if( bdevvp( rootdev, &rootvp)) panic("ffs_mountroot: can't setup bdevvp for root"); /* * FS specific handling */ mp->mnt_flag |= MNT_RDONLY; /* XXX globally applicable?*/ /* * Attempt mount */ if( ( err 
= ntfs_mountfs(rootvp, mp, &args, p)) != 0) { /* fs specific cleanup (if any)*/ goto error_1; } goto dostatfs; /* success*/ } /* *** * Mounting non-root file system or updating a file system *** */ /* copy in user arguments*/ err = copyin(data, (caddr_t)&args, sizeof (struct ntfs_args)); if (err) goto error_1; /* can't get arguments*/ /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { printf("ntfs_mount(): MNT_UPDATE not supported\n"); err = EINVAL; goto error_1; #if 0 ump = VFSTOUFS(mp); fs = ump->um_fs; err = 0; if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (vfs_busy(mp)) { err = EBUSY; goto error_1; } err = ffs_flushfiles(mp, flags, p); vfs_unbusy(mp); } if (!err && (mp->mnt_flag & MNT_RELOAD)) err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); if (err) { goto error_1; } if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) { if (!fs->fs_clean) { if (mp->mnt_flag & MNT_FORCE) { printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt); } else { printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n", fs->fs_fsmnt); err = EPERM; goto error_1; } } fs->fs_ronly = 0; } if (fs->fs_ronly == 0) { fs->fs_clean = 0; ffs_sbupdate(ump, MNT_WAIT); } /* if not updating name...*/ if (args.fspec == 0) { /* * Process export requests. Jumping to "success" * will return the vfs_export() error code. */ err = vfs_export(mp, &ump->um_export, &args.export); goto success; } #endif } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); err = namei(ndp); if (err) { /* can't get devvp!*/ goto error_1; } devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { err = ENOTBLK; goto error_2; } if (bdevsw(devvp->v_rdev) == NULL) { err = ENXIO; goto error_2; } if (mp->mnt_flag & MNT_UPDATE) { #if 0 /* ******************** * UPDATE ******************** */ if (devvp != ntmp->um_devvp) err = EINVAL; /* needs translation */ else vrele(devvp); /* * Update device name only on success */ if( !err) { /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); } #endif } else { /* ******************** * NEW MOUNT ******************** */ /* * Since this is a new mount, we want the names for * the device and the mount point copied in. If an * error occurs, the mountpoint is discarded by the * upper level code. 
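The new-mount path just above copies the mount point and device names into fixed-size f_mntonname/f_mntfromname fields with copyinstr() and then bzero()s whatever is left, so the fields are always NUL-terminated with no stale bytes. A small user-space sketch of that copy-and-pad idiom (the MNAMELEN value and helper name here are picked only for illustration):

#include <stdio.h>
#include <string.h>
#include <strings.h>

#define MNAMELEN 80			/* illustrative size of the name fields */

static void
save_name(char dst[MNAMELEN], const char *src)
{
	size_t size;

	/* snprintf() stands in for copyinstr(): bounded copy, reports full length */
	size = (size_t)snprintf(dst, MNAMELEN, "%s", src);
	if (size >= MNAMELEN)
		size = MNAMELEN - 1;
	bzero(dst + size, MNAMELEN - size);	/* zero the tail, NUL pad included */
}

int
main(void)
{
	char mntonname[MNAMELEN];

	memset(mntonname, 'X', sizeof(mntonname));	/* simulate stale contents */
	save_name(mntonname, "/mnt/ntfs");
	printf("mounted on: %s\n", mntonname);
	return (0);
}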
*/ /* Save "last mounted on" info for mount point (NULL pad)*/ copyinstr( path, /* mount point*/ mp->mnt_stat.f_mntonname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size); /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, /* device name*/ mp->mnt_stat.f_mntfromname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); err = ntfs_mountfs(devvp, mp, &args, p); } if (err) { goto error_2; } dostatfs: /* * Initialize FS stat information in mount struct; uses both * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname * * This code is common to root and non-root mounts */ (void)VFS_STATFS(mp, &mp->mnt_stat, p); goto success; error_2: /* error with devvp held*/ /* release devvp before failing*/ vrele(devvp); error_1: /* no state to back out*/ success: return( err); } /* * Common code for mount and mountroot */ int ntfs_mountfs(devvp, mp, argsp, p) register struct vnode *devvp; struct mount *mp; struct ntfs_args *argsp; struct proc *p; { struct buf *bp; struct ntfsmount *ntmp; dev_t dev = devvp->v_rdev; int error, ronly, ncount, i; struct vnode *vp; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); ncount = vcount(devvp); #if defined(__FreeBSD__) if (devvp->v_object) ncount -= 1; #endif if (ncount > 1 && devvp != rootvp) return (EBUSY); #if defined(__FreeBSD__) vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); VOP_UNLOCK(devvp, 0, p); #else error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); #endif if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); bp = NULL; error = bread(devvp, BBLOCK, BBSIZE, NOCRED, &bp); if (error) goto out; ntmp = malloc( sizeof *ntmp, M_NTFSMNT, M_WAITOK ); bzero( ntmp, sizeof *ntmp ); bcopy( bp->b_data, &ntmp->ntm_bootfile, sizeof(struct bootfile) ); brelse( bp ); bp = NULL; if (strncmp(ntmp->ntm_bootfile.bf_sysid, NTFS_BBID, NTFS_BBIDLEN)) { error = EINVAL; printf("ntfs_mountfs: invalid boot block\n"); goto out; } { int8_t cpr = ntmp->ntm_mftrecsz; if( cpr > 0 ) ntmp->ntm_bpmftrec = ntmp->ntm_spc * cpr; else ntmp->ntm_bpmftrec = (1 << (-cpr)) / ntmp->ntm_bps; } dprintf(("ntfs_mountfs(): bps: %d, spc: %d, media: %x, mftrecsz: %d (%d sects)\n", ntmp->ntm_bps,ntmp->ntm_spc,ntmp->ntm_bootfile.bf_media, ntmp->ntm_mftrecsz,ntmp->ntm_bpmftrec)); dprintf(("ntfs_mountfs(): mftcn: 0x%x|0x%x\n", (u_int32_t)ntmp->ntm_mftcn,(u_int32_t)ntmp->ntm_mftmirrcn)); ntmp->ntm_mountp = mp; ntmp->ntm_dev = dev; ntmp->ntm_devvp = devvp; ntmp->ntm_uid = argsp->uid; ntmp->ntm_gid = argsp->gid; ntmp->ntm_mode = argsp->mode; ntmp->ntm_flag = argsp->flag; mp->mnt_data = (qaddr_t)ntmp; dprintf(("ntfs_mountfs(): case-%s,%s uid: %d, gid: %d, mode: %o\n", (ntmp->ntm_flag & NTFS_MFLAG_CASEINS)?"insens.":"sens.", (ntmp->ntm_flag & NTFS_MFLAG_ALLNAMES)?" allnames,":"", ntmp->ntm_uid, ntmp->ntm_gid, ntmp->ntm_mode)); /* * We read in some system nodes to do not allow * reclaim them and to have everytime access to them. 
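A note on the boot-block math earlier in ntfs_mountfs(): ntm_mftrecsz is a signed byte — a positive value is the number of clusters per MFT record, while a negative value -n means each record is 2^n bytes regardless of cluster size — and the code above converts it into sectors per record (ntm_bpmftrec). The sketch below (illustrative only; names assumed) performs the same conversion but reports bytes:

#include <stdio.h>
#include <stdint.h>

static unsigned
mft_record_bytes(int8_t clust_per_rec, unsigned bytes_per_sector,
    unsigned sectors_per_cluster)
{
	if (clust_per_rec > 0)
		return ((unsigned)clust_per_rec * sectors_per_cluster * bytes_per_sector);
	return (1u << -clust_per_rec);		/* negative: record is 2^n bytes */
}

int
main(void)
{
	/* a typical volume: 512-byte sectors, 8-sector (4K) clusters */
	printf("mftrecsz -10 -> %u bytes\n", mft_record_bytes(-10, 512, 8));
	printf("mftrecsz   1 -> %u bytes\n", mft_record_bytes(1, 512, 8));
	return (0);
}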
*/ { int pi[3] = { NTFS_MFTINO, NTFS_ROOTINO, NTFS_BITMAPINO }; for (i=0; i<3; i++) { error = VFS_VGET(mp, pi[i], &(ntmp->ntm_sysvn[pi[i]])); if(error) goto out1; ntmp->ntm_sysvn[pi[i]]->v_flag |= VSYSTEM; VREF(ntmp->ntm_sysvn[pi[i]]); vput(ntmp->ntm_sysvn[pi[i]]); } } /* * Read in WHOLE lowcase -> upcase translation * file. */ MALLOC(ntmp->ntm_upcase, wchar *, 65536 * sizeof(wchar), M_NTFSMNT, M_WAITOK); error = VFS_VGET(mp, NTFS_UPCASEINO, &vp); if(error) goto out1; error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, 0, 65536*sizeof(wchar), ntmp->ntm_upcase); vput(vp); if(error) goto out1; /* * Scan $BitMap and count free clusters */ error = ntfs_calccfree(ntmp, &ntmp->ntm_cfree); if(error) goto out1; /* * Read and translate to internal format attribute * definition file. */ { int num,j; struct attrdef ad; /* Open $AttrDef */ error = VFS_VGET(mp, NTFS_ATTRDEFINO, &vp ); if(error) goto out1; /* Count valid entries */ for(num=0;;num++) { error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, num * sizeof(ad), sizeof(ad), &ad); if (error) goto out1; if (ad.ad_name[0] == 0) break; } /* Alloc memory for attribute definitions */ MALLOC(ntmp->ntm_ad, struct ntvattrdef *, num * sizeof(struct ntvattrdef), M_NTFSMNT, M_WAITOK); ntmp->ntm_adnum = num; /* Read them and translate */ for(i=0;intm_ad[i].ad_name[j] = ad.ad_name[j]; } while(ad.ad_name[j++]); ntmp->ntm_ad[i].ad_namelen = j - 1; ntmp->ntm_ad[i].ad_type = ad.ad_type; } vput(vp); } mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); #if defined(__FreeBSD__) mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; #else mp->mnt_stat.f_fsid.val[1] = makefstype(MOUNT_NTFS); #endif mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; #if defined(__FreeBSD__) devvp->v_specmountpoint = mp; #else devvp->v_specflags |= SI_MOUNTEDON; #endif return (0); out1: for(i=0;intm_sysvn[i]) vrele(ntmp->ntm_sysvn[i]); if (vflush(mp,NULLVP,0)) printf("ntfs_mountfs: vflush failed\n"); out: #if defined(__FreeBSD__) devvp->v_specmountpoint = NULL; #else devvp->v_specflags &= ~SI_MOUNTEDON; #endif if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); return (error); } static int ntfs_start ( struct mount *mp, int flags, struct proc *p ) { return (0); } static int ntfs_unmount( struct mount *mp, int mntflags, struct proc *p) { register struct ntfsmount *ntmp; int error, ronly = 0, flags, i; dprintf(("ntfs_unmount: unmounting...\n")); ntmp = VFSTONTFS(mp); flags = 0; if(mntflags & MNT_FORCE) flags |= FORCECLOSE; dprintf(("ntfs_unmount: vflushing...\n")); error = vflush(mp,NULLVP,flags | SKIPSYSTEM); if (error) { printf("ntfs_unmount: vflush failed: %d\n",error); return (error); } /* Check if only system vnodes are rest */ for(i=0;intm_sysvn[i]) && (ntmp->ntm_sysvn[i]->v_usecount > 1)) return (EBUSY); /* Derefernce all system vnodes */ for(i=0;intm_sysvn[i]) vrele(ntmp->ntm_sysvn[i]); /* vflush system vnodes */ error = vflush(mp,NULLVP,flags); if (error) printf("ntfs_unmount: vflush failed(sysnodes): %d\n",error); #if defined(__FreeBSD__) ntmp->ntm_devvp->v_specmountpoint = NULL; #else ntmp->ntm_devvp->v_specflags &= ~SI_MOUNTEDON; #endif vinvalbuf(ntmp->ntm_devvp, V_SAVE, NOCRED, p, 0, 0); error = VOP_CLOSE(ntmp->ntm_devvp, ronly ? 
FREAD : FREAD|FWRITE, NOCRED, p); vrele(ntmp->ntm_devvp); dprintf(("ntfs_umount: freeing memory...\n")); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; FREE(ntmp->ntm_ad, M_NTFSMNT); FREE(ntmp->ntm_upcase, M_NTFSMNT); FREE(ntmp, M_NTFSMNT); return (error); } static int ntfs_root( struct mount *mp, struct vnode **vpp ) { struct vnode *nvp; int error = 0; dprintf(("ntfs_root(): sysvn: %p\n", VFSTONTFS(mp)->ntm_sysvn[NTFS_ROOTINO])); error = VFS_VGET(mp, (ino_t)NTFS_ROOTINO, &nvp); if(error) { printf("ntfs_root: VFS_VGET failed: %d\n",error); return (error); } *vpp = nvp; return (0); } static int ntfs_quotactl ( struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p) { printf("\nntfs_quotactl():\n"); return EOPNOTSUPP; } int ntfs_calccfree( struct ntfsmount *ntmp, cn_t *cfreep) { struct vnode *vp; u_int8_t *tmp; int j, error; long cfree = 0; size_t bmsize, i; vp = ntmp->ntm_sysvn[NTFS_BITMAPINO]; bmsize = VTOF(vp)->f_size; MALLOC(tmp, u_int8_t *, bmsize, M_TEMP, M_WAITOK); error = ntfs_readattr(ntmp, VTONT(vp), NTFS_A_DATA, NULL, 0, bmsize, tmp); if(error) { FREE(tmp, M_TEMP); return (error); } for(i=0;intm_sysvn[NTFS_MFTINO])->f_size; mftallocated = VTOF(ntmp->ntm_sysvn[NTFS_MFTINO])->f_allocated; #if defined(__FreeBSD__) sbp->f_type = mp->mnt_vfc->vfc_typenum; #elif defined(__NetBSD__) sbp->f_type = 0; #else sbp->f_type = MOUNT_NTFS; #endif sbp->f_bsize = ntmp->ntm_bps; sbp->f_iosize = ntmp->ntm_bps * ntmp->ntm_spc; sbp->f_blocks = ntmp->ntm_bootfile.bf_spv; sbp->f_bfree = sbp->f_bavail = ntfs_cntobn(ntmp->ntm_cfree); sbp->f_ffree = sbp->f_bfree / ntmp->ntm_bpmftrec; sbp->f_files = mftallocated / ntfs_bntob(ntmp->ntm_bpmftrec) + sbp->f_ffree; if (sbp != &mp->mnt_stat) { bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } sbp->f_flags = mp->mnt_flag; return (0); } static int ntfs_sync ( struct mount *mp, int waitfor, struct ucred *cred, struct proc *p) { /*dprintf(("ntfs_sync():\n"));*/ return (0); } /*ARGSUSED*/ static int ntfs_fhtovp( #if defined(__FreeBSD__) struct mount *mp, struct fid *fhp, struct sockaddr *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp) #elif defined(__NetBSD__) struct mount *mp, struct fid *fhp, struct vnode **vpp) #else struct mount *mp, struct fid *fhp, struct mbuf *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp) #endif { printf("\ntfs_fhtovp():\n"); return 0; } static int ntfs_vptofh( struct vnode *vp, struct fid *fhp) { printf("ntfs_vptofh():\n"); return EOPNOTSUPP; } int ntfs_vgetex( struct mount *mp, ino_t ino, u_int32_t attrtype, char *attrname, u_long lkflags, u_long flags, struct proc *p, struct vnode **vpp) { int error; register struct ntfsmount *ntmp; struct ntnode *ip; struct fnode *fp; struct vnode *vp; dprintf(("ntfs_vgetex: ino: %d, attr: 0x%x:%s, lkf: 0x%x, f: 0x%x\n", ino, attrtype, attrname?attrname:"", lkflags, flags )); ntmp = VFSTONTFS(mp); *vpp = NULL; /* Get ntnode */ error = ntfs_ntlookup(ntmp, ino, &ip); if (error) { printf("ntfs_vget: ntfs_ntget failed\n"); return (error); } /* It may be not initialized fully, so force load it */ if (!(flags & VG_DONTLOADIN) && !(ip->i_flag & IN_LOADED)) { error = ntfs_loadntnode(ntmp, ip); if(error) { printf("ntfs_vget: CAN'T LOAD ATTRIBUTES FOR INO: %d\n", ip->i_number); ntfs_ntput(ip); return (error); } } error = ntfs_fget(ntmp, ip, attrtype, attrname, &fp); if (error) { printf("ntfs_vget: ntfs_fget failed\n"); ntfs_ntput(ip); return 
(error); } if (!(flags & VG_DONTVALIDFN) && !(fp->f_flag & FN_VALID)) { if ((ip->i_frflag & NTFS_FRFLAG_DIR) && (fp->f_attrtype == 0x80 && fp->f_attrname == NULL)) { fp->f_type = VDIR; } else if(flags & VG_EXT) { fp->f_type = VNON; fp->f_size =fp->f_allocated = 0; } else { fp->f_type = VREG; error = ntfs_filesize(ntmp, fp, &fp->f_size, &fp->f_allocated); if (error) { ntfs_ntput(ip); return (error); } } fp->f_flag |= FN_VALID; } if (FTOV(fp)) { VGET(FTOV(fp), lkflags, p); *vpp = FTOV(fp); ntfs_ntput(ip); return (0); } error = getnewvnode(VT_NTFS, ntmp->ntm_mountp, ntfs_vnodeop_p, &vp); if(error) { ntfs_frele(fp); ntfs_ntput(ip); return (error); } dprintf(("ntfs_vget: vnode: %p for ntnode: %d\n", vp,ino)); lockinit(&fp->f_lock, PINOD, "fnode", 0, 0); fp->f_vp = vp; vp->v_data = fp; vp->v_type = fp->f_type; if (ino == NTFS_ROOTINO) vp->v_flag |= VROOT; ntfs_ntput(ip); if (lkflags & LK_TYPE_MASK) { error = VN_LOCK(vp, lkflags, p); if (error) { vput(vp); return (error); } } VREF(fp->f_devvp); *vpp = vp; return (0); } static int ntfs_vget( struct mount *mp, ino_t ino, struct vnode **vpp) { return ntfs_vgetex(mp, ino, NTFS_A_DATA, NULL, LK_EXCLUSIVE, 0, curproc, vpp); } #if defined(__FreeBSD__) static struct vfsops ntfs_vfsops = { ntfs_mount, ntfs_start, ntfs_unmount, ntfs_root, ntfs_quotactl, ntfs_statfs, ntfs_sync, ntfs_vget, ntfs_fhtovp, ntfs_vptofh, ntfs_init, NULL }; VFS_SET(ntfs_vfsops, ntfs, 0); #elif defined(__NetBSD__) extern struct vnodeopv_desc ntfs_vnodeop_opv_desc; struct vnodeopv_desc *ntfs_vnodeopv_descs[] = { &ntfs_vnodeop_opv_desc, NULL, }; struct vfsops ntfs_vfsops = { MOUNT_NTFS, ntfs_mount, ntfs_start, ntfs_unmount, ntfs_root, ntfs_quotactl, ntfs_statfs, ntfs_sync, ntfs_vget, ntfs_fhtovp, ntfs_vptofh, ntfs_init, ntfs_sysctl, ntfs_mountroot, ntfs_checkexp, ntfs_vnodeopv_descs, }; #else static struct vfsops ntfs_vfsops = { ntfs_mount, ntfs_start, ntfs_unmount, ntfs_root, ntfs_quotactl, ntfs_statfs, ntfs_sync, ntfs_vget, ntfs_fhtovp, ntfs_vptofh, ntfs_init, }; VFS_SET(ntfs_vfsops, ntfs, MOUNT_NTFS, 0); #endif Index: head/sys/ntfs/ntfs_vnops.c =================================================================== --- head/sys/ntfs/ntfs_vnops.c (revision 49534) +++ head/sys/ntfs/ntfs_vnops.c (revision 49535) @@ -1,1030 +1,1029 @@ /* $NetBSD: ntfs_vnops.c,v 1.2 1999/05/06 15:43:20 christos Exp $ */ /* * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * John Heidemann of the UCLA Ficus project. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: ntfs_vnops.c,v 1.4 1999/05/11 19:54:52 phk Exp $ + * $Id: ntfs_vnops.c,v 1.5 1999/05/12 09:43:06 semenu Exp $ * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__FreeBSD__) #include #endif #include #include /*#define NTFS_DEBUG 1*/ #include #include #include #include -#include static int ntfs_bypass __P((struct vop_generic_args *ap)); static int ntfs_read __P((struct vop_read_args *)); static int ntfs_write __P((struct vop_write_args *ap)); static int ntfs_getattr __P((struct vop_getattr_args *ap)); static int ntfs_inactive __P((struct vop_inactive_args *ap)); static int ntfs_print __P((struct vop_print_args *ap)); static int ntfs_reclaim __P((struct vop_reclaim_args *ap)); static int ntfs_strategy __P((struct vop_strategy_args *ap)); #if defined(__NetBSD__) static int ntfs_islocked __P((struct vop_islocked_args *ap)); static int ntfs_unlock __P((struct vop_unlock_args *ap)); static int ntfs_lock __P((struct vop_lock_args *ap)); #endif static int ntfs_access __P((struct vop_access_args *ap)); static int ntfs_open __P((struct vop_open_args *ap)); static int ntfs_close __P((struct vop_close_args *ap)); static int ntfs_readdir __P((struct vop_readdir_args *ap)); static int ntfs_lookup __P((struct vop_lookup_args *ap)); static int ntfs_bmap __P((struct vop_bmap_args *ap)); #if defined(__FreeBSD__) static int ntfs_getpages __P((struct vop_getpages_args *ap)); static int ntfs_putpages __P((struct vop_putpages_args *)); #endif static int ntfs_fsync __P((struct vop_fsync_args *ap)); int ntfs_prtactive = 1; /* 1 => print out reclaim of active vnodes */ #if defined(__FreeBSD__) int ntfs_getpages(ap) struct vop_getpages_args *ap; { return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_reqpage); } int ntfs_putpages(ap) struct vop_putpages_args *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } #endif /* * This is a noop, simply returning what one has been given. 
*/ int ntfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { dprintf(("ntfs_bmap: vn: %p, blk: %d\n", ap->a_vp,(u_int32_t)ap->a_bn)); if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; #if !defined(__NetBSD__) if (ap->a_runb != NULL) *ap->a_runb = 0; #endif return (0); } static int ntfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); struct uio *uio = ap->a_uio; struct ntfsmount *ntmp = ip->i_mp; u_int8_t *data; u_int64_t toread; int error; dprintf(("ntfs_read: ino: %d, off: %d resid: %d, segflg: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid,uio->uio_segflg)); toread = fp->f_size; dprintf(("ntfs_read: filesize: %d",(u_int32_t)toread)); toread = min( uio->uio_resid, toread - uio->uio_offset ); dprintf((", toread: %d\n",(u_int32_t)toread)); MALLOC(data, u_int8_t *, toread, M_TEMP,M_WAITOK); error = ntfs_readattr(ntmp, ip, fp->f_attrtype, fp->f_attrname, uio->uio_offset, toread, data); if(error) { printf("ntfs_read: ntfs_readattr failed: %d\n",error); FREE(data, M_TEMP); return (error); } error = uiomove(data, (int) toread, uio); if(error) { printf("ntfs_read: uiomove failed: %d\n",error); FREE(data, M_TEMP); return (error); } FREE(data, M_TEMP); return (0); } static int ntfs_bypass(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { int error = ENOTTY; dprintf(("ntfs_bypass: %s\n", ap->a_desc->vdesc_name)); return (error); } static int ntfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); register struct vattr *vap = ap->a_vap; dprintf(("ntfs_getattr: %d, flags: %d\n",ip->i_number,ip->i_flag)); vap->va_fsid = dev2udev(fp->f_dev); vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode; vap->va_nlink = ip->i_nlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; vap->va_rdev = 0; /* XXX UNODEV ? */ vap->va_size = fp->f_size; vap->va_bytes = fp->f_allocated; vap->va_atime = ntfs_nttimetounix(fp->f_times.t_access); vap->va_mtime = ntfs_nttimetounix(fp->f_times.t_write); vap->va_ctime = ntfs_nttimetounix(fp->f_times.t_create); vap->va_flags = ip->i_flag; vap->va_gen = 0; vap->va_blocksize = ip->i_mp->ntm_spc * ip->i_mp->ntm_bps; vap->va_type = fp->f_type; vap->va_filerev = 0; return (0); } /* * Last reference to an ntnode. If necessary, write or delete it. */ int ntfs_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct ntnode *ip = VTONT(vp); int error; dprintf(("ntfs_inactive: vnode: %p, ntnode: %d\n", vp, ip->i_number)); if (ntfs_prtactive && vp->v_usecount != 0) vprint("ntfs_inactive: pushing active", vp); error = 0; VOP__UNLOCK(vp,0,ap->a_p); /* * If we are done with the ntnode, reclaim it * so that it can be reused immediately. */ if (vp->v_usecount == 0 && ip->i_mode == 0) #if defined(__FreeBSD__) vrecycle(vp, (struct simplelock *)0, ap->a_p); #else /* defined(__NetBSD__) */ vgone(vp); #endif return (error); } /* * Reclaim an inode so that it can be used for other purposes. 
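ntfs_read() above sizes the transfer as toread = min(uio_resid, filesize - uio_offset) before allocating a temporary buffer for ntfs_readattr() and uiomove(). A minimal sketch of that clamping; the explicit end-of-file guard is an addition here (in the kernel path an out-of-range offset is caught later by ntfs_readattr()'s E2BIG check):

#include <stdio.h>
#include <stdint.h>

/* Clamp a request for `resid' bytes at `offset' against the file size. */
static uint64_t
clamp_read(uint64_t fsize, uint64_t offset, uint64_t resid)
{
	if (offset >= fsize)
		return (0);			/* at or past EOF */
	return (resid < fsize - offset ? resid : fsize - offset);
}

int
main(void)
{
	printf("%llu\n", (unsigned long long)clamp_read(1000, 900, 512));	/* 100 */
	printf("%llu\n", (unsigned long long)clamp_read(1000, 0, 512));	/* 512 */
	printf("%llu\n", (unsigned long long)clamp_read(1000, 1200, 512));	/* 0 */
	return (0);
}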
*/ int ntfs_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); int error; dprintf(("ntfs_reclaim: vnode: %p, ntnode: %d\n", vp, ip->i_number)); error = ntfs_ntget(ip); if (error) return (error); #if defined(__FreeBSD__) VOP__UNLOCK(vp,0,ap->a_p); #endif /* Purge old data structures associated with the inode. */ cache_purge(vp); if (fp->f_devvp) { vrele(fp->f_devvp); fp->f_devvp = NULL; } ntfs_frele(fp); vp->v_data = NULL; ntfs_ntput(ip); return (0); } static int ntfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { /* printf("[ntfs_print]");*/ return (0); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. */ int ntfs_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { register struct buf *bp = ap->a_bp; register struct vnode *vp = bp->b_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); struct ntfsmount *ntmp = ip->i_mp; int error; dprintf(("ntfs_strategy: offset: %d, blkno: %d, lblkno: %d\n", (u_int32_t)bp->b_offset,(u_int32_t)bp->b_blkno, (u_int32_t)bp->b_lblkno)); dprintf(("strategy: bcount: %d flags: 0x%x\n", (u_int32_t)bp->b_bcount,bp->b_flags)); if (bp->b_flags & B_READ) { u_int32_t toread; if (ntfs_cntob(bp->b_blkno) >= fp->f_size) { clrbuf(bp); error = 0; } else { toread = min(bp->b_bcount, fp->f_size-ntfs_cntob(bp->b_blkno)); dprintf(("ntfs_strategy: toread: %d, fsize: %d\n", toread,(u_int32_t)fp->f_size)); error = ntfs_readattr(ntmp, ip, fp->f_attrtype, fp->f_attrname, ntfs_cntob(bp->b_blkno), toread, bp->b_data); if (error) { printf("ntfs_strategy: ntfs_readattr failed\n"); bp->b_error = error; bp->b_flags |= B_ERROR; } bzero(bp->b_data + toread, bp->b_bcount - toread); } } else { size_t tmp; u_int32_t towrite; if (ntfs_cntob(bp->b_blkno) + bp->b_bcount >= fp->f_size) { printf("ntfs_strategy: CAN'T EXTEND FILE\n"); bp->b_error = error = EFBIG; bp->b_flags |= B_ERROR; } else { towrite = min(bp->b_bcount, fp->f_size-ntfs_cntob(bp->b_blkno)); dprintf(("ntfs_strategy: towrite: %d, fsize: %d\n", towrite,(u_int32_t)fp->f_size)); error = ntfs_writeattr_plain(ntmp, ip, fp->f_attrtype, fp->f_attrname, ntfs_cntob(bp->b_blkno),towrite, bp->b_data, &tmp); if (error) { printf("ntfs_strategy: ntfs_writeattr fail\n"); bp->b_error = error; bp->b_flags |= B_ERROR; } } } biodone(bp); return (error); } static int ntfs_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); struct uio *uio = ap->a_uio; struct ntfsmount *ntmp = ip->i_mp; u_int8_t *data; u_int64_t towrite; off_t off; size_t written; int error; dprintf(("ntfs_write: ino: %d, off: %d resid: %d, segflg: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid,uio->uio_segflg)); towrite = fp->f_size; dprintf(("ntfs_write: filesize: %d",(u_int32_t)towrite)); if (uio->uio_resid + uio->uio_offset > towrite) { printf("ntfs_write: CAN'T WRITE BEYOND OF FILE\n"); return (EFBIG); } towrite = min(uio->uio_resid, towrite - uio->uio_offset); off = uio->uio_offset; dprintf((", towrite: %d\n",(u_int32_t)towrite)); MALLOC(data, u_int8_t *, towrite, M_TEMP,M_WAITOK); error = uiomove(data, (int) towrite, uio); if(error) { FREE(data, M_TEMP); return (error); } error = ntfs_writeattr_plain(ntmp, ip, 
fp->f_attrtype, fp->f_attrname, off, towrite, data, &written); if(error) { printf("ntfs_write: ntfs_writeattr failed: %d\n",error); FREE(data, M_TEMP); return (error); } FREE(data, M_TEMP); return (0); } #if defined(__NetBSD__) /* * Check for a locked ntnode. */ int ntfs_islocked(ap) struct vop_islocked_args /* { struct vnode *a_vp; } */ *ap; { register struct ntnode *ip = VTONT(ap->a_vp); dprintf(("ntfs_islocked %d\n",ip->i_number)); if (ip->i_flag & IN_LOCKED) return (1); return (0); } /* * Unlock an ntnode. If WANT bit is on, wakeup. */ int ntfs_lockcount = 90; int ntfs_unlock(ap) struct vop_unlock_args /* { struct vnode *a_vp; } */ *ap; { register struct ntnode *ip = VTONT(ap->a_vp); #ifdef DIAGNOSTIC struct proc *p = curproc; #endif dprintf(("ntfs_unlock %d\n",ip->i_number)); #ifdef DIAGNOSTIC if ((ip->i_flag & IN_LOCKED) == 0) { vprint("ntfs_unlock: unlocked ntnode", ap->a_vp); panic("ntfs_unlock NOT LOCKED"); } if (p && p->p_pid != ip->i_lockholder && p->p_pid > -1 && ip->i_lockholder > -1 && ntfs_lockcount++ < 100) panic("unlocker (%d) != lock holder (%d)", p->p_pid, ip->i_lockholder); #endif if (--ip->i_lockcount > 0) { if ((ip->i_flag & IN_RECURSE) == 0) panic("ntfs_unlock: recursive lock prematurely released, pid=%d\n", ip->i_lockholder); return (0); } ip->i_lockholder = 0; ip->i_flag &= ~(IN_LOCKED|IN_RECURSE); if (ip->i_flag & IN_WANTED) { ip->i_flag &= ~IN_WANTED; wakeup((caddr_t)ip); } return (0); } /* * Lock an ntnode. If its already locked, set the WANT bit and sleep. */ int ntfs_lock(ap) struct vop_lock_args /* { struct vnode *a_vp; } */ *ap; { struct proc *p = curproc; register struct vnode *vp = ap->a_vp; register struct ntnode *ip = VTONT(vp); dprintf(("ntfs_lock %d (%d locks)\n",ip->i_number,ip->i_lockcount)); start: while (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; (void) tsleep((caddr_t)vp, PINOD, "ntflk1", 0); } if (vp->v_tag == VT_NON) return (ENOENT); ip = VTONT(vp); if (ip->i_flag & IN_LOCKED) { if (p->p_pid == ip->i_lockholder) { if( (ip->i_flag & IN_RECURSE) == 0) panic("ntfs_lock: recursive lock not expected, pid: %d\n", ip->i_lockholder); } else { ip->i_flag |= IN_WANTED; #ifdef DIAGNOSTIC if (p) ip->i_lockwaiter = p->p_pid; else ip->i_lockwaiter = -1; #endif (void) tsleep((caddr_t)ip, PINOD, "ntflk2", 0); goto start; } } #ifdef DIAGNOSTIC ip->i_lockwaiter = 0; if (((ip->i_flag & IN_RECURSE) == 0) && (ip->i_lockholder != 0)) panic("lockholder (%d) != 0", ip->i_lockholder); if (p && p->p_pid == 0) printf("locking by process 0\n"); #endif if ((ip->i_flag & IN_RECURSE) == 0) ip->i_lockcount = 1; else ++ip->i_lockcount; if (p) ip->i_lockholder = p->p_pid; else ip->i_lockholder = -1; ip->i_flag |= IN_LOCKED; return (0); } #endif int ntfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct ntnode *ip = VTONT(vp); struct ucred *cred = ap->a_cred; mode_t mask, mode = ap->a_mode; register gid_t *gp; int i; #ifdef QUOTA int error; #endif dprintf(("ntfs_access: %d\n",ip->i_number)); /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ if (mode & VWRITE) { switch ((int)vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #ifdef QUOTA if (error = getinoquota(ip)) return (error); #endif break; } } /* If immutable bit set, nobody gets to write it. 
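ntfs_access(), which starts just above and continues below, uses the classic owner/group/other test: build a mask of the S_I* bits the request needs for whichever class the caller falls into, then require all of them in the file mode. A self-contained restatement of that test (the WANT_* constants and the example in main() are invented for illustration):

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>

#define WANT_READ	0x1
#define WANT_WRITE	0x2
#define WANT_EXEC	0x4

static int
check_access(mode_t fmode, uid_t fuid, gid_t fgid,
    uid_t uid, const gid_t *groups, int ngroups, int want)
{
	mode_t mask = 0;
	int i;

	if (uid == 0)				/* user id 0 always gets access */
		return (0);

	if (uid == fuid) {			/* owner class */
		if (want & WANT_EXEC)  mask |= S_IXUSR;
		if (want & WANT_READ)  mask |= S_IRUSR;
		if (want & WANT_WRITE) mask |= S_IWUSR;
		return ((fmode & mask) == mask ? 0 : -1);
	}
	for (i = 0; i < ngroups; i++)		/* group class */
		if (groups[i] == fgid) {
			if (want & WANT_EXEC)  mask |= S_IXGRP;
			if (want & WANT_READ)  mask |= S_IRGRP;
			if (want & WANT_WRITE) mask |= S_IWGRP;
			return ((fmode & mask) == mask ? 0 : -1);
		}
	if (want & WANT_EXEC)  mask |= S_IXOTH;	/* everyone else */
	if (want & WANT_READ)  mask |= S_IROTH;
	if (want & WANT_WRITE) mask |= S_IWOTH;
	return ((fmode & mask) == mask ? 0 : -1);
}

int
main(void)
{
	gid_t groups[] = { 20, 100 };

	/* 0640 file owned by 1001:100; caller 1002 is in group 100 and wants read */
	printf("%s\n", check_access(0640, 1001, 100, 1002, groups, 2,
	    WANT_READ) == 0 ? "granted" : "denied");
	return (0);
}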
*/ /* if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE)) return (EPERM); */ /* Otherwise, user id 0 always gets access. */ if (cred->cr_uid == 0) return (0); mask = 0; /* Otherwise, check the owner. */ if (cred->cr_uid == ip->i_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (ip->i_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* * Open called. * * Nothing to do. */ /* ARGSUSED */ static int ntfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { #if NTFS_DEBUG register struct vnode *vp = ap->a_vp; register struct ntnode *ip = VTONT(vp); printf("ntfs_open: %d\n",ip->i_number); #endif /* * Files marked append-only must be opened for appending. */ return (0); } /* * Close called. * * Update the times on the inode. */ /* ARGSUSED */ static int ntfs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { #if NTFS_DEBUG register struct vnode *vp = ap->a_vp; register struct ntnode *ip = VTONT(vp); printf("ntfs_close: %d\n",ip->i_number); #endif return (0); } int ntfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_ncookies; u_int **cookies; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct fnode *fp = VTOF(vp); register struct ntnode *ip = FTONT(fp); struct uio *uio = ap->a_uio; struct ntfsmount *ntmp = ip->i_mp; int i, error = 0; u_int32_t faked = 0, num; int ncookies = 0; struct dirent cde; off_t off; dprintf(("ntfs_readdir %d off: %d resid: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid)); off = uio->uio_offset; /* Simulate . in every dir except ROOT */ if( ip->i_number != NTFS_ROOTINO ) { struct dirent dot = { NTFS_ROOTINO, sizeof(struct dirent), DT_DIR, 1, "." }; if( uio->uio_offset < sizeof(struct dirent) ) { dot.d_fileno = ip->i_number; error = uiomove((char *)&dot,sizeof(struct dirent),uio); if(error) return (error); ncookies ++; } } /* Simulate .. in every dir including ROOT */ if( uio->uio_offset < 2 * sizeof(struct dirent) ) { struct dirent dotdot = { NTFS_ROOTINO, sizeof(struct dirent), DT_DIR, 2, ".." }; error = uiomove((char *)&dotdot,sizeof(struct dirent),uio); if(error) return (error); ncookies ++; } faked = (ip->i_number == NTFS_ROOTINO) ? 
1 : 2; num = uio->uio_offset / sizeof(struct dirent) - faked; while( uio->uio_resid >= sizeof(struct dirent) ) { struct attr_indexentry *iep; error = ntfs_ntreaddir(ntmp, fp, num, &iep); if(error) return (error); if( NULL == iep ) break; while( !(iep->ie_flag & NTFS_IEFLAG_LAST) && (uio->uio_resid >= sizeof(struct dirent)) ) { if( ntfs_isnamepermitted(ntmp,iep) ) { dprintf(("ntfs_readdir: elem: %d, fname:[",num)); for(i=0;iie_fnamelen;i++) { cde.d_name[i] = (char)iep->ie_fname[i]; dprintf(("%c", cde.d_name[i])); } dprintf(("] type: %d, flag: %d, ",iep->ie_fnametype, iep->ie_flag)); cde.d_name[i] = '\0'; cde.d_namlen = iep->ie_fnamelen; cde.d_fileno = iep->ie_number; cde.d_type = (iep->ie_fflag & NTFS_FFLAG_DIR) ? DT_DIR : DT_REG; cde.d_reclen = sizeof(struct dirent); dprintf(("%s\n", (cde.d_type == DT_DIR) ? "dir":"reg")); error = uiomove((char *)&cde, sizeof(struct dirent), uio); if(error) return (error); ncookies++; num++; } iep = NTFS_NEXTREC(iep,struct attr_indexentry *); } } dprintf(("ntfs_readdir: %d entries (%d bytes) read\n", ncookies,(u_int)(uio->uio_offset - off))); dprintf(("ntfs_readdir: off: %d resid: %d\n", (u_int32_t)uio->uio_offset,uio->uio_resid)); if (!error && ap->a_ncookies != NULL) { struct dirent* dpStart; struct dirent* dp; #if defined(__FreeBSD__) u_long *cookies; u_long *cookiep; #else /* defined(__NetBSD__) */ off_t *cookies; off_t *cookiep; #endif printf("ntfs_readdir: %d cookies\n",ncookies); if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) panic("ntfs_readdir: unexpected uio from NFS server"); dpStart = (struct dirent *) ((caddr_t)uio->uio_iov->iov_base - (uio->uio_offset - off)); #if defined(__FreeBSD__) MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); #else /* defined(__NetBSD__) */ MALLOC(cookies, off_t *, ncookies * sizeof(off_t), M_TEMP, M_WAITOK); #endif for (dp = dpStart, cookiep = cookies, i=0; i < ncookies; dp = (struct dirent *)((caddr_t) dp + dp->d_reclen), i++) { off += dp->d_reclen; *cookiep++ = (u_int) off; } *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } /* if (ap->a_eofflag) *ap->a_eofflag = VTONT(ap->a_vp)->i_size <= uio->uio_offset; */ return (error); } int ntfs_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct ntnode *dip = VTONT(dvp); struct ntfsmount *ntmp = dip->i_mp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int error; int lockparent = cnp->cn_flags & LOCKPARENT; #if NTFS_DEBUG int wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); #endif dprintf(("ntfs_lookup: %s (%ld bytes) in %d, lp: %d, wp: %d \n", cnp->cn_nameptr, cnp->cn_namelen, dip->i_number,lockparent, wantparent)); error = VOP_ACCESS(dvp, VEXEC, cred, cnp->cn_proc); if(error) return (error); if( (cnp->cn_namelen == 1) && !strncmp(cnp->cn_nameptr,".",1) ) { dprintf(("ntfs_lookup: faking . directory in %d\n", dip->i_number)); VREF(dvp); *ap->a_vpp = dvp; return (0); } else if( (cnp->cn_namelen == 2) && !strncmp(cnp->cn_nameptr,"..",2) && (cnp->cn_flags & ISDOTDOT) ) { struct ntvattr *vap; dprintf(("ntfs_lookup: faking .. 
directory in %d\n", dip->i_number)); error = ntfs_ntvattrget(ntmp, dip, NTFS_A_NAME, NULL, 0, &vap); if(error) return (error); VOP__UNLOCK(dvp,0,cnp->cn_proc); dprintf(("ntfs_lookup: parentdir: %d\n", vap->va_a_name->n_pnumber)); error = VFS_VGET(ntmp->ntm_mountp, vap->va_a_name->n_pnumber,ap->a_vpp); ntfs_ntvattrrele(vap); if(error) { VOP__LOCK(dvp, 0, cnp->cn_proc); return(error); } if( lockparent && (cnp->cn_flags & ISLASTCN) && (error = VOP__LOCK(dvp, 0, cnp->cn_proc)) ) { vput( *(ap->a_vpp) ); return (error); } return (error); } else { error = ntfs_ntlookupfile(ntmp, dvp, cnp, ap->a_vpp); if(error) return (error); dprintf(("ntfs_lookup: found ino: %d\n", VTONT(*ap->a_vpp)->i_number)); if(!lockparent || !(cnp->cn_flags & ISLASTCN)) VOP__UNLOCK(dvp, 0, cnp->cn_proc); if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, *ap->a_vpp, cnp); } return (error); } /* * Flush the blocks of a file to disk. * * This function is worthless for vnodes that represent directories. Maybe we * could just do a sync if they try an fsync on a directory file. */ static int ntfs_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { return (0); } /* * Global vfs data structures */ vop_t **ntfs_vnodeop_p; #if defined(__FreeBSD__) static #endif struct vnodeopv_entry_desc ntfs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *)ntfs_bypass }, { &vop_getattr_desc, (vop_t *)ntfs_getattr }, { &vop_inactive_desc, (vop_t *)ntfs_inactive }, { &vop_reclaim_desc, (vop_t *)ntfs_reclaim }, { &vop_print_desc, (vop_t *)ntfs_print }, #if defined(__FreeBSD__) { &vop_islocked_desc, (vop_t *)vop_stdislocked }, { &vop_unlock_desc, (vop_t *)vop_stdunlock }, { &vop_lock_desc, (vop_t *)vop_stdlock }, { &vop_cachedlookup_desc, (vop_t *)ntfs_lookup }, { &vop_lookup_desc, (vop_t *)vfs_cache_lookup }, #else { &vop_islocked_desc, (vop_t *)ntfs_islocked }, { &vop_unlock_desc, (vop_t *)ntfs_unlock }, { &vop_lock_desc, (vop_t *)ntfs_lock }, { &vop_lookup_desc, (vop_t *)ntfs_lookup }, #endif { &vop_access_desc, (vop_t *)ntfs_access }, { &vop_close_desc, (vop_t *)ntfs_close }, { &vop_open_desc, (vop_t *)ntfs_open }, { &vop_readdir_desc, (vop_t *)ntfs_readdir }, { &vop_fsync_desc, (vop_t *)ntfs_fsync }, { &vop_bmap_desc, (vop_t *)ntfs_bmap }, #if defined(__FreeBSD__) { &vop_getpages_desc, (vop_t *) ntfs_getpages }, { &vop_putpages_desc, (vop_t *) ntfs_putpages }, #endif { &vop_strategy_desc, (vop_t *)ntfs_strategy }, #if defined(__FreeBSD__) { &vop_bwrite_desc, (vop_t *)vop_stdbwrite }, #else /* defined(__NetBSD__) */ { &vop_bwrite_desc, (vop_t *)vn_bwrite }, #endif { &vop_read_desc, (vop_t *)ntfs_read }, { &vop_write_desc, (vop_t *)ntfs_write }, { NULL, NULL } }; #if defined(__FreeBSD__) static #endif struct vnodeopv_desc ntfs_vnodeop_opv_desc = { &ntfs_vnodeop_p, ntfs_vnodeop_entries }; #if defined(__FreeBSD__) VNODEOP_SET(ntfs_vnodeop_opv_desc); #endif Index: head/sys/sys/conf.h =================================================================== --- head/sys/sys/conf.h (revision 49534) +++ head/sys/sys/conf.h (revision 49535) @@ -1,231 +1,278 @@ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. 
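The ntfs_vnodeop_entries[] table above pairs vnode operation descriptors with handlers and routes everything unlisted through ntfs_bypass() via vop_default_desc. A toy user-space analogue of that table-driven dispatch (operation names and handlers here are invented):

#include <stdio.h>
#include <string.h>

typedef int (*op_t)(const char *arg);

static int op_default(const char *arg) { printf("bypass: %s\n", arg); return (-1); }
static int op_read(const char *arg)    { printf("read: %s\n", arg); return (0); }
static int op_getattr(const char *arg) { printf("getattr: %s\n", arg); return (0); }

static const struct {
	const char *name;
	op_t fn;
} ops[] = {
	{ "default", op_default },		/* plays the role of vop_default_desc */
	{ "read",    op_read },
	{ "getattr", op_getattr },
};

static int
vop_call(const char *name, const char *arg)
{
	size_t i;

	for (i = 1; i < sizeof(ops) / sizeof(ops[0]); i++)
		if (strcmp(ops[i].name, name) == 0)
			return (ops[i].fn(arg));
	return (ops[0].fn(arg));		/* unimplemented ops hit the default */
}

int
main(void)
{
	vop_call("read", "file.txt");
	vop_call("rename", "file.txt");		/* not in the table: bypassed */
	return (0);
}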
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)conf.h 8.5 (Berkeley) 1/9/95 - * $Id: conf.h,v 1.66 1999/07/17 19:58:51 phk Exp $ + * $Id: conf.h,v 1.67 1999/07/20 09:47:50 phk Exp $ */ #ifndef _SYS_CONF_H_ #define _SYS_CONF_H_ +#define SPECNAMELEN 15 + +struct tty; +struct vnode; + +struct specinfo { + struct mount *si_mountpoint; + int si_bsize_phys; /* minimum physical block size */ + int si_bsize_best; /* optimal block size / VBLK */ + int si_bsize_max; /* maximum block size */ + + udev_t si_udev; + SLIST_ENTRY(specinfo) si_hash; + struct vnode *si_hlist; + char si_name[SPECNAMELEN + 1]; + void *si_drv1, *si_drv2; + struct cdevsw *si_devsw; + union { + struct { + struct tty *__sit_tty; + } __si_tty; + } __si_u; +}; + +#define si_tty_tty __si_u.__si_tty.__sit_tty + /* + * Exported shorthand + */ +#define v_hashchain v_specinfo->si_hlist +#define v_specmountpoint v_specinfo->si_mountpoint + +/* + * Special device management + */ +#define SPECHSZ 64 +#define SPECHASH(rdev) (((unsigned)(minor(rdev)))%SPECHSZ) + +/* * Definitions of device driver entry switches */ struct buf; struct proc; -struct specinfo; -struct tty; struct uio; -struct vnode; typedef int d_open_t __P((dev_t dev, int oflags, int devtype, struct proc *p)); typedef int d_close_t __P((dev_t dev, int fflag, int devtype, struct proc *p)); typedef void d_strategy_t __P((struct buf *bp)); typedef int d_parms_t __P((dev_t dev, struct specinfo *sinfo, int ctl)); typedef int d_ioctl_t __P((dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)); typedef int d_dump_t __P((dev_t dev)); typedef int d_psize_t __P((dev_t dev)); typedef int d_read_t __P((dev_t dev, struct uio *uio, int ioflag)); typedef int d_write_t __P((dev_t dev, struct uio *uio, int ioflag)); typedef void d_stop_t __P((struct tty *tp, int rw)); typedef int d_reset_t __P((dev_t dev)); typedef struct tty *d_devtotty_t __P((dev_t dev)); typedef int 
d_poll_t __P((dev_t dev, int events, struct proc *p)); typedef int d_mmap_t __P((dev_t dev, vm_offset_t offset, int nprot)); typedef int l_open_t __P((dev_t dev, struct tty *tp)); typedef int l_close_t __P((struct tty *tp, int flag)); typedef int l_read_t __P((struct tty *tp, struct uio *uio, int flag)); typedef int l_write_t __P((struct tty *tp, struct uio *uio, int flag)); typedef int l_ioctl_t __P((struct tty *tp, u_long cmd, caddr_t data, int flag, struct proc *p)); typedef int l_rint_t __P((int c, struct tty *tp)); typedef int l_start_t __P((struct tty *tp)); typedef int l_modem_t __P((struct tty *tp, int flag)); /* * Types for d_type. */ #define D_TAPE 1 #define D_DISK 2 #define D_TTY 3 #define D_TYPEMASK 0xffff /* * Flags for d_flags. */ #define D_NOCLUSTERR 0x10000 /* disables cluter read */ #define D_NOCLUSTERW 0x20000 /* disables cluster write */ #define D_NOCLUSTERRW (D_NOCLUSTERR | D_NOCLUSTERW) #define D_CANFREE 0x40000 /* can free blocks */ /* - * Control type for d_parms() call. - */ -#define DPARM_GET 0 /* ask device to load parms in */ - -/* * Character device switch table */ struct cdevsw { d_open_t *d_open; d_close_t *d_close; d_read_t *d_read; d_write_t *d_write; d_ioctl_t *d_ioctl; d_stop_t *d_stop; d_reset_t *d_bogoreset; /* XXX not used */ d_devtotty_t *d_devtotty; d_poll_t *d_poll; d_mmap_t *d_mmap; d_strategy_t *d_strategy; char *d_name; /* base device name, e.g. 'vn' */ d_parms_t *d_bogoparms; /* XXX not used */ int d_maj; d_dump_t *d_dump; d_psize_t *d_psize; u_int d_flags; int d_maxio; int d_bmaj; }; /* * Line discipline switch table */ struct linesw { l_open_t *l_open; l_close_t *l_close; l_read_t *l_read; l_write_t *l_write; l_ioctl_t *l_ioctl; l_rint_t *l_rint; l_start_t *l_start; l_modem_t *l_modem; u_char l_hotchar; }; #ifdef KERNEL extern struct linesw linesw[]; extern int nlinesw; int ldisc_register __P((int , struct linesw *)); void ldisc_deregister __P((int)); #define LDISC_LOAD -1 /* Loadable line discipline */ #endif /* * Swap device table */ struct swdevt { udev_t sw_dev; /* For quasibogus swapdev reporting */ int sw_flags; int sw_nblks; struct vnode *sw_vp; dev_t sw_device; }; #define SW_FREED 0x01 #define SW_SEQUENTIAL 0x02 #define sw_freed sw_flags /* XXX compat */ #ifdef KERNEL d_open_t noopen; d_close_t noclose; d_read_t noread; d_write_t nowrite; d_ioctl_t noioctl; d_stop_t nostop; d_reset_t noreset; d_devtotty_t nodevtotty; d_mmap_t nommap; #define nostrategy ((d_strategy_t *)NULL) #define noparms ((d_parms_t *)NULL) #define nopoll seltrue d_dump_t nodump; #define NUMCDEVSW 256 /* * nopsize is little used, so not worth having dummy functions for. 
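The struct specinfo introduced above hangs off hash chains through si_hash, with SPECHASH() bucketing devices by minor number so the same specinfo node can be found again for a given device. A user-space analogue of that lookup-or-create scheme (types are simplified and the device numbers in main() are hypothetical):

#include <stdio.h>
#include <stdlib.h>

#define SPECHSZ		64
#define SPECHASH(minor)	((unsigned)(minor) % SPECHSZ)

struct devinfo {			/* toy stand-in for struct specinfo */
	unsigned major, minor;
	struct devinfo *next;		/* plays the role of SLIST_ENTRY(specinfo) */
};

static struct devinfo *spec_hash[SPECHSZ];

/* Return the single node for (major, minor), creating it on first use. */
static struct devinfo *
dev_enter(unsigned major, unsigned minor)
{
	struct devinfo **head = &spec_hash[SPECHASH(minor)];
	struct devinfo *d;

	for (d = *head; d != NULL; d = d->next)
		if (d->major == major && d->minor == minor)
			return (d);
	if ((d = calloc(1, sizeof(*d))) == NULL)
		return (NULL);
	d->major = major;
	d->minor = minor;
	d->next = *head;
	*head = d;
	return (d);
}

int
main(void)
{
	struct devinfo *a = dev_enter(116, 4);
	struct devinfo *b = dev_enter(116, 4);

	printf("repeated lookups share one node: %s\n", a == b ? "yes" : "no");
	return (0);
}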
*/ #define nopsize ((d_psize_t *)NULL) d_open_t nullopen; d_close_t nullclose; l_read_t l_noread; l_write_t l_nowrite; struct module; struct devsw_module_data { int (*chainevh)(struct module *, int, void *); /* next handler */ void *chainarg; /* arg for next event handler */ struct cdevsw *cdevsw; /* device functions */ /* Do not initialize fields hereafter */ }; #define DEV_MODULE(name, cmaj, bmaj, devsw, evh, arg) \ static struct devsw_module_data name##_devsw_mod = { \ evh, arg, &devsw \ }; \ \ static moduledata_t name##_mod = { \ #name, \ devsw_module_handler, \ &name##_devsw_mod \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+cmaj*256+bmaj) struct cdevsw *bdevsw __P((dev_t dev)); int cdevsw_add __P((struct cdevsw *new)); int cdevsw_remove __P((struct cdevsw *old)); dev_t chrtoblk __P((dev_t dev)); struct cdevsw *devsw __P((dev_t dev)); int devsw_module_handler __P((struct module *mod, int what, void *arg)); int iskmemdev __P((dev_t dev)); int iszerodev __P((dev_t dev)); dev_t makebdev __P((int maj, int min)); +dev_t make_dev __P((struct cdevsw *devsw, int minor, uid_t uid, gid_t gid, int perms, char *fmt, ...)) __printflike(6, 7); void setconf __P((void)); + +/* + * XXX: This gunk included in case DEVFS resurfaces + */ + +#define UID_ROOT 0 +#define UID_BIN 3 +#define UID_UUCP 66 + +#define GID_WHEEL 0 +#define GID_KMEM 2 +#define GID_OPERATOR 5 +#define GID_BIN 7 +#define GID_GAMES 13 +#define GID_DIALER 68 + #endif /* KERNEL */ #endif /* !_SYS_CONF_H_ */ Index: head/sys/sys/devfsext.h =================================================================== --- head/sys/sys/devfsext.h (revision 49534) +++ head/sys/sys/devfsext.h (revision 49535) @@ -1,85 +1,72 @@ /* * Copyright 1997,1998 Julian Elischer. All rights reserved. * julian@freebsd.org * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE HOLDER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: devfsext.h,v 1.21 1998/07/13 06:45:16 bde Exp $ + * $Id: devfsext.h,v 1.22 1998/12/10 19:57:01 eivind Exp $ */ #ifndef _SYS_DEVFSEXT_H_ #define _SYS_DEVFSEXT_H_ /* * Make a device at a path, and get a cookie for it in return. * Specify the type, the minor number and the devsw entry to use, * and the initial default perms/ownerships. 
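make_dev(), declared above, and devfs_add_devswf(), whose prototype follows, both take printf-style arguments for the device node name, bounded by the SPECNAMELEN limit defined earlier. A sketch of composing such a name safely on the caller's side (the helper here is invented; the kernel routines do their own formatting internally):

#include <stdio.h>
#include <stdarg.h>

#define SPECNAMELEN 15			/* matches the limit defined above */

static int
format_devname(char buf[SPECNAMELEN + 1], const char *fmt, ...)
{
	va_list ap;
	int n;

	va_start(ap, fmt);
	n = vsnprintf(buf, SPECNAMELEN + 1, fmt, ap);
	va_end(ap);
	return (n < 0 || n > SPECNAMELEN ? -1 : 0);	/* reject truncated names */
}

int
main(void)
{
	char name[SPECNAMELEN + 1];

	if (format_devname(name, "vn%d", 4) == 0)
		printf("device node: /dev/%s\n", name);
	return (0);
}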
void *devfs_add_devswf __P((void *devsw, int minor, int chrblk, uid_t uid, gid_t gid, int perms, char *fmt, ...)) __printflike(7, 8); /* * Make a link to a device you already made, and have the cookie for * We get another cookie, but for now, it can be discarded, as * at the moment there is nothing you can do with it that you couldn't do * with the original cookie. ( XXX this might be something I should change ) */ void *devfs_makelink __P((void *original, char *fmt, ...)) __printflike(2, 3); /* * Remove all instances of a device you have made. INCLUDING LINKS. * I.e. either the cookie from the original device or the cookie * from a link will have the effect of removing both entries. * Removing with BOTH an original cookie and one from a link is * likely to cause a panic. */ void devfs_remove_dev __P((void *devnmp)); /* * Check if a device exists and is the type you need. Returns NULL or a * cookie that can be used to try 'open' the device. XXX This is a bit * of a duplication of devfs_lookup(). I might one day try merge them a bit. * Used for mountroot under DEVFS. Path is relative to the base of the devfs. */ struct vnode *devfs_open_device __P((char *path, int devtype)); void devfs_close_device __P((struct vnode *vn)); dev_t devfs_vntodev __P((struct vnode *vn)); /* extract dev_t from devfs vn */ #define DV_CHR 0 #define DV_BLK 1 #define DV_DEV 2 - -/* XXX */ -#define UID_ROOT 0 -#define UID_BIN 3 -#define UID_UUCP 66 - -/* XXX */ -#define GID_WHEEL 0 -#define GID_KMEM 2 -#define GID_OPERATOR 5 -#define GID_BIN 7 -#define GID_GAMES 13 -#define GID_DIALER 68 #endif /* !_SYS_DEVFSEXT_H_ */
Index: head/sys/sys/vnode.h =================================================================== --- head/sys/sys/vnode.h (revision 49534) +++ head/sys/sys/vnode.h (revision 49535) @@ -1,582 +1,584 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vnode.h 8.7 (Berkeley) 2/4/94 - * $Id: vnode.h,v 1.91 1999/07/20 09:47:54 phk Exp $ + * $Id: vnode.h,v 1.92 1999/07/26 06:25:53 alc Exp $ */ #ifndef _SYS_VNODE_H_ #define _SYS_VNODE_H_ #include #include #include #include /* * The vnode is the focus of all file activity in UNIX. There is a * unique vnode allocated for each active file, each current directory, * each mounted-on file, text file, and the root. */ /* * Vnode types. VNON means no type. */ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD }; /* * Vnode tag types. * These are for the benefit of external programs only (e.g., pstat) * and should NEVER be inspected by the kernel. */ enum vtagtype { VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC, VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS, VT_UNION, VT_MSDOSFS, VT_DEVFS, VT_TFS, VT_VFS, VT_CODA, VT_NTFS }; /* * Each underlying filesystem allocates its own private area and hangs * it from v_data. If non-null, this area is freed in getnewvnode(). */ TAILQ_HEAD(buflists, buf); typedef int vop_t __P((void *)); struct namecache; /* * Reading or writing any of these items requires holding the appropriate lock. * v_freelist is locked by the global vnode_free_list simple lock. * v_mntvnodes is locked by the global mntvnodes simple lock. * v_flag, v_usecount, v_holdcount and v_writecount are * locked by the v_interlock simple lock. * v_pollinfo is locked by the lock contained inside it. 
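As a concrete illustration of these rules (a sketch only, not a substitute for vref()/vget()): a field such as v_usecount may only be touched with the interlock held, so a caller that already holds a valid vp would bracket the update roughly as follows, assuming the usual simple_lock()/simple_unlock() primitives:

	simple_lock(&vp->v_interlock);
	vp->v_usecount++;		/* reference taken under v_interlock */
	simple_unlock(&vp->v_interlock);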
*/ struct vnode { u_long v_flag; /* vnode flags (see below) */ int v_usecount; /* reference count of users */ int v_writecount; /* reference count of writers */ int v_holdcnt; /* page & buffer references */ daddr_t v_lastr; /* last read (read-ahead) */ u_long v_id; /* capability identifier */ struct mount *v_mount; /* ptr to vfs we are in */ vop_t **v_op; /* vnode operations vector */ TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ LIST_ENTRY(vnode) v_synclist; /* vnodes with dirty buffers */ long v_numoutput; /* num of writes in progress */ enum vtype v_type; /* vnode type */ union { struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ struct socket *vu_socket; /* unix ipc (VSOCK) */ struct { struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */ struct vnode *vu_specnext; } vu_spec; struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */ } v_un; struct nqlease *v_lease; /* Soft reference to lease */ daddr_t v_lastw; /* last write (write cluster) */ daddr_t v_cstart; /* start block of cluster */ daddr_t v_lasta; /* last allocation */ int v_clen; /* length of current cluster */ int v_maxio; /* maximum I/O cluster size */ struct vm_object *v_object; /* Place to store VM object */ struct simplelock v_interlock; /* lock on usecount and flag */ struct lock *v_vnlock; /* used for non-locking fs's */ enum vtagtype v_tag; /* type of underlying data */ void *v_data; /* private data for fs */ LIST_HEAD(, namecache) v_cache_src; /* Cache entries from us */ TAILQ_HEAD(, namecache) v_cache_dst; /* Cache entries to us */ struct vnode *v_dd; /* .. vnode */ u_long v_ddid; /* .. capability identifier */ struct { struct simplelock vpi_lock; /* lock to protect below */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ } v_pollinfo; #ifdef DEBUG_LOCKS const char *filename; /* Source file doing locking */ int line; /* Line number doing locking */ #endif }; #define v_mountedhere v_un.vu_mountedhere #define v_socket v_un.vu_socket #define v_specinfo v_un.vu_spec.vu_specinfo #define v_rdev v_un.vu_spec.vu_specinfo #define v_specnext v_un.vu_spec.vu_specnext #define v_fifoinfo v_un.vu_fifoinfo #define VN_POLLEVENT(vp, events) \ do { \ if ((vp)->v_pollinfo.vpi_events & (events)) \ vn_pollevent((vp), (events)); \ } while (0) /* * Vnode flags. 
*/ #define VROOT 0x00001 /* root of its file system */ #define VTEXT 0x00002 /* vnode is a pure text prototype */ #define VSYSTEM 0x00004 /* vnode being used by kernel */ #define VISTTY 0x00008 /* vnode represents a tty */ #define VXLOCK 0x00100 /* vnode is locked to change underlying type */ #define VXWANT 0x00200 /* process is waiting for vnode */ #define VBWAIT 0x00400 /* waiting for output to complete */ #define VALIASED 0x00800 /* vnode has an alias */ #define VDIROP 0x01000 /* LFS: vnode is involved in a directory op */ #define VOBJBUF 0x02000 /* Allocate buffers in VM object */ #define VNINACT 0x04000 /* LFS: skip ufs_inactive() in lfs_vunref */ #define VAGE 0x08000 /* Insert vnode at head of free list */ #define VOLOCK 0x10000 /* vnode is locked waiting for an object */ #define VOWANT 0x20000 /* a process is waiting for VOLOCK */ #define VDOOMED 0x40000 /* This vnode is being recycled */ #define VFREE 0x80000 /* This vnode is on the freelist */ #define VTBFREE 0x100000 /* This vnode is on the to-be-freelist */ #define VONWORKLST 0x200000 /* On syncer work-list */ #define VMOUNT 0x400000 /* Mount in progress */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value * is unavailable (getattr) or which is not to be changed (setattr). */ struct vattr { enum vtype va_type; /* vnode type (for create) */ u_short va_mode; /* files access mode and type */ short va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ udev_t va_fsid; /* file system id */ long va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ struct timespec va_atime; /* time of last access */ struct timespec va_mtime; /* time of last modification */ struct timespec va_ctime; /* time file changed */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ udev_t va_rdev; /* device the special file represents */ u_quad_t va_bytes; /* bytes of disk space held by file */ u_quad_t va_filerev; /* file modification number */ u_int va_vaflags; /* operations flags, see below */ long va_spare; /* remain quad aligned */ }; /* * Flags for va_vaflags. */ #define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ #define VA_EXCLUSIVE 0x02 /* exclusive create request */ /* * Flags for ioflag. */ #define IO_UNIT 0x01 /* do I/O as atomic unit */ #define IO_APPEND 0x02 /* append write to end */ #define IO_SYNC 0x04 /* do I/O synchronously */ #define IO_NODELOCKED 0x08 /* underlying node already locked */ #define IO_NDELAY 0x10 /* FNDELAY flag set in file table */ #define IO_VMIO 0x20 /* data already in VMIO space */ #define IO_INVAL 0x40 /* invalidate after I/O */ /* * Modes. Some values same as Ixxx entries from inode.h for now. */ #define VSUID 04000 /* set user id on execution */ #define VSGID 02000 /* set group id on execution */ #define VSVTX 01000 /* save swapped text even after use */ #define VREAD 00400 /* read, write, execute permissions */ #define VWRITE 00200 #define VEXEC 00100 /* * Token indicating no attribute value yet assigned. */ #define VNOVAL (-1) #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_VNODE); #endif /* * Convert between vnode types and inode formats (since POSIX.1 * defines mode word of stat structure in terms of inode formats). 
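For example, assuming the conventional vttoif_tab contents, MAKEIMODE(VREG, 0644) evaluates to S_IFREG | 0644 (octal 0100644), i.e. the mode word stat(2) reports for an ordinary read/write regular file.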
*/ extern enum vtype iftovt_tab[]; extern int vttoif_tab[]; #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) /* * Flags to various vnode functions. */ #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */ #define FORCECLOSE 0x0002 /* vflush: force file closure */ #define WRITECLOSE 0x0004 /* vflush: only close writable files */ #define DOCLOSE 0x0008 /* vclean: close active files */ #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */ #define VREF(vp) vref(vp) #ifdef DIAGNOSTIC #define VATTR_NULL(vap) vattr_null(vap) #else #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */ #endif /* DIAGNOSTIC */ #define NULLVP ((struct vnode *)NULL) #define VNODEOP_SET(f) \ C_SYSINIT(f##init, SI_SUB_VFS, SI_ORDER_SECOND, vfs_add_vnodeops, &f); \ C_SYSUNINIT(f##uninit, SI_SUB_VFS, SI_ORDER_SECOND, vfs_rm_vnodeops, &f); /* * Global vnode data. */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ extern int desiredvnodes; /* number of vnodes desired */ extern time_t syncdelay; /* max time to delay syncing data */ extern time_t filedelay; /* time to delay syncing files */ extern time_t dirdelay; /* time to delay syncing directories */ extern time_t metadelay; /* time to delay syncing metadata */ extern struct vm_zone *namei_zone; extern int prtactive; /* nonzero to call vprint() */ extern struct vattr va_null; /* predefined null vattr structure */ extern int vfs_ioopt; /* * Macro/function to check for client cache inconsistency w.r.t. leasing. */ #define LEASE_READ 0x1 /* Check lease for readers */ #define LEASE_WRITE 0x2 /* Check lease for modifiers */ extern void (*lease_updatetime) __P((int deltat)); #define VSHOULDFREE(vp) \ (!((vp)->v_flag & (VFREE|VDOOMED)) && \ !(vp)->v_holdcnt && !(vp)->v_usecount && \ (!(vp)->v_object || \ !((vp)->v_object->ref_count || (vp)->v_object->resident_page_count))) #define VSHOULDBUSY(vp) \ (((vp)->v_flag & (VFREE|VTBFREE)) && \ ((vp)->v_holdcnt || (vp)->v_usecount)) #endif /* KERNEL */ /* * Mods for extensibility. */ /* * Flags for vdesc_flags: */ #define VDESC_MAX_VPS 16 /* Low order 16 flag bits are reserved for willrele flags for vp arguments. */ #define VDESC_VP0_WILLRELE 0x0001 #define VDESC_VP1_WILLRELE 0x0002 #define VDESC_VP2_WILLRELE 0x0004 #define VDESC_VP3_WILLRELE 0x0008 #define VDESC_NOMAP_VPP 0x0100 #define VDESC_VPP_WILLRELE 0x0200 /* * VDESC_NO_OFFSET is used to identify the end of the offset list * and in places where no such field exists. */ #define VDESC_NO_OFFSET -1 /* * This structure describes the vnode operation taking place. */ struct vnodeop_desc { int vdesc_offset; /* offset in vector--first for speed */ char *vdesc_name; /* a readable name for debugging */ int vdesc_flags; /* VDESC_* flags */ /* * These ops are used by bypass routines to map and locate arguments. * Creds and procs are not needed in bypass routines, but sometimes * they are useful to (for example) transport layers. * Nameidata is useful because it has a cred in it. */ int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */ int vdesc_vpp_offset; /* return vpp location */ int vdesc_cred_offset; /* cred location, if any */ int vdesc_proc_offset; /* proc location, if any */ int vdesc_componentname_offset; /* if any */ /* * Finally, we've got a list of private data (about each operation) * for each transport layer. 
(Support to manage this list is not * yet part of BSD.) */ caddr_t *vdesc_transports; }; #ifdef KERNEL /* * A list of all the operation descs. */ extern struct vnodeop_desc *vnodeop_descs[]; /* * Interlock for scanning list of vnodes attached to a mountpoint */ extern struct simplelock mntvnode_slock; /* * This macro is very helpful in defining those offsets in the vdesc struct. * * This is stolen from X11R4. I ignored all the fancy stuff for * Crays, so if you decide to port this to such a serious machine, * you might want to consult Intrinsic.h's XtOffset{,Of,To}. */ #define VOPARG_OFFSET(p_type,field) \ ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL))) #define VOPARG_OFFSETOF(s_type,field) \ VOPARG_OFFSET(s_type*,field) #define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \ ((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET))) /* * This structure is used to configure the new vnodeops vector. */ struct vnodeopv_entry_desc { struct vnodeop_desc *opve_op; /* which operation this is */ vop_t *opve_impl; /* code implementing this operation */ }; struct vnodeopv_desc { /* ptr to the ptr to the vector where op should go */ vop_t ***opv_desc_vector_p; struct vnodeopv_entry_desc *opv_desc_ops; /* null terminated list */ }; /* * A generic structure. * This can be used by bypass routines to identify generic arguments. */ struct vop_generic_args { struct vnodeop_desc *a_desc; /* other random data follows, presumably */ }; #ifdef DEBUG_VFS_LOCKS /* * Macros to aid in tracing VFS locking problems. Not totally * reliable since if the process sleeps between changing the lock * state and checking it with the assert, some other process could * change the state. They are good enough for debugging a single * filesystem using a single-threaded test. I find that 'cvs co src' * is a pretty good test. */ /* * [dfr] Kludge until I get around to fixing all the vfs locking. */ #define IS_LOCKING_VFS(vp) ((vp)->v_tag == VT_UFS \ || (vp)->v_tag == VT_MFS \ || (vp)->v_tag == VT_NFS \ || (vp)->v_tag == VT_LFS \ || (vp)->v_tag == VT_ISOFS \ || (vp)->v_tag == VT_MSDOSFS \ || (vp)->v_tag == VT_DEVFS) #define ASSERT_VOP_LOCKED(vp, str) \ if ((vp) && IS_LOCKING_VFS(vp) && !VOP_ISLOCKED(vp)) { \ panic("%s: %p is not locked but should be", str, vp); \ } #define ASSERT_VOP_UNLOCKED(vp, str) \ if ((vp) && IS_LOCKING_VFS(vp) && VOP_ISLOCKED(vp)) { \ panic("%s: %p is locked but shouldn't be", str, vp); \ } #else #define ASSERT_VOP_LOCKED(vp, str) #define ASSERT_VOP_UNLOCKED(vp, str) #endif /* * VOCALL calls an op given an ops vector. We break it out because BSD's * vclean changes the ops vector and then wants to call ops with the old * vector. */ #define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP)) /* * This call works for vnodes in the kernel. */ #define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP)) #define VDESC(OP) (& __CONCAT(OP,_desc)) #define VOFFSET(OP) (VDESC(OP)->vdesc_offset) /* * VMIO support inline */ extern int vmiodirenable; static __inline int vn_canvmio(struct vnode *vp) { if (vp && (vp->v_type == VREG || (vmiodirenable && vp->v_type == VDIR))) return(TRUE); return(FALSE); } /* * Finally, include the default set of vnode operations. */ #include "vnode_if.h" /* * Public vnode manipulation functions. 
*/ struct componentname; struct file; struct mount; struct nameidata; struct ostat; struct proc; struct stat; struct nstat; struct ucred; struct uio; struct vattr; struct vnode; struct vop_bwrite_args; extern int (*lease_check_hook) __P((struct vop_lease_args *)); int bdevvp __P((dev_t dev, struct vnode **vpp)); /* cache_* may belong in namei.h. */ void cache_enter __P((struct vnode *dvp, struct vnode *vp, struct componentname *cnp)); int cache_lookup __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)); void cache_purge __P((struct vnode *vp)); void cache_purgevfs __P((struct mount *mp)); void cvtstat __P((struct stat *st, struct ostat *ost)); void cvtnstat __P((struct stat *sb, struct nstat *nsb)); int getnewvnode __P((enum vtagtype tag, struct mount *mp, vop_t **vops, struct vnode **vpp)); int lease_check __P((struct vop_lease_args *ap)); +int spec_vnoperate __P((struct vop_generic_args *)); int speedup_syncer __P((void)); void vattr_null __P((struct vattr *vap)); int vcount __P((struct vnode *vp)); void vdrop __P((struct vnode *)); int vfinddev __P((dev_t dev, enum vtype type, struct vnode **vpp)); void vfs_add_vnodeops __P((const void *)); void vfs_rm_vnodeops __P((const void *)); int vflush __P((struct mount *mp, struct vnode *skipvp, int flags)); int vget __P((struct vnode *vp, int lockflag, struct proc *p)); void vgone __P((struct vnode *vp)); void vhold __P((struct vnode *)); int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred, struct proc *p, int slpflag, int slptimeo)); int vtruncbuf __P((struct vnode *vp, struct ucred *cred, struct proc *p, off_t length, int blksize)); void vprint __P((char *label, struct vnode *vp)); int vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp, struct proc *p)); int vn_close __P((struct vnode *vp, int flags, struct ucred *cred, struct proc *p)); int vn_lock __P((struct vnode *vp, int flags, struct proc *p)); #ifdef DEBUG_LOCKS int debug_vn_lock __P((struct vnode *vp, int flags, struct proc *p, const char *filename, int line)); #define vn_lock(vp,flags,p) debug_vn_lock(vp,flags,p,__FILE__,__LINE__) #endif int vn_open __P((struct nameidata *ndp, int fmode, int cmode)); void vn_pollevent __P((struct vnode *vp, int events)); void vn_pollgone __P((struct vnode *vp)); int vn_pollrecord __P((struct vnode *vp, struct proc *p, int events)); int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *cred, int *aresid, struct proc *p)); int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); dev_t vn_todev __P((struct vnode *vp)); int vfs_cache_lookup __P((struct vop_lookup_args *ap)); int vfs_object_create __P((struct vnode *vp, struct proc *p, struct ucred *cred)); int vn_writechk __P((struct vnode *vp)); int vop_stdbwrite __P((struct vop_bwrite_args *ap)); int vop_stdislocked __P((struct vop_islocked_args *)); int vop_stdlock __P((struct vop_lock_args *)); int vop_stdunlock __P((struct vop_unlock_args *)); int vop_noislocked __P((struct vop_islocked_args *)); int vop_nolock __P((struct vop_lock_args *)); int vop_nopoll __P((struct vop_poll_args *)); int vop_nounlock __P((struct vop_unlock_args *)); int vop_stdpathconf __P((struct vop_pathconf_args *)); int vop_stdpoll __P((struct vop_poll_args *)); int vop_revoke __P((struct vop_revoke_args *)); int vop_sharedlock __P((struct vop_lock_args *)); int vop_eopnotsupp __P((struct vop_generic_args *ap)); int vop_ebadf __P((struct vop_generic_args *ap)); int vop_einval __P((struct 
vop_generic_args *ap)); int vop_enotty __P((struct vop_generic_args *ap)); int vop_defaultop __P((struct vop_generic_args *ap)); int vop_null __P((struct vop_generic_args *ap)); int vop_panic __P((struct vop_generic_args *ap)); struct vnode * checkalias __P((struct vnode *vp, udev_t nvp_rdev, struct mount *mp)); void vput __P((struct vnode *vp)); void vrele __P((struct vnode *vp)); void vref __P((struct vnode *vp)); void vbusy __P((struct vnode *vp)); extern vop_t **default_vnodeop_p; +extern vop_t **spec_vnodeop_p; extern TAILQ_HEAD(tobefreelist, vnode) vnode_tobefree_list; /* vnode free list */ #endif /* KERNEL */ #endif /* !_SYS_VNODE_H_ */ Index: head/sys/ufs/ffs/ffs_softdep.c =================================================================== --- head/sys/ufs/ffs/ffs_softdep.c (revision 49534) +++ head/sys/ufs/ffs/ffs_softdep.c (revision 49535) @@ -1,4485 +1,4485 @@ /* * Copyright 1998 Marshall Kirk McKusick. All Rights Reserved. * * The soft updates code is derived from the appendix of a University * of Michigan technical report (Gregory R. Ganger and Yale N. Patt, * "Soft Updates: A Solution to the Metadata Update Problem in File * Systems", CSE-TR-254-95, August 1995). * * The following are the copyrights and redistribution conditions that * apply to this copy of the soft update software. For a license * to use, redistribute or sell the soft update software under * conditions other than those described here, please contact the * author at one of the following addresses: * * Marshall Kirk McKusick mckusick@mckusick.com * 1614 Oxford Street +1-510-843-9542 * Berkeley, CA 94709-1608 * USA * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. None of the names of McKusick, Ganger, Patt, or the University of * Michigan may be used to endorse or promote products derived from * this software without specific prior written permission. * 4. Redistributions in any form must be accompanied by information on * how to obtain complete source code for any accompanying software * that uses this software. This source code must either be included * in the distribution or be available for no more than the cost of * distribution plus a nominal fee, and must be freely redistributable * under reasonable conditions. For an executable file, complete * source code means the source code for all modules it contains. * It does not mean source code for modules or files that typically * accompany the operating system on which the executable file runs, * e.g., standard library modules or system header files. * * THIS SOFTWARE IS PROVIDED BY MARSHALL KIRK MCKUSICK ``AS IS'' AND ANY * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. 
IN NO EVENT SHALL MARSHALL KIRK MCKUSICK BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)ffs_softdep.c 9.40 (McKusick) 6/15/99 - * $Id: ffs_softdep.c,v 1.33 1999/06/27 13:26:23 peter Exp $ + * $Id: ffs_softdep.c,v 1.34 1999/06/29 15:57:40 mckusick Exp $ */ /* * For now we want the safety net that the DIAGNOSTIC and DEBUG flags provide. */ #ifndef DIAGNOSTIC #define DIAGNOSTIC #endif #ifndef DEBUG #define DEBUG #endif #include #include #include #include #include #include #include #include #include -#include +#include #include #include #include #include #include #include #include #include /* * These definitions need to be adapted to the system to which * this file is being ported. */ /* * malloc types defined for the softdep system. */ MALLOC_DEFINE(M_PAGEDEP, "pagedep","File page dependencies"); MALLOC_DEFINE(M_INODEDEP, "inodedep","Inode dependencies"); MALLOC_DEFINE(M_NEWBLK, "newblk","New block allocation"); MALLOC_DEFINE(M_BMSAFEMAP, "bmsafemap","Block or frag allocated from cyl group map"); MALLOC_DEFINE(M_ALLOCDIRECT, "allocdirect","Block or frag dependency for an inode"); MALLOC_DEFINE(M_INDIRDEP, "indirdep","Indirect block dependencies"); MALLOC_DEFINE(M_ALLOCINDIR, "allocindir","Block dependency for an indirect block"); MALLOC_DEFINE(M_FREEFRAG, "freefrag","Previously used frag for an inode"); MALLOC_DEFINE(M_FREEBLKS, "freeblks","Blocks freed from an inode"); MALLOC_DEFINE(M_FREEFILE, "freefile","Inode deallocated"); MALLOC_DEFINE(M_DIRADD, "diradd","New directory entry"); MALLOC_DEFINE(M_MKDIR, "mkdir","New directory"); MALLOC_DEFINE(M_DIRREM, "dirrem","Directory entry deleted"); #define D_PAGEDEP 0 #define D_INODEDEP 1 #define D_NEWBLK 2 #define D_BMSAFEMAP 3 #define D_ALLOCDIRECT 4 #define D_INDIRDEP 5 #define D_ALLOCINDIR 6 #define D_FREEFRAG 7 #define D_FREEBLKS 8 #define D_FREEFILE 9 #define D_DIRADD 10 #define D_MKDIR 11 #define D_DIRREM 12 #define D_LAST D_DIRREM /* * translate from workitem type to memory type * MUST match the defines above, such that memtype[D_XXX] == M_XXX */ static struct malloc_type *memtype[] = { M_PAGEDEP, M_INODEDEP, M_NEWBLK, M_BMSAFEMAP, M_ALLOCDIRECT, M_INDIRDEP, M_ALLOCINDIR, M_FREEFRAG, M_FREEBLKS, M_FREEFILE, M_DIRADD, M_MKDIR, M_DIRREM }; #define DtoM(type) (memtype[type]) /* * Names of malloc types. */ #define TYPENAME(type) \ ((unsigned)(type) < D_LAST ? memtype[type]->ks_shortdesc : "???") #define CURPROC curproc /* * End system adaptaion definitions. */ /* * Internal function prototypes. 
*/ static void softdep_error __P((char *, int)); static void drain_output __P((struct vnode *, int)); static int getdirtybuf __P((struct buf **, int)); static void clear_remove __P((struct proc *)); static void clear_inodedeps __P((struct proc *)); static int flush_pagedep_deps __P((struct vnode *, struct mount *, struct diraddhd *)); static int flush_inodedep_deps __P((struct fs *, ino_t)); static int handle_written_filepage __P((struct pagedep *, struct buf *)); static void diradd_inode_written __P((struct diradd *, struct inodedep *)); static int handle_written_inodeblock __P((struct inodedep *, struct buf *)); static void handle_allocdirect_partdone __P((struct allocdirect *)); static void handle_allocindir_partdone __P((struct allocindir *)); static void initiate_write_filepage __P((struct pagedep *, struct buf *)); static void handle_written_mkdir __P((struct mkdir *, int)); static void initiate_write_inodeblock __P((struct inodedep *, struct buf *)); static void handle_workitem_freefile __P((struct freefile *)); static void handle_workitem_remove __P((struct dirrem *)); static struct dirrem *newdirrem __P((struct buf *, struct inode *, struct inode *, int)); static void free_diradd __P((struct diradd *)); static void free_allocindir __P((struct allocindir *, struct inodedep *)); static int indir_trunc __P((struct inode *, ufs_daddr_t, int, ufs_lbn_t, long *)); static void deallocate_dependencies __P((struct buf *, struct inodedep *)); static void free_allocdirect __P((struct allocdirectlst *, struct allocdirect *, int)); static int free_inodedep __P((struct inodedep *)); static void handle_workitem_freeblocks __P((struct freeblks *)); static void merge_inode_lists __P((struct inodedep *)); static void setup_allocindir_phase2 __P((struct buf *, struct inode *, struct allocindir *)); static struct allocindir *newallocindir __P((struct inode *, int, ufs_daddr_t, ufs_daddr_t)); static void handle_workitem_freefrag __P((struct freefrag *)); static struct freefrag *newfreefrag __P((struct inode *, ufs_daddr_t, long)); static void allocdirect_merge __P((struct allocdirectlst *, struct allocdirect *, struct allocdirect *)); static struct bmsafemap *bmsafemap_lookup __P((struct buf *)); static int newblk_lookup __P((struct fs *, ufs_daddr_t, int, struct newblk **)); static int inodedep_lookup __P((struct fs *, ino_t, int, struct inodedep **)); static int pagedep_lookup __P((struct inode *, ufs_lbn_t, int, struct pagedep **)); static void pause_timer __P((void *)); static int request_cleanup __P((int, int)); static void add_to_worklist __P((struct worklist *)); /* * Exported softdep operations. */ struct bio_ops bioops = { softdep_disk_io_initiation, /* io_start */ softdep_disk_write_complete, /* io_complete */ softdep_deallocate_dependencies, /* io_deallocate */ softdep_fsync, /* io_fsync */ softdep_process_worklist, /* io_sync */ }; /* * Locking primitives. * * For a uniprocessor, all we need to do is protect against disk * interrupts. For a multiprocessor, this lock would have to be * a mutex. A single mutex is used throughout this file, though * finer grain locking could be used if contention warranted it. * * For a multiprocessor, the sleep call would accept a lock and * release it after the sleep processing was complete. In a uniprocessor * implementation there is no such interlock, so we simple mark * the places where it needs to be done with the `interlocked' form * of the lock calls. 
Since the uniprocessor sleep already interlocks * the spl, there is nothing that really needs to be done. */ #ifndef /* NOT */ DEBUG static struct lockit { int lkt_spl; } lk = { 0 }; #define ACQUIRE_LOCK(lk) (lk)->lkt_spl = splbio() #define FREE_LOCK(lk) splx((lk)->lkt_spl) #define ACQUIRE_LOCK_INTERLOCKED(lk) #define FREE_LOCK_INTERLOCKED(lk) #else /* DEBUG */ static struct lockit { int lkt_spl; pid_t lkt_held; } lk = { 0, -1 }; static int lockcnt; static void acquire_lock __P((struct lockit *)); static void free_lock __P((struct lockit *)); static void acquire_lock_interlocked __P((struct lockit *)); static void free_lock_interlocked __P((struct lockit *)); #define ACQUIRE_LOCK(lk) acquire_lock(lk) #define FREE_LOCK(lk) free_lock(lk) #define ACQUIRE_LOCK_INTERLOCKED(lk) acquire_lock_interlocked(lk) #define FREE_LOCK_INTERLOCKED(lk) free_lock_interlocked(lk) static void acquire_lock(lk) struct lockit *lk; { if (lk->lkt_held != -1) { if (lk->lkt_held == CURPROC->p_pid) panic("softdep_lock: locking against myself"); else panic("softdep_lock: lock held by %d", lk->lkt_held); } lk->lkt_spl = splbio(); lk->lkt_held = CURPROC->p_pid; lockcnt++; } static void free_lock(lk) struct lockit *lk; { if (lk->lkt_held == -1) panic("softdep_unlock: lock not held"); lk->lkt_held = -1; splx(lk->lkt_spl); } static void acquire_lock_interlocked(lk) struct lockit *lk; { if (lk->lkt_held != -1) { if (lk->lkt_held == CURPROC->p_pid) panic("softdep_lock_interlocked: locking against self"); else panic("softdep_lock_interlocked: lock held by %d", lk->lkt_held); } lk->lkt_held = CURPROC->p_pid; lockcnt++; } static void free_lock_interlocked(lk) struct lockit *lk; { if (lk->lkt_held == -1) panic("softdep_unlock_interlocked: lock not held"); lk->lkt_held = -1; } #endif /* DEBUG */ /* * Place holder for real semaphores. */ struct sema { int value; pid_t holder; char *name; int prio; int timo; }; static void sema_init __P((struct sema *, char *, int, int)); static int sema_get __P((struct sema *, struct lockit *)); static void sema_release __P((struct sema *)); static void sema_init(semap, name, prio, timo) struct sema *semap; char *name; int prio, timo; { semap->holder = -1; semap->value = 0; semap->name = name; semap->prio = prio; semap->timo = timo; } static int sema_get(semap, interlock) struct sema *semap; struct lockit *interlock; { if (semap->value++ > 0) { if (interlock != NULL) FREE_LOCK_INTERLOCKED(interlock); tsleep((caddr_t)semap, semap->prio, semap->name, semap->timo); if (interlock != NULL) { ACQUIRE_LOCK_INTERLOCKED(interlock); FREE_LOCK(interlock); } return (0); } semap->holder = CURPROC->p_pid; if (interlock != NULL) FREE_LOCK(interlock); return (1); } static void sema_release(semap) struct sema *semap; { if (semap->value <= 0 || semap->holder != CURPROC->p_pid) panic("sema_release: not held"); if (--semap->value > 0) { semap->value = 0; wakeup(semap); } semap->holder = -1; } /* * Worklist queue management. * These routines require that the lock be held. 
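Holding the lock here is, on a uniprocessor, nothing more than having raised the interrupt priority level; a minimal sketch of what the non-DEBUG ACQUIRE_LOCK/FREE_LOCK macros above boil down to (the work in the middle is a placeholder):

	int s;

	s = splbio();		/* block disk interrupts around list manipulation */
	/* ... insert or remove dependency structures ... */
	splx(s);		/* restore the previous priority level */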
*/ #ifndef /* NOT */ DEBUG #define WORKLIST_INSERT(head, item) do { \ (item)->wk_state |= ONWORKLIST; \ LIST_INSERT_HEAD(head, item, wk_list); \ } while (0) #define WORKLIST_REMOVE(item) do { \ (item)->wk_state &= ~ONWORKLIST; \ LIST_REMOVE(item, wk_list); \ } while (0) #define WORKITEM_FREE(item, type) FREE(item, DtoM(type)) #else /* DEBUG */ static void worklist_insert __P((struct workhead *, struct worklist *)); static void worklist_remove __P((struct worklist *)); static void workitem_free __P((struct worklist *, int)); #define WORKLIST_INSERT(head, item) worklist_insert(head, item) #define WORKLIST_REMOVE(item) worklist_remove(item) #define WORKITEM_FREE(item, type) workitem_free((struct worklist *)item, type) static void worklist_insert(head, item) struct workhead *head; struct worklist *item; { if (lk.lkt_held == -1) panic("worklist_insert: lock not held"); if (item->wk_state & ONWORKLIST) panic("worklist_insert: already on list"); item->wk_state |= ONWORKLIST; LIST_INSERT_HEAD(head, item, wk_list); } static void worklist_remove(item) struct worklist *item; { if (lk.lkt_held == -1) panic("worklist_remove: lock not held"); if ((item->wk_state & ONWORKLIST) == 0) panic("worklist_remove: not on list"); item->wk_state &= ~ONWORKLIST; LIST_REMOVE(item, wk_list); } static void workitem_free(item, type) struct worklist *item; int type; { if (item->wk_state & ONWORKLIST) panic("workitem_free: still on list"); if (item->wk_type != type) panic("workitem_free: type mismatch"); FREE(item, DtoM(type)); } #endif /* DEBUG */ /* * Workitem queue management */ static struct workhead softdep_workitem_pending; static int softdep_worklist_busy; static int max_softdeps; /* maximum number of structs before slowdown */ static int tickdelay = 2; /* number of ticks to pause during slowdown */ static int proc_waiting; /* tracks whether we have a timeout posted */ static struct proc *filesys_syncer; /* proc of filesystem syncer process */ static int req_clear_inodedeps; /* syncer process flush some inodedeps */ #define FLUSH_INODES 1 static int req_clear_remove; /* syncer process flush some freeblks */ #define FLUSH_REMOVE 2 /* * runtime statistics */ static int stat_blk_limit_push; /* number of times block limit neared */ static int stat_ino_limit_push; /* number of times inode limit neared */ static int stat_blk_limit_hit; /* number of times block slowdown imposed */ static int stat_ino_limit_hit; /* number of times inode slowdown imposed */ static int stat_indir_blk_ptrs; /* bufs redirtied as indir ptrs not written */ static int stat_inode_bitmap; /* bufs redirtied as inode bitmap not written */ static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */ static int stat_dir_entry; /* bufs redirtied as dir entry cannot write */ #ifdef DEBUG #include #include #if defined(__FreeBSD__) SYSCTL_INT(_debug, OID_AUTO, max_softdeps, CTLFLAG_RW, &max_softdeps, 0, ""); SYSCTL_INT(_debug, OID_AUTO, tickdelay, CTLFLAG_RW, &tickdelay, 0, ""); SYSCTL_INT(_debug, OID_AUTO, blk_limit_push, CTLFLAG_RW, &stat_blk_limit_push, 0,""); SYSCTL_INT(_debug, OID_AUTO, ino_limit_push, CTLFLAG_RW, &stat_ino_limit_push, 0,""); SYSCTL_INT(_debug, OID_AUTO, blk_limit_hit, CTLFLAG_RW, &stat_blk_limit_hit, 0, ""); SYSCTL_INT(_debug, OID_AUTO, ino_limit_hit, CTLFLAG_RW, &stat_ino_limit_hit, 0, ""); SYSCTL_INT(_debug, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW, &stat_indir_blk_ptrs, 0, ""); SYSCTL_INT(_debug, OID_AUTO, inode_bitmap, CTLFLAG_RW, &stat_inode_bitmap, 0, ""); SYSCTL_INT(_debug, OID_AUTO, direct_blk_ptrs, 
CTLFLAG_RW, &stat_direct_blk_ptrs, 0, ""); SYSCTL_INT(_debug, OID_AUTO, dir_entry, CTLFLAG_RW, &stat_dir_entry, 0, ""); #else /* !__FreeBSD__ */ struct ctldebug debug20 = { "max_softdeps", &max_softdeps }; struct ctldebug debug21 = { "tickdelay", &tickdelay }; struct ctldebug debug23 = { "blk_limit_push", &stat_blk_limit_push }; struct ctldebug debug24 = { "ino_limit_push", &stat_ino_limit_push }; struct ctldebug debug25 = { "blk_limit_hit", &stat_blk_limit_hit }; struct ctldebug debug26 = { "ino_limit_hit", &stat_ino_limit_hit }; struct ctldebug debug27 = { "indir_blk_ptrs", &stat_indir_blk_ptrs }; struct ctldebug debug28 = { "inode_bitmap", &stat_inode_bitmap }; struct ctldebug debug29 = { "direct_blk_ptrs", &stat_direct_blk_ptrs }; struct ctldebug debug30 = { "dir_entry", &stat_dir_entry }; #endif /* !__FreeBSD__ */ #endif /* DEBUG */ /* * Add an item to the end of the work queue. * This routine requires that the lock be held. * This is the only routine that adds items to the list. * The following routine is the only one that removes items * and does so in order from first to last. */ static void add_to_worklist(wk) struct worklist *wk; { static struct worklist *worklist_tail; if (wk->wk_state & ONWORKLIST) panic("add_to_worklist: already on list"); wk->wk_state |= ONWORKLIST; if (LIST_FIRST(&softdep_workitem_pending) == NULL) LIST_INSERT_HEAD(&softdep_workitem_pending, wk, wk_list); else LIST_INSERT_AFTER(worklist_tail, wk, wk_list); worklist_tail = wk; } /* * Process that runs once per second to handle items in the background queue. * * Note that we ensure that everything is done in the order in which they * appear in the queue. The code below depends on this property to ensure * that blocks of a file are freed before the inode itself is freed. This * ordering ensures that no new triples will be generated * until all the old ones have been purged from the dependency lists. */ int softdep_process_worklist(matchmnt) struct mount *matchmnt; { struct proc *p = CURPROC; struct worklist *wk; struct fs *matchfs; int matchcnt; /* * Record the process identifier of our caller so that we can give * this process preferential treatment in request_cleanup below. */ filesys_syncer = p; matchcnt = 0; matchfs = NULL; if (matchmnt != NULL) matchfs = VFSTOUFS(matchmnt)->um_fs; /* * There is no danger of having multiple processes run this * code. It is single threaded solely so that softdep_flushfiles * (below) can get an accurate count of the number of items * related to its mount point that are in the list. */ if (softdep_worklist_busy && matchmnt == NULL) return (-1); /* * If requested, try removing inode or removal dependencies. 
*/ if (req_clear_inodedeps) { clear_inodedeps(p); req_clear_inodedeps = 0; wakeup(&proc_waiting); } if (req_clear_remove) { clear_remove(p); req_clear_remove = 0; wakeup(&proc_waiting); } ACQUIRE_LOCK(&lk); while ((wk = LIST_FIRST(&softdep_workitem_pending)) != 0) { WORKLIST_REMOVE(wk); FREE_LOCK(&lk); switch (wk->wk_type) { case D_DIRREM: /* removal of a directory entry */ if (WK_DIRREM(wk)->dm_mnt == matchmnt) matchcnt += 1; handle_workitem_remove(WK_DIRREM(wk)); break; case D_FREEBLKS: /* releasing blocks and/or fragments from a file */ if (WK_FREEBLKS(wk)->fb_fs == matchfs) matchcnt += 1; handle_workitem_freeblocks(WK_FREEBLKS(wk)); break; case D_FREEFRAG: /* releasing a fragment when replaced as a file grows */ if (WK_FREEFRAG(wk)->ff_fs == matchfs) matchcnt += 1; handle_workitem_freefrag(WK_FREEFRAG(wk)); break; case D_FREEFILE: /* releasing an inode when its link count drops to 0 */ if (WK_FREEFILE(wk)->fx_fs == matchfs) matchcnt += 1; handle_workitem_freefile(WK_FREEFILE(wk)); break; default: panic("%s_process_worklist: Unknown type %s", "softdep", TYPENAME(wk->wk_type)); /* NOTREACHED */ } if (softdep_worklist_busy && matchmnt == NULL) return (-1); /* * If requested, try removing inode or removal dependencies. */ if (req_clear_inodedeps) { clear_inodedeps(p); req_clear_inodedeps = 0; wakeup(&proc_waiting); } if (req_clear_remove) { clear_remove(p); req_clear_remove = 0; wakeup(&proc_waiting); } ACQUIRE_LOCK(&lk); } FREE_LOCK(&lk); return (matchcnt); } /* * Purge the work list of all items associated with a particular mount point. */ int softdep_flushfiles(oldmnt, flags, p) struct mount *oldmnt; int flags; struct proc *p; { struct vnode *devvp; int error, loopcnt; /* * Await our turn to clear out the queue. */ while (softdep_worklist_busy) tsleep(&lbolt, PRIBIO, "softflush", 0); softdep_worklist_busy = 1; if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) { softdep_worklist_busy = 0; return (error); } /* * Alternately flush the block device associated with the mount * point and process any dependencies that the flushing * creates. In theory, this loop can happen at most twice, * but we give it a few extra just to be sure. */ devvp = VFSTOUFS(oldmnt)->um_devvp; for (loopcnt = 10; loopcnt > 0; loopcnt--) { if (softdep_process_worklist(oldmnt) == 0) { /* * Do another flush in case any vnodes were brought in * as part of the cleanup operations. */ if ((error = ffs_flushfiles(oldmnt, flags, p)) != 0) break; /* * If we still found nothing to do, we are really done. */ if (softdep_process_worklist(oldmnt) == 0) break; } vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_FSYNC(devvp, p->p_ucred, MNT_WAIT, p); VOP_UNLOCK(devvp, 0, p); if (error) break; } softdep_worklist_busy = 0; /* * If we are unmounting then it is an error to fail. If we * are simply trying to downgrade to read-only, then filesystem * activity can keep us busy forever, so we just fail with EBUSY. */ if (loopcnt == 0) { if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) panic("softdep_flushfiles: looping"); error = EBUSY; } return (error); } /* * Structure hashing. * * There are three types of structures that can be looked up: * 1) pagedep structures identified by mount point, inode number, * and logical block. * 2) inodedep structures identified by mount point and inode number. * 3) newblk structures identified by mount point and * physical block number. * * The "pagedep" and "inodedep" dependency structures are hashed * separately from the file blocks and inodes to which they correspond. 
* This separation helps when the in-memory copy of an inode or * file block must be replaced. It also obviates the need to access * an inode or file page when simply updating (or de-allocating) * dependency structures. Lookup of newblk structures is needed to * find newly allocated blocks when trying to associate them with * their allocdirect or allocindir structure. * * The lookup routines optionally create and hash a new instance when * an existing entry is not found. */ #define DEPALLOC 0x0001 /* allocate structure if lookup fails */ /* * Structures and routines associated with pagedep caching. */ LIST_HEAD(pagedep_hashhead, pagedep) *pagedep_hashtbl; u_long pagedep_hash; /* size of hash table - 1 */ #define PAGEDEP_HASH(mp, inum, lbn) \ (&pagedep_hashtbl[((((register_t)(mp)) >> 13) + (inum) + (lbn)) & \ pagedep_hash]) static struct sema pagedep_in_progress; /* * Look up a pagedep. Return 1 if found, 0 if not found. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in pagedeppp. * This routine must be called with splbio interrupts blocked. */ static int pagedep_lookup(ip, lbn, flags, pagedeppp) struct inode *ip; ufs_lbn_t lbn; int flags; struct pagedep **pagedeppp; { struct pagedep *pagedep; struct pagedep_hashhead *pagedephd; struct mount *mp; int i; #ifdef DEBUG if (lk.lkt_held == -1) panic("pagedep_lookup: lock not held"); #endif mp = ITOV(ip)->v_mount; pagedephd = PAGEDEP_HASH(mp, ip->i_number, lbn); top: for (pagedep = LIST_FIRST(pagedephd); pagedep; pagedep = LIST_NEXT(pagedep, pd_hash)) if (ip->i_number == pagedep->pd_ino && lbn == pagedep->pd_lbn && mp == pagedep->pd_mnt) break; if (pagedep) { *pagedeppp = pagedep; return (1); } if ((flags & DEPALLOC) == 0) { *pagedeppp = NULL; return (0); } if (sema_get(&pagedep_in_progress, &lk) == 0) { ACQUIRE_LOCK(&lk); goto top; } MALLOC(pagedep, struct pagedep *, sizeof(struct pagedep), M_PAGEDEP, M_WAITOK); bzero(pagedep, sizeof(struct pagedep)); pagedep->pd_list.wk_type = D_PAGEDEP; pagedep->pd_mnt = mp; pagedep->pd_ino = ip->i_number; pagedep->pd_lbn = lbn; LIST_INIT(&pagedep->pd_dirremhd); LIST_INIT(&pagedep->pd_pendinghd); for (i = 0; i < DAHASHSZ; i++) LIST_INIT(&pagedep->pd_diraddhd[i]); ACQUIRE_LOCK(&lk); LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash); sema_release(&pagedep_in_progress); *pagedeppp = pagedep; return (0); } /* * Structures and routines associated with inodedep caching. */ LIST_HEAD(inodedep_hashhead, inodedep) *inodedep_hashtbl; static u_long inodedep_hash; /* size of hash table - 1 */ static long num_inodedep; /* number of inodedep allocated */ #define INODEDEP_HASH(fs, inum) \ (&inodedep_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & inodedep_hash]) static struct sema inodedep_in_progress; /* * Look up a inodedep. Return 1 if found, 0 if not found. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in inodedeppp. * This routine must be called with splbio interrupts blocked. 
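The pagedep and inodedep caches above (and the newblk cache below) share the same open-hashing scheme: the key is folded into a bucket index and masked with the size-minus-one value stored by hashinit(). A small self-contained sketch of that folding, using made-up values (the table size and the pointer value here are hypothetical):

	#include <stdio.h>

	int
	main(void)
	{
		unsigned long fsptr = 0xc0a1e000UL;	/* stand-in for a struct fs pointer */
		unsigned long inum = 1234;		/* inode number being looked up */
		unsigned long mask = 256 - 1;		/* mask for a hypothetical 256-bucket table */
		unsigned long bucket;

		/* same folding as INODEDEP_HASH: drop low pointer bits, add the key, mask */
		bucket = ((fsptr >> 13) + inum) & mask;
		printf("bucket %lu\n", bucket);
		return (0);
	}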
*/ static int inodedep_lookup(fs, inum, flags, inodedeppp) struct fs *fs; ino_t inum; int flags; struct inodedep **inodedeppp; { struct inodedep *inodedep; struct inodedep_hashhead *inodedephd; int firsttry; #ifdef DEBUG if (lk.lkt_held == -1) panic("inodedep_lookup: lock not held"); #endif firsttry = 1; inodedephd = INODEDEP_HASH(fs, inum); top: for (inodedep = LIST_FIRST(inodedephd); inodedep; inodedep = LIST_NEXT(inodedep, id_hash)) if (inum == inodedep->id_ino && fs == inodedep->id_fs) break; if (inodedep) { *inodedeppp = inodedep; return (1); } if ((flags & DEPALLOC) == 0) { *inodedeppp = NULL; return (0); } /* * If we are over our limit, try to improve the situation. */ if (num_inodedep > max_softdeps && firsttry && speedup_syncer() == 0 && request_cleanup(FLUSH_INODES, 1)) { firsttry = 0; goto top; } if (sema_get(&inodedep_in_progress, &lk) == 0) { ACQUIRE_LOCK(&lk); goto top; } num_inodedep += 1; MALLOC(inodedep, struct inodedep *, sizeof(struct inodedep), M_INODEDEP, M_WAITOK); inodedep->id_list.wk_type = D_INODEDEP; inodedep->id_fs = fs; inodedep->id_ino = inum; inodedep->id_state = ALLCOMPLETE; inodedep->id_nlinkdelta = 0; inodedep->id_savedino = NULL; inodedep->id_savedsize = -1; inodedep->id_buf = NULL; LIST_INIT(&inodedep->id_pendinghd); LIST_INIT(&inodedep->id_inowait); LIST_INIT(&inodedep->id_bufwait); TAILQ_INIT(&inodedep->id_inoupdt); TAILQ_INIT(&inodedep->id_newinoupdt); ACQUIRE_LOCK(&lk); LIST_INSERT_HEAD(inodedephd, inodedep, id_hash); sema_release(&inodedep_in_progress); *inodedeppp = inodedep; return (0); } /* * Structures and routines associated with newblk caching. */ LIST_HEAD(newblk_hashhead, newblk) *newblk_hashtbl; u_long newblk_hash; /* size of hash table - 1 */ #define NEWBLK_HASH(fs, inum) \ (&newblk_hashtbl[((((register_t)(fs)) >> 13) + (inum)) & newblk_hash]) static struct sema newblk_in_progress; /* * Look up a newblk. Return 1 if found, 0 if not found. * If not found, allocate if DEPALLOC flag is passed. * Found or allocated entry is returned in newblkpp. */ static int newblk_lookup(fs, newblkno, flags, newblkpp) struct fs *fs; ufs_daddr_t newblkno; int flags; struct newblk **newblkpp; { struct newblk *newblk; struct newblk_hashhead *newblkhd; newblkhd = NEWBLK_HASH(fs, newblkno); top: for (newblk = LIST_FIRST(newblkhd); newblk; newblk = LIST_NEXT(newblk, nb_hash)) if (newblkno == newblk->nb_newblkno && fs == newblk->nb_fs) break; if (newblk) { *newblkpp = newblk; return (1); } if ((flags & DEPALLOC) == 0) { *newblkpp = NULL; return (0); } if (sema_get(&newblk_in_progress, 0) == 0) goto top; MALLOC(newblk, struct newblk *, sizeof(struct newblk), M_NEWBLK, M_WAITOK); newblk->nb_state = 0; newblk->nb_fs = fs; newblk->nb_newblkno = newblkno; LIST_INSERT_HEAD(newblkhd, newblk, nb_hash); sema_release(&newblk_in_progress); *newblkpp = newblk; return (0); } /* * Executed during filesystem system initialization before * mounting any file systems. */ void softdep_initialize() { LIST_INIT(&mkdirlisthd); LIST_INIT(&softdep_workitem_pending); max_softdeps = desiredvnodes * 8; pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP, &pagedep_hash); sema_init(&pagedep_in_progress, "pagedep", PRIBIO, 0); inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP, &inodedep_hash); sema_init(&inodedep_in_progress, "inodedep", PRIBIO, 0); newblk_hashtbl = hashinit(64, M_NEWBLK, &newblk_hash); sema_init(&newblk_in_progress, "newblk", PRIBIO, 0); } /* * Called at mount time to notify the dependency code that a * filesystem wishes to use it. 
*/ int softdep_mount(devvp, mp, fs, cred) struct vnode *devvp; struct mount *mp; struct fs *fs; struct ucred *cred; { struct csum cstotal; struct cg *cgp; struct buf *bp; int error, cyl; mp->mnt_flag &= ~MNT_ASYNC; mp->mnt_flag |= MNT_SOFTDEP; /* * When doing soft updates, the counters in the * superblock may have gotten out of sync, so we have * to scan the cylinder groups and recalculate them. */ if (fs->fs_clean != 0) return (0); bzero(&cstotal, sizeof cstotal); for (cyl = 0; cyl < fs->fs_ncg; cyl++) { if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)), fs->fs_cgsize, cred, &bp)) != 0) { brelse(bp); return (error); } cgp = (struct cg *)bp->b_data; cstotal.cs_nffree += cgp->cg_cs.cs_nffree; cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree; cstotal.cs_nifree += cgp->cg_cs.cs_nifree; cstotal.cs_ndir += cgp->cg_cs.cs_ndir; fs->fs_cs(fs, cyl) = cgp->cg_cs; brelse(bp); } #ifdef DEBUG if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal)) printf("ffs_mountfs: superblock updated for soft updates\n"); #endif bcopy(&cstotal, &fs->fs_cstotal, sizeof cstotal); return (0); } /* * Protecting the freemaps (or bitmaps). * * To eliminate the need to execute fsck before mounting a file system * after a power failure, one must (conservatively) guarantee that the * on-disk copy of the bitmaps never indicate that a live inode or block is * free. So, when a block or inode is allocated, the bitmap should be * updated (on disk) before any new pointers. When a block or inode is * freed, the bitmap should not be updated until all pointers have been * reset. The latter dependency is handled by the delayed de-allocation * approach described below for block and inode de-allocation. The former * dependency is handled by calling the following procedure when a block or * inode is allocated. When an inode is allocated an "inodedep" is created * with its DEPCOMPLETE flag cleared until its bitmap is written to disk. * Each "inodedep" is also inserted into the hash indexing structure so * that any additional link additions can be made dependent on the inode * allocation. * * The ufs file system maintains a number of free block counts (e.g., per * cylinder group, per cylinder and per pair) * in addition to the bitmaps. These counts are used to improve efficiency * during allocation and therefore must be consistent with the bitmaps. * There is no convenient way to guarantee post-crash consistency of these * counts with simple update ordering, for two main reasons: (1) The counts * and bitmaps for a single cylinder group block are not in the same disk * sector. If a disk write is interrupted (e.g., by power failure), one may * be written and the other not. (2) Some of the counts are located in the * superblock rather than the cylinder group block. So, we focus our soft * updates implementation on protecting the bitmaps. When mounting a * filesystem, we recompute the auxiliary counts from the bitmaps. */ /* * Called just after updating the cylinder group block to allocate an inode. */ void softdep_setup_inomapdep(bp, ip, newinum) struct buf *bp; /* buffer for cylgroup block with inode map */ struct inode *ip; /* inode related to allocation */ ino_t newinum; /* new inode number being allocated */ { struct inodedep *inodedep; struct bmsafemap *bmsafemap; /* * Create a dependency for the newly allocated inode. * Panic if it already exists as something is seriously wrong. * Otherwise add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. 
*/ ACQUIRE_LOCK(&lk); if (inodedep_lookup(ip->i_fs, newinum, DEPALLOC, &inodedep) != 0) panic("softdep_setup_inomapdep: found inode"); inodedep->id_buf = bp; inodedep->id_state &= ~DEPCOMPLETE; bmsafemap = bmsafemap_lookup(bp); LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps); FREE_LOCK(&lk); } /* * Called just after updating the cylinder group block to * allocate block or fragment. */ void softdep_setup_blkmapdep(bp, fs, newblkno) struct buf *bp; /* buffer for cylgroup block with block map */ struct fs *fs; /* filesystem doing allocation */ ufs_daddr_t newblkno; /* number of newly allocated block */ { struct newblk *newblk; struct bmsafemap *bmsafemap; /* * Create a dependency for the newly allocated block. * Add it to the dependency list for the buffer holding * the cylinder group map from which it was allocated. */ if (newblk_lookup(fs, newblkno, DEPALLOC, &newblk) != 0) panic("softdep_setup_blkmapdep: found block"); ACQUIRE_LOCK(&lk); newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(bp); LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps); FREE_LOCK(&lk); } /* * Find the bmsafemap associated with a cylinder group buffer. * If none exists, create one. The buffer must be locked when * this routine is called and this routine must be called with * splbio interrupts blocked. */ static struct bmsafemap * bmsafemap_lookup(bp) struct buf *bp; { struct bmsafemap *bmsafemap; struct worklist *wk; #ifdef DEBUG if (lk.lkt_held == -1) panic("bmsafemap_lookup: lock not held"); #endif for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) if (wk->wk_type == D_BMSAFEMAP) return (WK_BMSAFEMAP(wk)); FREE_LOCK(&lk); MALLOC(bmsafemap, struct bmsafemap *, sizeof(struct bmsafemap), M_BMSAFEMAP, M_WAITOK); bmsafemap->sm_list.wk_type = D_BMSAFEMAP; bmsafemap->sm_list.wk_state = 0; bmsafemap->sm_buf = bp; LIST_INIT(&bmsafemap->sm_allocdirecthd); LIST_INIT(&bmsafemap->sm_allocindirhd); LIST_INIT(&bmsafemap->sm_inodedephd); LIST_INIT(&bmsafemap->sm_newblkhd); ACQUIRE_LOCK(&lk); WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list); return (bmsafemap); } /* * Direct block allocation dependencies. * * When a new block is allocated, the corresponding disk locations must be * initialized (with zeros or new data) before the on-disk inode points to * them. Also, the freemap from which the block was allocated must be * updated (on disk) before the inode's pointer. These two dependencies are * independent of each other and are needed for all file blocks and indirect * blocks that are pointed to directly by the inode. Just before the * "in-core" version of the inode is updated with a newly allocated block * number, a procedure (below) is called to setup allocation dependency * structures. These structures are removed when the corresponding * dependencies are satisfied or when the block allocation becomes obsolete * (i.e., the file is deleted, the block is de-allocated, or the block is a * fragment that gets upgraded). All of these cases are handled in * procedures described later. * * When a file extension causes a fragment to be upgraded, either to a larger * fragment or to a full block, the on-disk location may change (if the * previous fragment could not simply be extended). In this case, the old * fragment must be de-allocated, but not until after the inode's pointer has * been updated. In most cases, this is handled by later procedures, which * will construct a "freefrag" structure to be added to the workitem queue * when the inode update is complete (or obsolete). 
The main exception to * this is when an allocation occurs while a pending allocation dependency * (for the same block pointer) remains. This case is handled in the main * allocation dependency setup procedure by immediately freeing the * unreferenced fragments. */ void softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp) struct inode *ip; /* inode to which block is being added */ ufs_lbn_t lbn; /* block pointer within inode */ ufs_daddr_t newblkno; /* disk block number being added */ ufs_daddr_t oldblkno; /* previous block number, 0 unless frag */ long newsize; /* size of new block */ long oldsize; /* size of new block */ struct buf *bp; /* bp for allocated block */ { struct allocdirect *adp, *oldadp; struct allocdirectlst *adphead; struct bmsafemap *bmsafemap; struct inodedep *inodedep; struct pagedep *pagedep; struct newblk *newblk; MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect), M_ALLOCDIRECT, M_WAITOK); bzero(adp, sizeof(struct allocdirect)); adp->ad_list.wk_type = D_ALLOCDIRECT; adp->ad_lbn = lbn; adp->ad_newblkno = newblkno; adp->ad_oldblkno = oldblkno; adp->ad_newsize = newsize; adp->ad_oldsize = oldsize; adp->ad_state = ATTACHED; if (newblkno == oldblkno) adp->ad_freefrag = NULL; else adp->ad_freefrag = newfreefrag(ip, oldblkno, oldsize); if (newblk_lookup(ip->i_fs, newblkno, 0, &newblk) == 0) panic("softdep_setup_allocdirect: lost block"); ACQUIRE_LOCK(&lk); (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); adp->ad_inodedep = inodedep; if (newblk->nb_state == DEPCOMPLETE) { adp->ad_state |= DEPCOMPLETE; adp->ad_buf = NULL; } else { bmsafemap = newblk->nb_bmsafemap; adp->ad_buf = bmsafemap->sm_buf; LIST_REMOVE(newblk, nb_deps); LIST_INSERT_HEAD(&bmsafemap->sm_allocdirecthd, adp, ad_deps); } LIST_REMOVE(newblk, nb_hash); FREE(newblk, M_NEWBLK); WORKLIST_INSERT(&bp->b_dep, &adp->ad_list); if (lbn >= NDADDR) { /* allocating an indirect block */ if (oldblkno != 0) panic("softdep_setup_allocdirect: non-zero indir"); } else { /* * Allocating a direct block. * * If we are allocating a directory block, then we must * allocate an associated pagedep to track additions and * deletions. */ if ((ip->i_mode & IFMT) == IFDIR && pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); } /* * The list of allocdirects must be kept in sorted and ascending * order so that the rollback routines can quickly determine the * first uncommitted block (the size of the file stored on disk * ends at the end of the lowest committed fragment, or if there * are no fragments, at the end of the highest committed block). * Since files generally grow, the typical case is that the new * block is to be added at the end of the list. We speed this * special case by checking against the last allocdirect in the * list before laboriously traversing the list looking for the * insertion point. 
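/*
 * An illustrative sketch of the sorted-insertion strategy described in the
 * comment above: the allocdirect list is kept ascending by logical block
 * number, and because files usually grow, the tail of the list is checked
 * first before falling back to a linear scan. The merge of an entry with
 * the same lbn is omitted here; the types are simplified stand-ins for the
 * kernel ones.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct ad {
        TAILQ_ENTRY(ad) ad_next;
        long ad_lbn;
};
TAILQ_HEAD(adlist, ad);

static void
ad_insert_sorted(struct adlist *head, struct ad *adp)
{
        struct ad *oldadp;

        /* Fast path: the new block usually belongs at the end. */
        oldadp = TAILQ_LAST(head, adlist);
        if (oldadp == NULL || oldadp->ad_lbn <= adp->ad_lbn) {
                TAILQ_INSERT_TAIL(head, adp, ad_next);
                return;
        }
        /* Slow path: find the first entry at or beyond the new lbn. */
        TAILQ_FOREACH(oldadp, head, ad_next)
                if (oldadp->ad_lbn >= adp->ad_lbn)
                        break;
        TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
}

int
main(void)
{
        struct adlist head = TAILQ_HEAD_INITIALIZER(head);
        long lbns[] = { 0, 1, 2, 5, 3 };        /* the 3 forces the slow path */
        struct ad *adp;
        size_t i;

        for (i = 0; i < sizeof(lbns) / sizeof(lbns[0]); i++) {
                adp = calloc(1, sizeof(*adp));
                if (adp == NULL)
                        abort();
                adp->ad_lbn = lbns[i];
                ad_insert_sorted(&head, adp);
        }
        TAILQ_FOREACH(adp, &head, ad_next)
                printf("%ld ", adp->ad_lbn);
        printf("\n");           /* prints: 0 1 2 3 5 */
        return (0);
}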
*/ adphead = &inodedep->id_newinoupdt; oldadp = TAILQ_LAST(adphead, allocdirectlst); if (oldadp == NULL || oldadp->ad_lbn <= lbn) { /* insert at end of list */ TAILQ_INSERT_TAIL(adphead, adp, ad_next); if (oldadp != NULL && oldadp->ad_lbn == lbn) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); return; } for (oldadp = TAILQ_FIRST(adphead); oldadp; oldadp = TAILQ_NEXT(oldadp, ad_next)) { if (oldadp->ad_lbn >= lbn) break; } if (oldadp == NULL) panic("softdep_setup_allocdirect: lost entry"); /* insert in middle of list */ TAILQ_INSERT_BEFORE(oldadp, adp, ad_next); if (oldadp->ad_lbn == lbn) allocdirect_merge(adphead, adp, oldadp); FREE_LOCK(&lk); } /* * Replace an old allocdirect dependency with a newer one. * This routine must be called with splbio interrupts blocked. */ static void allocdirect_merge(adphead, newadp, oldadp) struct allocdirectlst *adphead; /* head of list holding allocdirects */ struct allocdirect *newadp; /* allocdirect being added */ struct allocdirect *oldadp; /* existing allocdirect being checked */ { struct freefrag *freefrag; #ifdef DEBUG if (lk.lkt_held == -1) panic("allocdirect_merge: lock not held"); #endif if (newadp->ad_oldblkno != oldadp->ad_newblkno || newadp->ad_oldsize != oldadp->ad_newsize || newadp->ad_lbn >= NDADDR) panic("allocdirect_check: old %d != new %d || lbn %ld >= %d", newadp->ad_oldblkno, oldadp->ad_newblkno, newadp->ad_lbn, NDADDR); newadp->ad_oldblkno = oldadp->ad_oldblkno; newadp->ad_oldsize = oldadp->ad_oldsize; /* * If the old dependency had a fragment to free or had never * previously had a block allocated, then the new dependency * can immediately post its freefrag and adopt the old freefrag. * This action is done by swapping the freefrag dependencies. * The new dependency gains the old one's freefrag, and the * old one gets the new one and then immediately puts it on * the worklist when it is freed by free_allocdirect. It is * not possible to do this swap when the old dependency had a * non-zero size but no previous fragment to free. This condition * arises when the new block is an extension of the old block. * Here, the first part of the fragment allocated to the new * dependency is part of the block currently claimed on disk by * the old dependency, so cannot legitimately be freed until the * conditions for the new dependency are fulfilled. */ if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) { freefrag = newadp->ad_freefrag; newadp->ad_freefrag = oldadp->ad_freefrag; oldadp->ad_freefrag = freefrag; } free_allocdirect(adphead, oldadp, 0); } /* * Allocate a new freefrag structure if needed. */ static struct freefrag * newfreefrag(ip, blkno, size) struct inode *ip; ufs_daddr_t blkno; long size; { struct freefrag *freefrag; struct fs *fs; if (blkno == 0) return (NULL); fs = ip->i_fs; if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag) panic("newfreefrag: frag size"); MALLOC(freefrag, struct freefrag *, sizeof(struct freefrag), M_FREEFRAG, M_WAITOK); freefrag->ff_list.wk_type = D_FREEFRAG; freefrag->ff_state = ip->i_uid & ~ONWORKLIST; /* XXX - used below */ freefrag->ff_inum = ip->i_number; freefrag->ff_fs = fs; freefrag->ff_devvp = ip->i_devvp; freefrag->ff_blkno = blkno; freefrag->ff_fragsize = size; return (freefrag); } /* * This workitem de-allocates fragments that were replaced during * file block allocation. 
*/ static void handle_workitem_freefrag(freefrag) struct freefrag *freefrag; { struct inode tip; tip.i_fs = freefrag->ff_fs; tip.i_devvp = freefrag->ff_devvp; tip.i_dev = freefrag->ff_devvp->v_rdev; tip.i_number = freefrag->ff_inum; tip.i_uid = freefrag->ff_state & ~ONWORKLIST; /* XXX - set above */ ffs_blkfree(&tip, freefrag->ff_blkno, freefrag->ff_fragsize); FREE(freefrag, M_FREEFRAG); } /* * Indirect block allocation dependencies. * * The same dependencies that exist for a direct block also exist when * a new block is allocated and pointed to by an entry in a block of * indirect pointers. The undo/redo states described above are also * used here. Because an indirect block contains many pointers that * may have dependencies, a second copy of the entire in-memory indirect * block is kept. The buffer cache copy is always completely up-to-date. * The second copy, which is used only as a source for disk writes, * contains only the safe pointers (i.e., those that have no remaining * update dependencies). The second copy is freed when all pointers * are safe. The cache is not allowed to replace indirect blocks with * pending update dependencies. If a buffer containing an indirect * block with dependencies is written, these routines will mark it * dirty again. It can only be successfully written once all the * dependencies are removed. The ffs_fsync routine in conjunction with * softdep_sync_metadata work together to get all the dependencies * removed so that a file can be successfully written to disk. Three * procedures are used when setting up indirect block pointer * dependencies. The division is necessary because of the organization * of the "balloc" routine and because of the distinction between file * pages and file metadata blocks. */ /* * Allocate a new allocindir structure. */ static struct allocindir * newallocindir(ip, ptrno, newblkno, oldblkno) struct inode *ip; /* inode for file being extended */ int ptrno; /* offset of pointer in indirect block */ ufs_daddr_t newblkno; /* disk block number being added */ ufs_daddr_t oldblkno; /* previous block number, 0 if none */ { struct allocindir *aip; MALLOC(aip, struct allocindir *, sizeof(struct allocindir), M_ALLOCINDIR, M_WAITOK); bzero(aip, sizeof(struct allocindir)); aip->ai_list.wk_type = D_ALLOCINDIR; aip->ai_state = ATTACHED; aip->ai_offset = ptrno; aip->ai_newblkno = newblkno; aip->ai_oldblkno = oldblkno; aip->ai_freefrag = newfreefrag(ip, oldblkno, ip->i_fs->fs_bsize); return (aip); } /* * Called just before setting an indirect block pointer * to a newly allocated file page. */ void softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp) struct inode *ip; /* inode for file being extended */ ufs_lbn_t lbn; /* allocated block number within file */ struct buf *bp; /* buffer with indirect blk referencing page */ int ptrno; /* offset of pointer in indirect block */ ufs_daddr_t newblkno; /* disk block number being added */ ufs_daddr_t oldblkno; /* previous block number, 0 if none */ struct buf *nbp; /* buffer holding allocated page */ { struct allocindir *aip; struct pagedep *pagedep; aip = newallocindir(ip, ptrno, newblkno, oldblkno); ACQUIRE_LOCK(&lk); /* * If we are allocating a directory page, then we must * allocate an associated pagedep to track additions and * deletions. 
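/*
 * An illustrative sketch of the "safe copy" idea described in the indirect
 * block allocation comment above: the in-memory block always holds the
 * up-to-date pointers, while a second copy holds only pointers whose
 * dependencies are complete, and it is that copy which would be handed to
 * the disk. The array size and types here are arbitrary stand-ins.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

#define NPTRS 4

struct shadow_indir {
        uint32_t live[NPTRS];   /* buffer cache copy: always up to date */
        uint32_t safe[NPTRS];   /* source for disk writes: safe pointers only */
};

/* Record a newly allocated block; the safe copy keeps the rollback value. */
static void
set_pointer(struct shadow_indir *ib, int idx, uint32_t newblkno)
{
        ib->live[idx] = newblkno;
        /* ib->safe[idx] is left alone until the dependency completes */
}

/* The bitmap (and data) have reached the disk: publish the pointer. */
static void
dep_complete(struct shadow_indir *ib, int idx)
{
        ib->safe[idx] = ib->live[idx];
}

int
main(void)
{
        struct shadow_indir ib;

        memset(&ib, 0, sizeof(ib));
        set_pointer(&ib, 2, 1234);
        printf("a write now would store %u\n", ib.safe[2]);     /* 0: rolled back */
        dep_complete(&ib, 2);
        printf("a write now would store %u\n", ib.safe[2]);     /* 1234 */
        return (0);
}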
*/ if ((ip->i_mode & IFMT) == IFDIR && pagedep_lookup(ip, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&nbp->b_dep, &pagedep->pd_list); WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); FREE_LOCK(&lk); setup_allocindir_phase2(bp, ip, aip); } /* * Called just before setting an indirect block pointer to a * newly allocated indirect block. */ void softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno) struct buf *nbp; /* newly allocated indirect block */ struct inode *ip; /* inode for file being extended */ struct buf *bp; /* indirect block referencing allocated block */ int ptrno; /* offset of pointer in indirect block */ ufs_daddr_t newblkno; /* disk block number being added */ { struct allocindir *aip; aip = newallocindir(ip, ptrno, newblkno, 0); ACQUIRE_LOCK(&lk); WORKLIST_INSERT(&nbp->b_dep, &aip->ai_list); FREE_LOCK(&lk); setup_allocindir_phase2(bp, ip, aip); } /* * Called to finish the allocation of the "aip" allocated * by one of the two routines above. */ static void setup_allocindir_phase2(bp, ip, aip) struct buf *bp; /* in-memory copy of the indirect block */ struct inode *ip; /* inode for file being extended */ struct allocindir *aip; /* allocindir allocated by the above routines */ { struct worklist *wk; struct indirdep *indirdep, *newindirdep; struct bmsafemap *bmsafemap; struct allocindir *oldaip; struct freefrag *freefrag; struct newblk *newblk; if (bp->b_lblkno >= 0) panic("setup_allocindir_phase2: not indir blk"); for (indirdep = NULL, newindirdep = NULL; ; ) { ACQUIRE_LOCK(&lk); for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) { if (wk->wk_type != D_INDIRDEP) continue; indirdep = WK_INDIRDEP(wk); break; } if (indirdep == NULL && newindirdep) { indirdep = newindirdep; WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list); newindirdep = NULL; } FREE_LOCK(&lk); if (indirdep) { if (newblk_lookup(ip->i_fs, aip->ai_newblkno, 0, &newblk) == 0) panic("setup_allocindir: lost block"); ACQUIRE_LOCK(&lk); if (newblk->nb_state == DEPCOMPLETE) { aip->ai_state |= DEPCOMPLETE; aip->ai_buf = NULL; } else { bmsafemap = newblk->nb_bmsafemap; aip->ai_buf = bmsafemap->sm_buf; LIST_REMOVE(newblk, nb_deps); LIST_INSERT_HEAD(&bmsafemap->sm_allocindirhd, aip, ai_deps); } LIST_REMOVE(newblk, nb_hash); FREE(newblk, M_NEWBLK); aip->ai_indirdep = indirdep; /* * Check to see if there is an existing dependency * for this block. If there is, merge the old * dependency into the new one. 
*/ if (aip->ai_oldblkno == 0) oldaip = NULL; else for (oldaip=LIST_FIRST(&indirdep->ir_deplisthd); oldaip; oldaip = LIST_NEXT(oldaip, ai_next)) if (oldaip->ai_offset == aip->ai_offset) break; if (oldaip != NULL) { if (oldaip->ai_newblkno != aip->ai_oldblkno) panic("setup_allocindir_phase2: blkno"); aip->ai_oldblkno = oldaip->ai_oldblkno; freefrag = oldaip->ai_freefrag; oldaip->ai_freefrag = aip->ai_freefrag; aip->ai_freefrag = freefrag; free_allocindir(oldaip, NULL); } LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next); ((ufs_daddr_t *)indirdep->ir_savebp->b_data) [aip->ai_offset] = aip->ai_oldblkno; FREE_LOCK(&lk); } if (newindirdep) { if (indirdep->ir_savebp != NULL) brelse(newindirdep->ir_savebp); WORKITEM_FREE((caddr_t)newindirdep, D_INDIRDEP); } if (indirdep) break; MALLOC(newindirdep, struct indirdep *, sizeof(struct indirdep), M_INDIRDEP, M_WAITOK); newindirdep->ir_list.wk_type = D_INDIRDEP; newindirdep->ir_state = ATTACHED; LIST_INIT(&newindirdep->ir_deplisthd); LIST_INIT(&newindirdep->ir_donehd); if (bp->b_blkno == bp->b_lblkno) { VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } newindirdep->ir_savebp = getblk(ip->i_devvp, bp->b_blkno, bp->b_bcount, 0, 0); BUF_KERNPROC(newindirdep->ir_savebp); bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount); } } /* * Block de-allocation dependencies. * * When blocks are de-allocated, the on-disk pointers must be nullified before * the blocks are made available for use by other files. (The true * requirement is that old pointers must be nullified before new on-disk * pointers are set. We chose this slightly more stringent requirement to * reduce complexity.) Our implementation handles this dependency by updating * the inode (or indirect block) appropriately but delaying the actual block * de-allocation (i.e., freemap and free space count manipulation) until * after the updated versions reach stable storage. After the disk is * updated, the blocks can be safely de-allocated whenever it is convenient. * This implementation handles only the common case of reducing a file's * length to zero. Other cases are handled by the conventional synchronous * write approach. * * The ffs implementation with which we worked double-checks * the state of the block pointers and file size as it reduces * a file's length. Some of this code is replicated here in our * soft updates implementation. The freeblks->fb_chkcnt field is * used to transfer a part of this information to the procedure * that eventually de-allocates the blocks. * * This routine should be called from the routine that shortens * a file's length, before the inode's size or block pointers * are modified. It will save the block pointer information for * later release and zero the inode so that the calling routine * can release it. */ static long num_freeblks; /* number of freeblks allocated */ void softdep_setup_freeblocks(ip, length) struct inode *ip; /* The inode whose length is to be reduced */ off_t length; /* The new length for the file */ { struct freeblks *freeblks; struct inodedep *inodedep; struct allocdirect *adp; struct vnode *vp; struct buf *bp; struct fs *fs; int i, error; fs = ip->i_fs; if (length != 0) panic("softdep_setup_freeblocks: non-zero length"); /* * If we are over our limit, try to improve the situation.
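/*
 * An illustrative sketch of the deferred release described in the block
 * de-allocation comment above: the block pointers are moved out of the
 * inode into a work item and the inode is zeroed, but nothing is actually
 * freed until the zeroed inode is known to be on disk. The structures are
 * simplified stand-ins, not the kernel freeblks/inode types.
 */
#include <stdio.h>
#include <string.h>

#define NDIRECT 4

struct mini_inode {
        long db[NDIRECT];       /* direct block pointers */
        long blocks;            /* blocks accounted to the file */
};

struct mini_freeblks {
        long db[NDIRECT];       /* pointers saved for later release */
        long chkcnt;            /* expected number of blocks to release */
};

/* Capture and clear the pointers; the caller then writes the zeroed inode. */
static void
setup_freeblocks(struct mini_inode *ip, struct mini_freeblks *fb)
{
        memcpy(fb->db, ip->db, sizeof(fb->db));
        fb->chkcnt = ip->blocks;
        memset(ip->db, 0, sizeof(ip->db));
        ip->blocks = 0;
}

/* Runs only after the zeroed inode has reached stable storage. */
static void
handle_freeblocks(const struct mini_freeblks *fb)
{
        int i;

        for (i = 0; i < NDIRECT; i++)
                if (fb->db[i] != 0)
                        printf("free block %ld\n", fb->db[i]);
}

int
main(void)
{
        struct mini_inode ip = { { 11, 12, 0, 14 }, 3 };
        struct mini_freeblks fb;

        setup_freeblocks(&ip, &fb);
        /* ... the zeroed inode block is written here ... */
        handle_freeblocks(&fb);
        return (0);
}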
*/ if (num_freeblks > max_softdeps / 2 && speedup_syncer() == 0) (void) request_cleanup(FLUSH_REMOVE, 0); num_freeblks += 1; MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks), M_FREEBLKS, M_WAITOK); bzero(freeblks, sizeof(struct freeblks)); freeblks->fb_list.wk_type = D_FREEBLKS; freeblks->fb_uid = ip->i_uid; freeblks->fb_previousinum = ip->i_number; freeblks->fb_devvp = ip->i_devvp; freeblks->fb_fs = fs; freeblks->fb_oldsize = ip->i_size; freeblks->fb_newsize = length; freeblks->fb_chkcnt = ip->i_blocks; for (i = 0; i < NDADDR; i++) { freeblks->fb_dblks[i] = ip->i_db[i]; ip->i_db[i] = 0; } for (i = 0; i < NIADDR; i++) { freeblks->fb_iblks[i] = ip->i_ib[i]; ip->i_ib[i] = 0; } ip->i_blocks = 0; ip->i_size = 0; /* * Push the zero'ed inode to its disk buffer so that we are free * to delete its dependencies below. Once the dependencies are gone * the buffer can be safely released. */ if ((error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp)) != 0) softdep_error("softdep_setup_freeblocks", error); *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = ip->i_din; /* * Find and eliminate any inode dependencies. */ ACQUIRE_LOCK(&lk); (void) inodedep_lookup(fs, ip->i_number, DEPALLOC, &inodedep); if ((inodedep->id_state & IOSTARTED) != 0) panic("softdep_setup_freeblocks: inode busy"); /* * Add the freeblks structure to the list of operations that * must await the zero'ed inode being written to disk. */ WORKLIST_INSERT(&inodedep->id_bufwait, &freeblks->fb_list); /* * Because the file length has been truncated to zero, any * pending block allocation dependency structures associated * with this inode are obsolete and can simply be de-allocated. * We must first merge the two dependency lists to get rid of * any duplicate freefrag structures, then purge the merged list. */ merge_inode_lists(inodedep); while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != 0) free_allocdirect(&inodedep->id_inoupdt, adp, 1); FREE_LOCK(&lk); bdwrite(bp); /* * We must wait for any I/O in progress to finish so that * all potential buffers on the dirty list will be visible. * Once they are all there, walk the list and get rid of * any dependencies. */ vp = ITOV(ip); ACQUIRE_LOCK(&lk); drain_output(vp, 1); while (getdirtybuf(&TAILQ_FIRST(&vp->v_dirtyblkhd), MNT_WAIT)) { bp = TAILQ_FIRST(&vp->v_dirtyblkhd); (void) inodedep_lookup(fs, ip->i_number, 0, &inodedep); deallocate_dependencies(bp, inodedep); bp->b_flags |= B_INVAL | B_NOCACHE; FREE_LOCK(&lk); brelse(bp); ACQUIRE_LOCK(&lk); } /* * Try freeing the inodedep in case that was the last dependency. */ if ((inodedep_lookup(fs, ip->i_number, 0, &inodedep)) != 0) (void) free_inodedep(inodedep); FREE_LOCK(&lk); } /* * Reclaim any dependency structures from a buffer that is about to * be reallocated to a new vnode. The buffer must be locked, thus, * no I/O completion operations can occur while we are manipulating * its associated dependencies. The mutex is held so that other I/O's * associated with related dependencies do not occur. */ static void deallocate_dependencies(bp, inodedep) struct buf *bp; struct inodedep *inodedep; { struct worklist *wk; struct indirdep *indirdep; struct allocindir *aip; struct pagedep *pagedep; struct dirrem *dirrem; struct diradd *dap; int i; while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { switch (wk->wk_type) { case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); /* * None of the indirect pointers will ever be visible, * so they can simply be tossed.
GOINGAWAY ensures * that allocated pointers will be saved in the buffer * cache until they are freed. Note that they will * only be able to be found by their physical address * since the inode mapping the logical address will * be gone. The save buffer used for the safe copy * was allocated in setup_allocindir_phase2 using * the physical address so it could be used for this * purpose. Hence we swap the safe copy with the real * copy, allowing the safe copy to be freed and holding * on to the real copy for later use in indir_trunc. */ if (indirdep->ir_state & GOINGAWAY) panic("deallocate_dependencies: already gone"); indirdep->ir_state |= GOINGAWAY; while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != 0) free_allocindir(aip, inodedep); if (bp->b_lblkno >= 0 || bp->b_blkno != indirdep->ir_savebp->b_lblkno) panic("deallocate_dependencies: not indir"); bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount); WORKLIST_REMOVE(wk); WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, wk); continue; case D_PAGEDEP: pagedep = WK_PAGEDEP(wk); /* * None of the directory additions will ever be * visible, so they can simply be tossed. */ for (i = 0; i < DAHASHSZ; i++) while ((dap = LIST_FIRST(&pagedep->pd_diraddhd[i]))) free_diradd(dap); while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != 0) free_diradd(dap); /* * Copy any directory remove dependencies to the list * to be processed after the zero'ed inode is written. * If the inode has already been written, then they * can be dumped directly onto the work list. */ for (dirrem = LIST_FIRST(&pagedep->pd_dirremhd); dirrem; dirrem = LIST_NEXT(dirrem, dm_next)) { LIST_REMOVE(dirrem, dm_next); dirrem->dm_dirinum = pagedep->pd_ino; if (inodedep == NULL) add_to_worklist(&dirrem->dm_list); else WORKLIST_INSERT(&inodedep->id_bufwait, &dirrem->dm_list); } WORKLIST_REMOVE(&pagedep->pd_list); LIST_REMOVE(pagedep, pd_hash); WORKITEM_FREE(pagedep, D_PAGEDEP); continue; case D_ALLOCINDIR: free_allocindir(WK_ALLOCINDIR(wk), inodedep); continue; case D_ALLOCDIRECT: case D_INODEDEP: panic("deallocate_dependencies: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ default: panic("deallocate_dependencies: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } } /* * Free an allocdirect. Generate a new freefrag work request if appropriate. * This routine must be called with splbio interrupts blocked. */ static void free_allocdirect(adphead, adp, delay) struct allocdirectlst *adphead; struct allocdirect *adp; int delay; { #ifdef DEBUG if (lk.lkt_held == -1) panic("free_allocdirect: lock not held"); #endif if ((adp->ad_state & DEPCOMPLETE) == 0) LIST_REMOVE(adp, ad_deps); TAILQ_REMOVE(adphead, adp, ad_next); if ((adp->ad_state & COMPLETE) == 0) WORKLIST_REMOVE(&adp->ad_list); if (adp->ad_freefrag != NULL) { if (delay) WORKLIST_INSERT(&adp->ad_inodedep->id_bufwait, &adp->ad_freefrag->ff_list); else add_to_worklist(&adp->ad_freefrag->ff_list); } WORKITEM_FREE(adp, D_ALLOCDIRECT); } /* * Prepare an inode to be freed. The actual free operation is not * done until the zero'ed inode has been written to disk. */ static long num_freefile; /* number of freefile allocated */ void softdep_freefile(pvp, ino, mode) struct vnode *pvp; ino_t ino; int mode; { struct inode *ip = VTOI(pvp); struct inodedep *inodedep; struct freefile *freefile; /* * If we are over our limit, try to improve the situation. */ if (num_freefile > max_softdeps / 2 && speedup_syncer() == 0) (void) request_cleanup(FLUSH_REMOVE, 0); /* * This sets up the inode de-allocation dependency. 
*/ num_freefile += 1; MALLOC(freefile, struct freefile *, sizeof(struct freefile), M_FREEFILE, M_WAITOK); freefile->fx_list.wk_type = D_FREEFILE; freefile->fx_list.wk_state = 0; freefile->fx_mode = mode; freefile->fx_oldinum = ino; freefile->fx_devvp = ip->i_devvp; freefile->fx_fs = ip->i_fs; /* * If the inodedep does not exist, then the zero'ed inode has * been written to disk and we can free the file immediately. */ ACQUIRE_LOCK(&lk); if (inodedep_lookup(ip->i_fs, ino, 0, &inodedep) == 0) { add_to_worklist(&freefile->fx_list); FREE_LOCK(&lk); return; } /* * If we still have a bitmap dependency, then the inode has never * been written to disk. Drop the dependency as it is no longer * necessary since the inode is being deallocated. We could process * the freefile immediately, but then we would have to clear the * id_inowait dependencies here and it is easier just to let the * zero'ed inode be written and let them be cleaned up in the * normal followup actions that follow the inode write. */ if ((inodedep->id_state & DEPCOMPLETE) == 0) { inodedep->id_state |= DEPCOMPLETE; LIST_REMOVE(inodedep, id_deps); inodedep->id_buf = NULL; } /* * If the inodedep has no dependencies associated with it, * then we must free it here and free the file immediately. * This case arises when an early allocation fails (for * example, the user is over their file quota). */ if (free_inodedep(inodedep) == 0) WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list); else add_to_worklist(&freefile->fx_list); FREE_LOCK(&lk); } /* * Try to free an inodedep structure. Return 1 if it could be freed. */ static int free_inodedep(inodedep) struct inodedep *inodedep; { if ((inodedep->id_state & ONWORKLIST) != 0 || (inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE || LIST_FIRST(&inodedep->id_pendinghd) != NULL || LIST_FIRST(&inodedep->id_bufwait) != NULL || LIST_FIRST(&inodedep->id_inowait) != NULL || TAILQ_FIRST(&inodedep->id_inoupdt) != NULL || TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL || inodedep->id_nlinkdelta != 0 || inodedep->id_savedino != NULL) return (0); LIST_REMOVE(inodedep, id_hash); WORKITEM_FREE(inodedep, D_INODEDEP); num_inodedep -= 1; return (1); } /* * This workitem routine performs the block de-allocation. * The workitem is added to the pending list after the updated * inode block has been written to disk. As mentioned above, * checks regarding the number of blocks de-allocated (compared * to the number of blocks allocated for the file) are also * performed in this function. */ static void handle_workitem_freeblocks(freeblks) struct freeblks *freeblks; { struct inode tip; ufs_daddr_t bn; struct fs *fs; int i, level, bsize; long nblocks, blocksreleased = 0; int error, allerror = 0; ufs_lbn_t baselbns[NIADDR], tmpval; tip.i_number = freeblks->fb_previousinum; tip.i_devvp = freeblks->fb_devvp; tip.i_dev = freeblks->fb_devvp->v_rdev; tip.i_fs = freeblks->fb_fs; tip.i_size = freeblks->fb_oldsize; tip.i_uid = freeblks->fb_uid; fs = freeblks->fb_fs; tmpval = 1; baselbns[0] = NDADDR; for (i = 1; i < NIADDR; i++) { tmpval *= NINDIR(fs); baselbns[i] = baselbns[i - 1] + tmpval; } nblocks = btodb(fs->fs_bsize); blocksreleased = 0; /* * Indirect blocks first. */ for (level = (NIADDR - 1); level >= 0; level--) { if ((bn = freeblks->fb_iblks[level]) == 0) continue; if ((error = indir_trunc(&tip, fsbtodb(fs, bn), level, baselbns[level], &blocksreleased)) == 0) allerror = error; ffs_blkfree(&tip, bn, fs->fs_bsize); blocksreleased += nblocks; } /* * All direct blocks or frags. 
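/*
 * An illustrative sketch of the baselbns[] computation performed in
 * handle_workitem_freeblocks() above: each level of indirection begins at
 * the logical block just past everything the previous levels can address.
 * The constants are assumptions chosen for the example (12 direct pointers,
 * 2048 pointers per indirect block), not values read from a superblock.
 */
#include <stdio.h>

#define EX_NDADDR       12      /* direct block pointers in the inode */
#define EX_NIADDR       3       /* levels of indirection */
#define EX_NINDIR       2048L   /* pointers per indirect block */

int
main(void)
{
        long baselbns[EX_NIADDR], tmpval;
        int i;

        tmpval = 1;
        baselbns[0] = EX_NDADDR;
        for (i = 1; i < EX_NIADDR; i++) {
                tmpval *= EX_NINDIR;
                baselbns[i] = baselbns[i - 1] + tmpval;
        }
        for (i = 0; i < EX_NIADDR; i++)
                printf("indirect level %d starts at lbn %ld\n", i, baselbns[i]);
        return (0);
}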
*/ for (i = (NDADDR - 1); i >= 0; i--) { if ((bn = freeblks->fb_dblks[i]) == 0) continue; bsize = blksize(fs, &tip, i); ffs_blkfree(&tip, bn, bsize); blocksreleased += btodb(bsize); } #ifdef DIAGNOSTIC if (freeblks->fb_chkcnt != blocksreleased) panic("handle_workitem_freeblocks: block count"); if (allerror) softdep_error("handle_workitem_freeblks", allerror); #endif /* DIAGNOSTIC */ WORKITEM_FREE(freeblks, D_FREEBLKS); num_freeblks -= 1; } /* * Release blocks associated with the inode ip and stored in the indirect * block dbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. */ static int indir_trunc(ip, dbn, level, lbn, countp) struct inode *ip; ufs_daddr_t dbn; int level; ufs_lbn_t lbn; long *countp; { struct buf *bp; ufs_daddr_t *bap; ufs_daddr_t nb; struct fs *fs; struct worklist *wk; struct indirdep *indirdep; int i, lbnadd, nblocks; int error, allerror = 0; fs = ip->i_fs; lbnadd = 1; for (i = level; i > 0; i--) lbnadd *= NINDIR(fs); /* * Get buffer of block pointers to be freed. This routine is not * called until the zero'ed inode has been written, so it is safe * to free blocks as they are encountered. Because the inode has * been zero'ed, calls to bmap on these blocks will fail. So, we * have to use the on-disk address and the block device for the * filesystem to look them up. If the file was deleted before its * indirect blocks were all written to disk, the routine that set * us up (deallocate_dependencies) will have arranged to leave * a complete copy of the indirect block in memory for our use. * Otherwise we have to read the blocks in from the disk. */ ACQUIRE_LOCK(&lk); if ((bp = incore(ip->i_devvp, dbn)) != NULL && (wk = LIST_FIRST(&bp->b_dep)) != NULL) { if (wk->wk_type != D_INDIRDEP || (indirdep = WK_INDIRDEP(wk))->ir_savebp != bp || (indirdep->ir_state & GOINGAWAY) == 0) panic("indir_trunc: lost indirdep"); WORKLIST_REMOVE(wk); WORKITEM_FREE(indirdep, D_INDIRDEP); if (LIST_FIRST(&bp->b_dep) != NULL) panic("indir_trunc: dangling dep"); FREE_LOCK(&lk); } else { FREE_LOCK(&lk); error = bread(ip->i_devvp, dbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) return (error); } /* * Recursively free indirect blocks. */ bap = (ufs_daddr_t *)bp->b_data; nblocks = btodb(fs->fs_bsize); for (i = NINDIR(fs) - 1; i >= 0; i--) { if ((nb = bap[i]) == 0) continue; if (level != 0) { if ((error = indir_trunc(ip, fsbtodb(fs, nb), level - 1, lbn + (i * lbnadd), countp)) != 0) allerror = error; } ffs_blkfree(ip, nb, fs->fs_bsize); *countp += nblocks; } bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); return (allerror); } /* * Free an allocindir. * This routine must be called with splbio interrupts blocked. */ static void free_allocindir(aip, inodedep) struct allocindir *aip; struct inodedep *inodedep; { struct freefrag *freefrag; #ifdef DEBUG if (lk.lkt_held == -1) panic("free_allocindir: lock not held"); #endif if ((aip->ai_state & DEPCOMPLETE) == 0) LIST_REMOVE(aip, ai_deps); if (aip->ai_state & ONWORKLIST) WORKLIST_REMOVE(&aip->ai_list); LIST_REMOVE(aip, ai_next); if ((freefrag = aip->ai_freefrag) != NULL) { if (inodedep == NULL) add_to_worklist(&freefrag->ff_list); else WORKLIST_INSERT(&inodedep->id_bufwait, &freefrag->ff_list); } WORKITEM_FREE(aip, D_ALLOCINDIR); } /* * Directory entry addition dependencies. * * When adding a new directory entry, the inode (with its incremented link * count) must be written to disk before the directory entry's pointer to it. 
* Also, if the inode is newly allocated, the corresponding freemap must be * updated (on disk) before the directory entry's pointer. These requirements * are met via undo/redo on the directory entry's pointer, which consists * simply of the inode number. * * As directory entries are added and deleted, the free space within a * directory block can become fragmented. The ufs file system will compact * a fragmented directory block to make space for a new entry. When this * occurs, the offsets of previously added entries change. Any "diradd" * dependency structures corresponding to these entries must be updated with * the new offsets. */ /* * This routine is called after the in-memory inode's link * count has been incremented, but before the directory entry's * pointer to the inode has been set. */ void softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for directory */ off_t diroffset; /* offset of new entry in directory */ long newinum; /* inode referenced by new directory entry */ struct buf *newdirbp; /* non-NULL => contents of new mkdir */ { int offset; /* offset of new entry within directory block */ ufs_lbn_t lbn; /* block in directory containing new entry */ struct fs *fs; struct diradd *dap; struct pagedep *pagedep; struct inodedep *inodedep; struct mkdir *mkdir1, *mkdir2; /* * Whiteouts have no dependencies. */ if (newinum == WINO) { if (newdirbp != NULL) bdwrite(newdirbp); return; } fs = dp->i_fs; lbn = lblkno(fs, diroffset); offset = blkoff(fs, diroffset); MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD, M_WAITOK); bzero(dap, sizeof(struct diradd)); dap->da_list.wk_type = D_DIRADD; dap->da_offset = offset; dap->da_newinum = newinum; dap->da_state = ATTACHED; if (newdirbp == NULL) { dap->da_state |= DEPCOMPLETE; ACQUIRE_LOCK(&lk); } else { dap->da_state |= MKDIR_BODY | MKDIR_PARENT; MALLOC(mkdir1, struct mkdir *, sizeof(struct mkdir), M_MKDIR, M_WAITOK); mkdir1->md_list.wk_type = D_MKDIR; mkdir1->md_state = MKDIR_BODY; mkdir1->md_diradd = dap; MALLOC(mkdir2, struct mkdir *, sizeof(struct mkdir), M_MKDIR, M_WAITOK); mkdir2->md_list.wk_type = D_MKDIR; mkdir2->md_state = MKDIR_PARENT; mkdir2->md_diradd = dap; /* * Dependency on "." and ".." being written to disk. */ mkdir1->md_buf = newdirbp; ACQUIRE_LOCK(&lk); LIST_INSERT_HEAD(&mkdirlisthd, mkdir1, md_mkdirs); WORKLIST_INSERT(&newdirbp->b_dep, &mkdir1->md_list); FREE_LOCK(&lk); bdwrite(newdirbp); /* * Dependency on link count increase for parent directory */ ACQUIRE_LOCK(&lk); if (inodedep_lookup(dp->i_fs, dp->i_number, 0, &inodedep) == 0 || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state &= ~MKDIR_PARENT; WORKITEM_FREE(mkdir2, D_MKDIR); } else { LIST_INSERT_HEAD(&mkdirlisthd, mkdir2, md_mkdirs); WORKLIST_INSERT(&inodedep->id_bufwait,&mkdir2->md_list); } } /* * Link into parent directory pagedep to await its being written. */ if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); dap->da_pagedep = pagedep; LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); /* * Link into its inodedep. Put it on the id_bufwait list if the inode * is not yet written. If it is written, do the post-inode write * processing to put it on the id_pendinghd list. 
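/*
 * An illustrative sketch of the undo/redo applied to a new directory entry,
 * as described in the directory-add comment above: until the inode with its
 * incremented link count is on disk, any write of the directory block
 * stores a rolled-back inode number (zero for a new entry, the old number
 * for a changed entry). The types and field names are simplified stand-ins
 * for the kernel structures.
 */
#include <stdio.h>
#include <stdint.h>

struct mini_dirent {
        uint32_t d_ino;                 /* in-memory (up to date) inode number */
};

struct mini_diradd {
        uint32_t da_prev_ino;           /* rollback value: 0, or old inum on change */
        int inode_written;              /* has the referenced inode reached disk? */
};

/* Value the directory block would carry if written right now. */
static uint32_t
dirent_for_disk(const struct mini_dirent *ep, const struct mini_diradd *dap)
{
        if (dap != NULL && !dap->inode_written)
                return (dap->da_prev_ino);
        return (ep->d_ino);
}

int
main(void)
{
        struct mini_dirent ep = { 77 };
        struct mini_diradd dap = { 0, 0 };

        printf("before inode write, disk sees inum %u\n", dirent_for_disk(&ep, &dap));
        dap.inode_written = 1;          /* the inode block completed its write */
        printf("after inode write, disk sees inum %u\n", dirent_for_disk(&ep, &dap));
        return (0);
}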
*/ (void) inodedep_lookup(fs, newinum, DEPALLOC, &inodedep); if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) diradd_inode_written(dap, inodedep); else WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); FREE_LOCK(&lk); } /* * This procedure is called to change the offset of a directory * entry when compacting a directory block which must be owned * exclusively by the caller. Note that the actual entry movement * must be done in this procedure to ensure that no I/O completions * occur while the move is in progress. */ void softdep_change_directoryentry_offset(dp, base, oldloc, newloc, entrysize) struct inode *dp; /* inode for directory */ caddr_t base; /* address of dp->i_offset */ caddr_t oldloc; /* address of old directory location */ caddr_t newloc; /* address of new directory location */ int entrysize; /* size of directory entry */ { int offset, oldoffset, newoffset; struct pagedep *pagedep; struct diradd *dap; ufs_lbn_t lbn; ACQUIRE_LOCK(&lk); lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); if (pagedep_lookup(dp, lbn, 0, &pagedep) == 0) goto done; oldoffset = offset + (oldloc - base); newoffset = offset + (newloc - base); for (dap = LIST_FIRST(&pagedep->pd_diraddhd[DIRADDHASH(oldoffset)]); dap; dap = LIST_NEXT(dap, da_pdlist)) { if (dap->da_offset != oldoffset) continue; dap->da_offset = newoffset; if (DIRADDHASH(newoffset) == DIRADDHASH(oldoffset)) break; LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(newoffset)], dap, da_pdlist); break; } if (dap == NULL) { for (dap = LIST_FIRST(&pagedep->pd_pendinghd); dap; dap = LIST_NEXT(dap, da_pdlist)) { if (dap->da_offset == oldoffset) { dap->da_offset = newoffset; break; } } } done: bcopy(oldloc, newloc, entrysize); FREE_LOCK(&lk); } /* * Free a diradd dependency structure. This routine must be called * with splbio interrupts blocked. */ static void free_diradd(dap) struct diradd *dap; { struct dirrem *dirrem; struct pagedep *pagedep; struct inodedep *inodedep; struct mkdir *mkdir, *nextmd; #ifdef DEBUG if (lk.lkt_held == -1) panic("free_diradd: lock not held"); #endif WORKLIST_REMOVE(&dap->da_list); LIST_REMOVE(dap, da_pdlist); if ((dap->da_state & DIRCHG) == 0) { pagedep = dap->da_pagedep; } else { dirrem = dap->da_previous; pagedep = dirrem->dm_pagedep; dirrem->dm_dirinum = pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } if (inodedep_lookup(VFSTOUFS(pagedep->pd_mnt)->um_fs, dap->da_newinum, 0, &inodedep) != 0) (void) free_inodedep(inodedep); if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) { for (mkdir = LIST_FIRST(&mkdirlisthd); mkdir; mkdir = nextmd) { nextmd = LIST_NEXT(mkdir, md_mkdirs); if (mkdir->md_diradd != dap) continue; dap->da_state &= ~mkdir->md_state; WORKLIST_REMOVE(&mkdir->md_list); LIST_REMOVE(mkdir, md_mkdirs); WORKITEM_FREE(mkdir, D_MKDIR); } if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) panic("free_diradd: unfound ref"); } WORKITEM_FREE(dap, D_DIRADD); } /* * Directory entry removal dependencies. * * When removing a directory entry, the entry's inode pointer must be * zero'ed on disk before the corresponding inode's link count is decremented * (possibly freeing the inode for re-use). This dependency is handled by * updating the directory entry but delaying the inode count reduction until * after the directory block has been written to disk. After this point, the * inode count can be decremented whenever it is convenient. */ /* * This routine should be called immediately after removing * a directory entry. 
The inode's link count should not be * decremented by the calling procedure -- the soft updates * code will do this task when it is safe. */ void softdep_setup_remove(bp, dp, ip, isrmdir) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for the directory being modified */ struct inode *ip; /* inode for directory entry being removed */ int isrmdir; /* indicates if doing RMDIR */ { struct dirrem *dirrem; /* * Allocate a new dirrem if appropriate and ACQUIRE_LOCK. */ dirrem = newdirrem(bp, dp, ip, isrmdir); if ((dirrem->dm_state & COMPLETE) == 0) { LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem, dm_next); } else { dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } FREE_LOCK(&lk); } /* * Allocate a new dirrem if appropriate and return it along with * its associated pagedep. Called without a lock, returns with lock. */ static struct dirrem * newdirrem(bp, dp, ip, isrmdir) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for the directory being modified */ struct inode *ip; /* inode for directory entry being removed */ int isrmdir; /* indicates if doing RMDIR */ { int offset; ufs_lbn_t lbn; struct diradd *dap; struct dirrem *dirrem; struct pagedep *pagedep; /* * Whiteouts have no deletion dependencies. */ if (ip == NULL) panic("newdirrem: whiteout"); MALLOC(dirrem, struct dirrem *, sizeof(struct dirrem), M_DIRREM, M_WAITOK); bzero(dirrem, sizeof(struct dirrem)); dirrem->dm_list.wk_type = D_DIRREM; dirrem->dm_state = isrmdir ? RMDIR : 0; dirrem->dm_mnt = ITOV(ip)->v_mount; dirrem->dm_oldinum = ip->i_number; ACQUIRE_LOCK(&lk); lbn = lblkno(dp->i_fs, dp->i_offset); offset = blkoff(dp->i_fs, dp->i_offset); if (pagedep_lookup(dp, lbn, DEPALLOC, &pagedep) == 0) WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list); dirrem->dm_pagedep = pagedep; /* * Check for a diradd dependency for the same directory entry. * If present, then both dependencies become obsolete and can * be de-allocated. Check for an entry on both the pd_dirraddhd * list and the pd_pendinghd list. */ for (dap = LIST_FIRST(&pagedep->pd_diraddhd[DIRADDHASH(offset)]); dap; dap = LIST_NEXT(dap, da_pdlist)) if (dap->da_offset == offset) break; if (dap == NULL) { for (dap = LIST_FIRST(&pagedep->pd_pendinghd); dap; dap = LIST_NEXT(dap, da_pdlist)) if (dap->da_offset == offset) break; if (dap == NULL) return (dirrem); } /* * Must be ATTACHED at this point, so just delete it. */ if ((dap->da_state & ATTACHED) == 0) panic("newdirrem: not ATTACHED"); if (dap->da_newinum != ip->i_number) panic("newdirrem: inum %d should be %d", ip->i_number, dap->da_newinum); free_diradd(dap); dirrem->dm_state |= COMPLETE; return (dirrem); } /* * Directory entry change dependencies. * * Changing an existing directory entry requires that an add operation * be completed first followed by a deletion. The semantics for the addition * are identical to the description of adding a new entry above except * that the rollback is to the old inode number rather than zero. Once * the addition dependency is completed, the removal is done as described * in the removal routine above. */ /* * This routine should be called immediately after changing * a directory entry. The inode's link count should not be * decremented by the calling procedure -- the soft updates * code will perform this task when it is safe. 
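/*
 * An illustrative sketch of the cancellation performed by newdirrem() above:
 * if the entry being removed still has a pending add dependency at the same
 * offset, neither the add nor the remove ever needs to reach the disk, so
 * the add is dropped and the remove is treated as complete at once. This
 * userland model keeps pending adds in a plain array instead of the
 * kernel's hashed pagedep lists.
 */
#include <stdio.h>

#define MAXADDS 8

struct pending_adds {
        int offsets[MAXADDS];   /* directory offsets with unwritten adds; 0 = free slot */
};

/* Returns 1 if the remove is already complete (cancelled against an add). */
static int
setup_remove(struct pending_adds *pa, int offset)
{
        int i;

        for (i = 0; i < MAXADDS; i++) {
                if (pa->offsets[i] == offset) {
                        pa->offsets[i] = 0;     /* obsolete add: drop it */
                        return (1);             /* no further ordering needed */
                }
        }
        return (0);     /* must wait for the directory block write as usual */
}

int
main(void)
{
        struct pending_adds pa = { { 512, 0, 0, 0, 0, 0, 0, 0 } };

        printf("remove at 512: complete=%d\n", setup_remove(&pa, 512));
        printf("remove at 640: complete=%d\n", setup_remove(&pa, 640));
        return (0);
}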
*/ void softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir) struct buf *bp; /* buffer containing directory block */ struct inode *dp; /* inode for the directory being modified */ struct inode *ip; /* inode for directory entry being removed */ long newinum; /* new inode number for changed entry */ int isrmdir; /* indicates if doing RMDIR */ { int offset; struct diradd *dap = NULL; struct dirrem *dirrem; struct pagedep *pagedep; struct inodedep *inodedep; offset = blkoff(dp->i_fs, dp->i_offset); /* * Whiteouts do not need diradd dependencies. */ if (newinum != WINO) { MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD, M_WAITOK); bzero(dap, sizeof(struct diradd)); dap->da_list.wk_type = D_DIRADD; dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE; dap->da_offset = offset; dap->da_newinum = newinum; } /* * Allocate a new dirrem and ACQUIRE_LOCK. */ dirrem = newdirrem(bp, dp, ip, isrmdir); pagedep = dirrem->dm_pagedep; /* * The possible values for isrmdir: * 0 - non-directory file rename * 1 - directory rename within same directory * inum - directory rename to new directory of given inode number * When renaming to a new directory, we are both deleting and * creating a new directory entry, so the link count on the new * directory should not change. Thus we do not need the followup * dirrem which is usually done in handle_workitem_remove. We set * the DIRCHG flag to tell handle_workitem_remove to skip the * followup dirrem. */ if (isrmdir > 1) dirrem->dm_state |= DIRCHG; /* * Whiteouts have no additional dependencies, * so just put the dirrem on the correct list. */ if (newinum == WINO) { if ((dirrem->dm_state & COMPLETE) == 0) { LIST_INSERT_HEAD(&pagedep->pd_dirremhd, dirrem, dm_next); } else { dirrem->dm_dirinum = pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } FREE_LOCK(&lk); return; } /* * Link into its inodedep. Put it on the id_bufwait list if the inode * is not yet written. If it is written, do the post-inode write * processing to put it on the id_pendinghd list. */ dap->da_previous = dirrem; if (inodedep_lookup(dp->i_fs, newinum, DEPALLOC, &inodedep) == 0 || (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) { dap->da_state |= COMPLETE; LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); } else { LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap, da_pdlist); WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list); } /* * If the previous inode was never written or its previous directory * entry was never written, then we do not want to roll back to this * previous value. Instead we want to roll back to zero and immediately * free the unwritten or unreferenced inode. */ if (dirrem->dm_state & COMPLETE) { dap->da_state &= ~DIRCHG; dap->da_pagedep = pagedep; dirrem->dm_dirinum = pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } FREE_LOCK(&lk); } /* * Called whenever the link count on an inode is increased. * It creates an inode dependency so that the new reference(s) * to the inode cannot be committed to disk until the updated * inode has been written. */ void softdep_increase_linkcnt(ip) struct inode *ip; /* the inode with the increased link count */ { struct inodedep *inodedep; ACQUIRE_LOCK(&lk); (void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC, &inodedep); FREE_LOCK(&lk); } /* * This workitem decrements the inode's link count. * If the link count reaches zero, the file is removed. 
*/ static void handle_workitem_remove(dirrem) struct dirrem *dirrem; { struct proc *p = CURPROC; /* XXX */ struct inodedep *inodedep; struct vnode *vp; struct inode *ip; int error; if ((error = VFS_VGET(dirrem->dm_mnt, dirrem->dm_oldinum, &vp)) != 0) { softdep_error("handle_workitem_remove: vget", error); return; } ip = VTOI(vp); /* * Normal file deletion. */ if ((dirrem->dm_state & RMDIR) == 0) { ip->i_nlink--; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad file delta"); ip->i_flag |= IN_CHANGE; vput(vp); WORKITEM_FREE(dirrem, D_DIRREM); return; } /* * Directory deletion. Decrement reference count for both the * just deleted parent directory entry and the reference for ".". * Next truncate the directory to length zero. When the * truncation completes, arrange to have the reference count on * the parent decremented to account for the loss of "..". */ ip->i_nlink -= 2; if (ip->i_nlink < ip->i_effnlink) panic("handle_workitem_remove: bad dir delta"); ip->i_flag |= IN_CHANGE; if ((error = UFS_TRUNCATE(vp, (off_t)0, 0, p->p_ucred, p)) != 0) softdep_error("handle_workitem_remove: truncate", error); /* * Rename a directory to a new parent. Since, we are both deleting * and creating a new directory entry, the link count on the new * directory should not change. Thus we skip the followup dirrem. */ if (dirrem->dm_state & DIRCHG) { vput(vp); WORKITEM_FREE(dirrem, D_DIRREM); return; } ACQUIRE_LOCK(&lk); (void) inodedep_lookup(ip->i_fs, dirrem->dm_oldinum, DEPALLOC, &inodedep); dirrem->dm_state = 0; dirrem->dm_oldinum = dirrem->dm_dirinum; WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list); FREE_LOCK(&lk); vput(vp); } /* * Inode de-allocation dependencies. * * When an inode's link count is reduced to zero, it can be de-allocated. We * found it convenient to postpone de-allocation until after the inode is * written to disk with its new link count (zero). At this point, all of the * on-disk inode's block pointers are nullified and, with careful dependency * list ordering, all dependencies related to the inode will be satisfied and * the corresponding dependency structures de-allocated. So, if/when the * inode is reused, there will be no mixing of old dependencies with new * ones. This artificial dependency is set up by the block de-allocation * procedure above (softdep_setup_freeblocks) and completed by the * following procedure. */ static void handle_workitem_freefile(freefile) struct freefile *freefile; { struct vnode vp; struct inode tip; struct inodedep *idp; int error; #ifdef DEBUG ACQUIRE_LOCK(&lk); if (inodedep_lookup(freefile->fx_fs, freefile->fx_oldinum, 0, &idp)) panic("handle_workitem_freefile: inodedep survived"); FREE_LOCK(&lk); #endif tip.i_devvp = freefile->fx_devvp; tip.i_dev = freefile->fx_devvp->v_rdev; tip.i_fs = freefile->fx_fs; vp.v_data = &tip; if ((error = ffs_freefile(&vp, freefile->fx_oldinum, freefile->fx_mode)) != 0) softdep_error("handle_workitem_freefile", error); WORKITEM_FREE(freefile, D_FREEFILE); num_freefile -= 1; } /* * Disk writes. * * The dependency structures constructed above are most actively used when file * system blocks are written to disk. No constraints are placed on when a * block can be written, but unsatisfied update dependencies are made safe by * modifying (or replacing) the source memory for the duration of the disk * write. When the disk write completes, the memory block is again brought * up-to-date. * * In-core inode structure reclamation. 
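/*
 * An illustrative sketch of the scheme described in the "Disk writes"
 * comment above: just before a write, contents covered by unsatisfied
 * dependencies are swapped out for a safe (rolled-back) copy; on completion
 * the real contents are restored and the buffer is left dirty so it will be
 * written again later. The buffer and "disk" here are plain arrays, not the
 * kernel buf structures.
 */
#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define BSIZE 8

int
main(void)
{
        uint8_t data[BSIZE] = { 1, 2, 3, 4, 5, 6, 7, 8 };       /* up-to-date contents */
        uint8_t safe[BSIZE] = { 1, 2, 3, 4, 0, 0, 0, 0 };       /* unsafe part rolled back */
        uint8_t saved[BSIZE], disk[BSIZE];
        int dirty;

        /* I/O initiation: stash the real contents and present the safe copy. */
        memcpy(saved, data, BSIZE);
        memcpy(data, safe, BSIZE);
        memcpy(disk, data, BSIZE);              /* the simulated disk write */

        /* I/O completion: roll forward and keep the buffer dirty so the
         * real contents are eventually written once the dependencies clear. */
        memcpy(data, saved, BSIZE);
        dirty = 1;

        printf("disk holds %u, memory holds %u, dirty=%d\n",
            (unsigned)disk[4], (unsigned)data[4], dirty);
        return (0);
}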
* * Because there are a finite number of "in-core" inode structures, they are * reused regularly. By transferring all inode-related dependencies to the * in-memory inode block and indexing them separately (via "inodedep"s), we * can allow "in-core" inode structures to be reused at any time and avoid * any increase in contention. * * Called just before entering the device driver to initiate a new disk I/O. * The buffer must be locked, thus, no I/O completion operations can occur * while we are manipulating its associated dependencies. */ void softdep_disk_io_initiation(bp) struct buf *bp; /* structure describing disk write to occur */ { struct worklist *wk, *nextwk; struct indirdep *indirdep; /* * We only care about write operations. There should never * be dependencies for reads. */ if (bp->b_flags & B_READ) panic("softdep_disk_io_initiation: read"); /* * Do any necessary pre-I/O processing. */ for (wk = LIST_FIRST(&bp->b_dep); wk; wk = nextwk) { nextwk = LIST_NEXT(wk, wk_list); switch (wk->wk_type) { case D_PAGEDEP: initiate_write_filepage(WK_PAGEDEP(wk), bp); continue; case D_INODEDEP: initiate_write_inodeblock(WK_INODEDEP(wk), bp); continue; case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); if (indirdep->ir_state & GOINGAWAY) panic("disk_io_initiation: indirdep gone"); /* * If there are no remaining dependencies, this * will be writing the real pointers, so the * dependency can be freed. */ if (LIST_FIRST(&indirdep->ir_deplisthd) == NULL) { indirdep->ir_savebp->b_flags |= B_INVAL | B_NOCACHE; brelse(indirdep->ir_savebp); /* inline expand WORKLIST_REMOVE(wk); */ wk->wk_state &= ~ONWORKLIST; LIST_REMOVE(wk, wk_list); WORKITEM_FREE(indirdep, D_INDIRDEP); continue; } /* * Replace up-to-date version with safe version. */ ACQUIRE_LOCK(&lk); indirdep->ir_state &= ~ATTACHED; indirdep->ir_state |= UNDONE; MALLOC(indirdep->ir_saveddata, caddr_t, bp->b_bcount, M_INDIRDEP, M_WAITOK); bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount); bcopy(indirdep->ir_savebp->b_data, bp->b_data, bp->b_bcount); FREE_LOCK(&lk); continue; case D_MKDIR: case D_BMSAFEMAP: case D_ALLOCDIRECT: case D_ALLOCINDIR: continue; default: panic("handle_disk_io_initiation: Unexpected type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } } /* * Called from within the procedure above to deal with unsatisfied * allocation dependencies in a directory. The buffer must be locked, * thus, no I/O completion operations can occur while we are * manipulating its associated dependencies. */ static void initiate_write_filepage(pagedep, bp) struct pagedep *pagedep; struct buf *bp; { struct diradd *dap; struct direct *ep; int i; if (pagedep->pd_state & IOSTARTED) { /* * This can only happen if there is a driver that does not * understand chaining. Here biodone will reissue the call * to strategy for the incomplete buffers. */ printf("initiate_write_filepage: already started\n"); return; } pagedep->pd_state |= IOSTARTED; ACQUIRE_LOCK(&lk); for (i = 0; i < DAHASHSZ; i++) { for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap; dap = LIST_NEXT(dap, da_pdlist)) { ep = (struct direct *) ((char *)bp->b_data + dap->da_offset); if (ep->d_ino != dap->da_newinum) panic("%s: dir inum %d != new %d", "initiate_write_filepage", ep->d_ino, dap->da_newinum); if (dap->da_state & DIRCHG) ep->d_ino = dap->da_previous->dm_oldinum; else ep->d_ino = 0; dap->da_state &= ~ATTACHED; dap->da_state |= UNDONE; } } FREE_LOCK(&lk); } /* * Called from within the procedure above to deal with unsatisfied * allocation dependencies in an inodeblock. 
The buffer must be * locked, thus, no I/O completion operations can occur while we * are manipulating its associated dependencies. */ static void initiate_write_inodeblock(inodedep, bp) struct inodedep *inodedep; struct buf *bp; /* The inode block */ { struct allocdirect *adp, *lastadp; struct dinode *dp; struct fs *fs; ufs_lbn_t prevlbn = 0; int i, deplist; if (inodedep->id_state & IOSTARTED) panic("initiate_write_inodeblock: already started"); inodedep->id_state |= IOSTARTED; fs = inodedep->id_fs; dp = (struct dinode *)bp->b_data + ino_to_fsbo(fs, inodedep->id_ino); /* * If the bitmap is not yet written, then the allocated * inode cannot be written to disk. */ if ((inodedep->id_state & DEPCOMPLETE) == 0) { if (inodedep->id_savedino != NULL) panic("initiate_write_inodeblock: already doing I/O"); MALLOC(inodedep->id_savedino, struct dinode *, sizeof(struct dinode), M_INODEDEP, M_WAITOK); *inodedep->id_savedino = *dp; bzero((caddr_t)dp, sizeof(struct dinode)); return; } /* * If no dependencies, then there is nothing to roll back. */ inodedep->id_savedsize = dp->di_size; if (TAILQ_FIRST(&inodedep->id_inoupdt) == NULL) return; /* * Set the dependencies to busy. */ ACQUIRE_LOCK(&lk); for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { #ifdef DIAGNOSTIC if (deplist != 0 && prevlbn >= adp->ad_lbn) panic("softdep_write_inodeblock: lbn order"); prevlbn = adp->ad_lbn; if (adp->ad_lbn < NDADDR && dp->di_db[adp->ad_lbn] != adp->ad_newblkno) panic("%s: direct pointer #%ld mismatch %d != %d", "softdep_write_inodeblock", adp->ad_lbn, dp->di_db[adp->ad_lbn], adp->ad_newblkno); if (adp->ad_lbn >= NDADDR && dp->di_ib[adp->ad_lbn - NDADDR] != adp->ad_newblkno) panic("%s: indirect pointer #%ld mismatch %d != %d", "softdep_write_inodeblock", adp->ad_lbn - NDADDR, dp->di_ib[adp->ad_lbn - NDADDR], adp->ad_newblkno); deplist |= 1 << adp->ad_lbn; if ((adp->ad_state & ATTACHED) == 0) panic("softdep_write_inodeblock: Unknown state 0x%x", adp->ad_state); #endif /* DIAGNOSTIC */ adp->ad_state &= ~ATTACHED; adp->ad_state |= UNDONE; } /* * The on-disk inode cannot claim to be any larger than the last * fragment that has been written. Otherwise, the on-disk inode * might have fragments that were not the last block in the file * which would corrupt the filesystem. */ for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) { if (adp->ad_lbn >= NDADDR) break; dp->di_db[adp->ad_lbn] = adp->ad_oldblkno; /* keep going until hitting a rollback to a frag */ if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize) continue; dp->di_size = fs->fs_bsize * adp->ad_lbn + adp->ad_oldsize; for (i = adp->ad_lbn + 1; i < NDADDR; i++) { #ifdef DIAGNOSTIC if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0) panic("softdep_write_inodeblock: lost dep1"); #endif /* DIAGNOSTIC */ dp->di_db[i] = 0; } for (i = 0; i < NIADDR; i++) { #ifdef DIAGNOSTIC if (dp->di_ib[i] != 0 && (deplist & ((1 << NDADDR) << i)) == 0) panic("softdep_write_inodeblock: lost dep2"); #endif /* DIAGNOSTIC */ dp->di_ib[i] = 0; } FREE_LOCK(&lk); return; } /* * If we have zero'ed out the last allocated block of the file, * roll back the size to the last currently allocated block. * We know that this last allocated block is a full-sized as * we already checked for fragments in the loop above. 
*/ if (lastadp != NULL && dp->di_size <= (lastadp->ad_lbn + 1) * fs->fs_bsize) { for (i = lastadp->ad_lbn; i >= 0; i--) if (dp->di_db[i] != 0) break; dp->di_size = (i + 1) * fs->fs_bsize; } /* * The only dependencies are for indirect blocks. * * The file size for indirect block additions is not guaranteed. * Such a guarantee would be non-trivial to achieve. The conventional * synchronous write implementation also does not make this guarantee. * Fsck should catch and fix discrepancies. Arguably, the file size * can be over-estimated without destroying integrity when the file * moves into the indirect blocks (i.e., is large). If we want to * postpone fsck, we are stuck with this argument. */ for (; adp; adp = TAILQ_NEXT(adp, ad_next)) dp->di_ib[adp->ad_lbn - NDADDR] = 0; FREE_LOCK(&lk); } /* * This routine is called during the completion interrupt * service routine for a disk write (from the procedure called * by the device driver to inform the file system caches of * a request completion). It should be called early in this * procedure, before the block is made available to other * processes or other routines are called. */ void softdep_disk_write_complete(bp) struct buf *bp; /* describes the completed disk write */ { struct worklist *wk; struct workhead reattach; struct newblk *newblk; struct allocindir *aip; struct allocdirect *adp; struct indirdep *indirdep; struct inodedep *inodedep; struct bmsafemap *bmsafemap; #ifdef DEBUG if (lk.lkt_held != -1) panic("softdep_disk_write_complete: lock is held"); lk.lkt_held = -2; #endif LIST_INIT(&reattach); while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) { WORKLIST_REMOVE(wk); switch (wk->wk_type) { case D_PAGEDEP: if (handle_written_filepage(WK_PAGEDEP(wk), bp)) WORKLIST_INSERT(&reattach, wk); continue; case D_INODEDEP: if (handle_written_inodeblock(WK_INODEDEP(wk), bp)) WORKLIST_INSERT(&reattach, wk); continue; case D_BMSAFEMAP: bmsafemap = WK_BMSAFEMAP(wk); while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkhd))) { newblk->nb_state |= DEPCOMPLETE; newblk->nb_bmsafemap = NULL; LIST_REMOVE(newblk, nb_deps); } while ((adp = LIST_FIRST(&bmsafemap->sm_allocdirecthd))) { adp->ad_state |= DEPCOMPLETE; adp->ad_buf = NULL; LIST_REMOVE(adp, ad_deps); handle_allocdirect_partdone(adp); } while ((aip = LIST_FIRST(&bmsafemap->sm_allocindirhd))) { aip->ai_state |= DEPCOMPLETE; aip->ai_buf = NULL; LIST_REMOVE(aip, ai_deps); handle_allocindir_partdone(aip); } while ((inodedep = LIST_FIRST(&bmsafemap->sm_inodedephd)) != NULL) { inodedep->id_state |= DEPCOMPLETE; LIST_REMOVE(inodedep, id_deps); inodedep->id_buf = NULL; } WORKITEM_FREE(bmsafemap, D_BMSAFEMAP); continue; case D_MKDIR: handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY); continue; case D_ALLOCDIRECT: adp = WK_ALLOCDIRECT(wk); adp->ad_state |= COMPLETE; handle_allocdirect_partdone(adp); continue; case D_ALLOCINDIR: aip = WK_ALLOCINDIR(wk); aip->ai_state |= COMPLETE; handle_allocindir_partdone(aip); continue; case D_INDIRDEP: indirdep = WK_INDIRDEP(wk); if (indirdep->ir_state & GOINGAWAY) panic("disk_write_complete: indirdep gone"); bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount); FREE(indirdep->ir_saveddata, M_INDIRDEP); indirdep->ir_saveddata = 0; indirdep->ir_state &= ~UNDONE; indirdep->ir_state |= ATTACHED; while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != 0) { handle_allocindir_partdone(aip); if (aip == LIST_FIRST(&indirdep->ir_donehd)) panic("disk_write_complete: not gone"); } WORKLIST_INSERT(&reattach, wk); if ((bp->b_flags & B_DELWRI) == 0) stat_indir_blk_ptrs++; bdirty(bp); continue; default: 
panic("handle_disk_write_complete: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } /* * Reattach any requests that must be redone. */ while ((wk = LIST_FIRST(&reattach)) != NULL) { WORKLIST_REMOVE(wk); WORKLIST_INSERT(&bp->b_dep, wk); } #ifdef DEBUG if (lk.lkt_held != -2) panic("softdep_disk_write_complete: lock lost"); lk.lkt_held = -1; #endif } /* * Called from within softdep_disk_write_complete above. Note that * this routine is always called from interrupt level with further * splbio interrupts blocked. */ static void handle_allocdirect_partdone(adp) struct allocdirect *adp; /* the completed allocdirect */ { struct allocdirect *listadp; struct inodedep *inodedep; long bsize; if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; if (adp->ad_buf != NULL) panic("handle_allocdirect_partdone: dangling dep"); /* * The on-disk inode cannot claim to be any larger than the last * fragment that has been written. Otherwise, the on-disk inode * might have fragments that were not the last block in the file * which would corrupt the filesystem. Thus, we cannot free any * allocdirects after one whose ad_oldblkno claims a fragment as * these blocks must be rolled back to zero before writing the inode. * We check the currently active set of allocdirects in id_inoupdt. */ inodedep = adp->ad_inodedep; bsize = inodedep->id_fs->fs_bsize; for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt); listadp; listadp = TAILQ_NEXT(listadp, ad_next)) { /* found our block */ if (listadp == adp) break; /* continue if ad_oldlbn is not a fragment */ if (listadp->ad_oldsize == 0 || listadp->ad_oldsize == bsize) continue; /* hit a fragment */ return; } /* * If we have reached the end of the current list without * finding the just finished dependency, then it must be * on the future dependency list. Future dependencies cannot * be freed until they are moved to the current list. */ if (listadp == NULL) { #ifdef DEBUG for (listadp = TAILQ_FIRST(&inodedep->id_newinoupdt); listadp; listadp = TAILQ_NEXT(listadp, ad_next)) /* found our block */ if (listadp == adp) break; if (listadp == NULL) panic("handle_allocdirect_partdone: lost dep"); #endif /* DEBUG */ return; } /* * If we have found the just finished dependency, then free * it along with anything that follows it that is complete. */ for (; adp; adp = listadp) { listadp = TAILQ_NEXT(adp, ad_next); if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE) return; free_allocdirect(&inodedep->id_inoupdt, adp, 1); } } /* * Called from within softdep_disk_write_complete above. Note that * this routine is always called from interrupt level with further * splbio interrupts blocked. */ static void handle_allocindir_partdone(aip) struct allocindir *aip; /* the completed allocindir */ { struct indirdep *indirdep; if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE) return; if (aip->ai_buf != NULL) panic("handle_allocindir_partdone: dangling dependency"); indirdep = aip->ai_indirdep; if (indirdep->ir_state & UNDONE) { LIST_REMOVE(aip, ai_next); LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next); return; } ((ufs_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] = aip->ai_newblkno; LIST_REMOVE(aip, ai_next); if (aip->ai_freefrag != NULL) add_to_worklist(&aip->ai_freefrag->ff_list); WORKITEM_FREE(aip, D_ALLOCINDIR); } /* * Called from within softdep_disk_write_complete above to restore * in-memory inode block contents to their most up-to-date state. Note * that this routine is always called from interrupt level with further * splbio interrupts blocked. 
*/ static int handle_written_inodeblock(inodedep, bp) struct inodedep *inodedep; struct buf *bp; /* buffer containing the inode block */ { struct worklist *wk, *filefree; struct allocdirect *adp, *nextadp; struct dinode *dp; int hadchanges; if ((inodedep->id_state & IOSTARTED) == 0) panic("handle_written_inodeblock: not started"); inodedep->id_state &= ~IOSTARTED; inodedep->id_state |= COMPLETE; dp = (struct dinode *)bp->b_data + ino_to_fsbo(inodedep->id_fs, inodedep->id_ino); /* * If we had to rollback the inode allocation because of * bitmaps being incomplete, then simply restore it. * Keep the block dirty so that it will not be reclaimed until * all associated dependencies have been cleared and the * corresponding updates written to disk. */ if (inodedep->id_savedino != NULL) { *dp = *inodedep->id_savedino; FREE(inodedep->id_savedino, M_INODEDEP); inodedep->id_savedino = NULL; if ((bp->b_flags & B_DELWRI) == 0) stat_inode_bitmap++; bdirty(bp); return (1); } /* * Roll forward anything that had to be rolled back before * the inode could be updated. */ hadchanges = 0; for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) { nextadp = TAILQ_NEXT(adp, ad_next); if (adp->ad_state & ATTACHED) panic("handle_written_inodeblock: new entry"); if (adp->ad_lbn < NDADDR) { if (dp->di_db[adp->ad_lbn] != adp->ad_oldblkno) panic("%s: %s #%ld mismatch %d != %d", "handle_written_inodeblock", "direct pointer", adp->ad_lbn, dp->di_db[adp->ad_lbn], adp->ad_oldblkno); dp->di_db[adp->ad_lbn] = adp->ad_newblkno; } else { if (dp->di_ib[adp->ad_lbn - NDADDR] != 0) panic("%s: %s #%ld allocated as %d", "handle_written_inodeblock", "indirect pointer", adp->ad_lbn - NDADDR, dp->di_ib[adp->ad_lbn - NDADDR]); dp->di_ib[adp->ad_lbn - NDADDR] = adp->ad_newblkno; } adp->ad_state &= ~UNDONE; adp->ad_state |= ATTACHED; hadchanges = 1; } if (hadchanges && (bp->b_flags & B_DELWRI) == 0) stat_direct_blk_ptrs++; /* * Reset the file size to its most up-to-date value. */ if (inodedep->id_savedsize == -1) panic("handle_written_inodeblock: bad size"); if (dp->di_size != inodedep->id_savedsize) { dp->di_size = inodedep->id_savedsize; hadchanges = 1; } inodedep->id_savedsize = -1; /* * If there were any rollbacks in the inode block, then it must be * marked dirty so that its will eventually get written back in * its correct form. */ if (hadchanges) bdirty(bp); /* * Process any allocdirects that completed during the update. */ if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL) handle_allocdirect_partdone(adp); /* * Process deallocations that were held pending until the * inode had been written to disk. Freeing of the inode * is delayed until after all blocks have been freed to * avoid creation of new triples * before the old ones have been deleted. */ filefree = NULL; while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) { WORKLIST_REMOVE(wk); switch (wk->wk_type) { case D_FREEFILE: /* * We defer adding filefree to the worklist until * all other additions have been made to ensure * that it will be done after all the old blocks * have been freed. 
*/ if (filefree != NULL) panic("handle_written_inodeblock: filefree"); filefree = wk; continue; case D_MKDIR: handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT); continue; case D_DIRADD: diradd_inode_written(WK_DIRADD(wk), inodedep); continue; case D_FREEBLKS: case D_FREEFRAG: case D_DIRREM: add_to_worklist(wk); continue; default: panic("handle_written_inodeblock: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } if (filefree != NULL) { if (free_inodedep(inodedep) == 0) panic("handle_written_inodeblock: live inodedep"); add_to_worklist(filefree); return (0); } /* * If no outstanding dependencies, free it. */ if (free_inodedep(inodedep) || TAILQ_FIRST(&inodedep->id_inoupdt) == 0) return (0); return (hadchanges); } /* * Process a diradd entry after its dependent inode has been written. * This routine must be called with splbio interrupts blocked. */ static void diradd_inode_written(dap, inodedep) struct diradd *dap; struct inodedep *inodedep; { struct pagedep *pagedep; dap->da_state |= COMPLETE; if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { if (dap->da_state & DIRCHG) pagedep = dap->da_previous->dm_pagedep; else pagedep = dap->da_pagedep; LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list); } /* * Handle the completion of a mkdir dependency. */ static void handle_written_mkdir(mkdir, type) struct mkdir *mkdir; int type; { struct diradd *dap; struct pagedep *pagedep; if (mkdir->md_state != type) panic("handle_written_mkdir: bad type"); dap = mkdir->md_diradd; dap->da_state &= ~type; if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) dap->da_state |= DEPCOMPLETE; if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) { if (dap->da_state & DIRCHG) pagedep = dap->da_previous->dm_pagedep; else pagedep = dap->da_pagedep; LIST_REMOVE(dap, da_pdlist); LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist); } LIST_REMOVE(mkdir, md_mkdirs); WORKITEM_FREE(mkdir, D_MKDIR); } /* * Called from within softdep_disk_write_complete above. * A write operation was just completed. Removed inodes can * now be freed and associated block pointers may be committed. * Note that this routine is always called from interrupt level * with further splbio interrupts blocked. */ static int handle_written_filepage(pagedep, bp) struct pagedep *pagedep; struct buf *bp; /* buffer containing the written page */ { struct dirrem *dirrem; struct diradd *dap, *nextdap; struct direct *ep; int i, chgs; if ((pagedep->pd_state & IOSTARTED) == 0) panic("handle_written_filepage: not started"); pagedep->pd_state &= ~IOSTARTED; /* * Process any directory removals that have been committed. */ while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) { LIST_REMOVE(dirrem, dm_next); dirrem->dm_dirinum = pagedep->pd_ino; add_to_worklist(&dirrem->dm_list); } /* * Free any directory additions that have been committed. */ while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL) free_diradd(dap); /* * Uncommitted directory entries must be restored. 
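 * Restoring them here undoes a rollback made when the write of this page
 * was started: entries that name inodes not yet safely on disk had their
 * d_ino fields cleared so the directory block never references an
 * unwritten inode.  The loop below puts the saved da_newinum back into
 * each entry and marks the diradd ATTACHED again.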
	 */
	for (chgs = 0, i = 0; i < DAHASHSZ; i++) {
		for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap;
		     dap = nextdap) {
			nextdap = LIST_NEXT(dap, da_pdlist);
			if (dap->da_state & ATTACHED)
				panic("handle_written_filepage: attached");
			ep = (struct direct *)
			    ((char *)bp->b_data + dap->da_offset);
			ep->d_ino = dap->da_newinum;
			dap->da_state &= ~UNDONE;
			dap->da_state |= ATTACHED;
			chgs = 1;
			/*
			 * If the inode referenced by the directory has
			 * been written out, then the dependency can be
			 * moved to the pending list.
			 */
			if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
				LIST_REMOVE(dap, da_pdlist);
				LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap,
				    da_pdlist);
			}
		}
	}
	/*
	 * If there were any rollbacks in the directory, then it must be
	 * marked dirty so that it will eventually get written back in
	 * its correct form.
	 */
	if (chgs) {
		if ((bp->b_flags & B_DELWRI) == 0)
			stat_dir_entry++;
		bdirty(bp);
	}
	/*
	 * If no dependencies remain, the pagedep will be freed.
	 * Otherwise it will remain to update the page before it
	 * is written back to disk.
	 */
	if (LIST_FIRST(&pagedep->pd_pendinghd) == 0) {
		for (i = 0; i < DAHASHSZ; i++)
			if (LIST_FIRST(&pagedep->pd_diraddhd[i]) != NULL)
				break;
		if (i == DAHASHSZ) {
			LIST_REMOVE(pagedep, pd_hash);
			WORKITEM_FREE(pagedep, D_PAGEDEP);
			return (0);
		}
	}
	return (1);
}

/*
 * Writing back in-core inode structures.
 *
 * The file system only accesses an inode's contents when it occupies an
 * "in-core" inode structure. These "in-core" structures are separate from
 * the page frames used to cache inode blocks. Only the latter are
 * transferred to/from the disk. So, when the updated contents of the
 * "in-core" inode structure are copied to the corresponding in-memory inode
 * block, the dependencies are also transferred. The following procedure is
 * called when copying a dirty "in-core" inode to a cached inode block.
 */

/*
 * Called when an inode is loaded from disk. If the effective link count
 * differed from the actual link count when it was last flushed, then we
 * need to ensure that the correct effective link count is put back.
 */
void
softdep_load_inodeblock(ip)
	struct inode *ip;	/* the "in_core" copy of the inode */
{
	struct inodedep *inodedep;

	/*
	 * Check for alternate nlink count.
	 */
	ip->i_effnlink = ip->i_nlink;
	ACQUIRE_LOCK(&lk);
	if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) {
		FREE_LOCK(&lk);
		return;
	}
	if (inodedep->id_nlinkdelta != 0) {
		ip->i_effnlink -= inodedep->id_nlinkdelta;
		ip->i_flag |= IN_MODIFIED;
		inodedep->id_nlinkdelta = 0;
		(void) free_inodedep(inodedep);
	}
	FREE_LOCK(&lk);
}

/*
 * This routine is called just before the "in-core" inode
 * information is to be copied to the in-memory inode block.
 * Recall that an inode block contains several inodes. If
 * the force flag is set, then the dependencies will be
 * cleared so that the update can always be made. Note that
 * the buffer is locked when this routine is called, so we
 * will never be in the middle of writing the inode block
 * to disk.
 */
void
softdep_update_inodeblock(ip, bp, waitfor)
	struct inode *ip;	/* the "in_core" copy of the inode */
	struct buf *bp;		/* the buffer containing the inode block */
	int waitfor;		/* nonzero => update must be allowed */
{
	struct inodedep *inodedep;
	struct worklist *wk;
	int error, gotit;

	/*
	 * If the effective link count is not equal to the actual link
	 * count, then we must track the difference in an inodedep while
	 * the inode is (potentially) tossed out of the cache. Otherwise,
	 * if there is no existing inodedep, then there are no dependencies
	 * to track.
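	 *
	 * A worked example with hypothetical counts: if the inode will go
	 * to disk with i_nlink 2 while an uncommitted unlink leaves
	 * i_effnlink at 1, id_nlinkdelta is recorded as 1 below; should the
	 * in-core inode be reclaimed and later reloaded,
	 * softdep_load_inodeblock() above subtracts that delta so the
	 * effective count of 1 is re-established.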
	 */
	ACQUIRE_LOCK(&lk);
	if (ip->i_effnlink != ip->i_nlink) {
		(void) inodedep_lookup(ip->i_fs, ip->i_number, DEPALLOC,
		    &inodedep);
	} else if (inodedep_lookup(ip->i_fs, ip->i_number, 0, &inodedep) == 0) {
		FREE_LOCK(&lk);
		return;
	}
	if (ip->i_nlink < ip->i_effnlink)
		panic("softdep_update_inodeblock: bad delta");
	inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
	/*
	 * Changes have been initiated. Anything depending on these
	 * changes cannot occur until this inode has been written.
	 */
	inodedep->id_state &= ~COMPLETE;
	if ((inodedep->id_state & ONWORKLIST) == 0)
		WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list);
	/*
	 * Any new dependencies associated with the incore inode must
	 * now be moved to the list associated with the buffer holding
	 * the in-memory copy of the inode. Once merged, process any
	 * allocdirects that are completed by the merger.
	 */
	merge_inode_lists(inodedep);
	if (TAILQ_FIRST(&inodedep->id_inoupdt) != NULL)
		handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt));
	/*
	 * Now that the inode has been pushed into the buffer, the
	 * operations dependent on the inode being written to disk
	 * can be moved to the id_bufwait so that they will be
	 * processed when the buffer I/O completes.
	 */
	while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) {
		WORKLIST_REMOVE(wk);
		WORKLIST_INSERT(&inodedep->id_bufwait, wk);
	}
	/*
	 * Newly allocated inodes cannot be written until the bitmap
	 * that allocates them has been written (indicated by
	 * DEPCOMPLETE being set in id_state). If we are doing a
	 * forced sync (e.g., an fsync on a file), we force the bitmap
	 * to be written so that the update can be done.
	 */
	if ((inodedep->id_state & DEPCOMPLETE) != 0 || waitfor == 0) {
		FREE_LOCK(&lk);
		return;
	}
	gotit = getdirtybuf(&inodedep->id_buf, MNT_WAIT);
	FREE_LOCK(&lk);
	if (gotit &&
	    (error = VOP_BWRITE(inodedep->id_buf->b_vp, inodedep->id_buf)) != 0)
		softdep_error("softdep_update_inodeblock: bwrite", error);
	if ((inodedep->id_state & DEPCOMPLETE) == 0)
		panic("softdep_update_inodeblock: update failed");
}

/*
 * Merge the new inode dependency list (id_newinoupdt) into the old
 * inode dependency list (id_inoupdt). This routine must be called
 * with splbio interrupts blocked.
 */
static void
merge_inode_lists(inodedep)
	struct inodedep *inodedep;
{
	struct allocdirect *listadp, *newadp;

	newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
	for (listadp = TAILQ_FIRST(&inodedep->id_inoupdt); listadp && newadp;) {
		if (listadp->ad_lbn < newadp->ad_lbn) {
			listadp = TAILQ_NEXT(listadp, ad_next);
			continue;
		}
		TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
		TAILQ_INSERT_BEFORE(listadp, newadp, ad_next);
		if (listadp->ad_lbn == newadp->ad_lbn) {
			allocdirect_merge(&inodedep->id_inoupdt, newadp,
			    listadp);
			listadp = newadp;
		}
		newadp = TAILQ_FIRST(&inodedep->id_newinoupdt);
	}
	while ((newadp = TAILQ_FIRST(&inodedep->id_newinoupdt)) != NULL) {
		TAILQ_REMOVE(&inodedep->id_newinoupdt, newadp, ad_next);
		TAILQ_INSERT_TAIL(&inodedep->id_inoupdt, newadp, ad_next);
	}
}

/*
 * If we are doing an fsync, then we must ensure that any directory
 * entries for the inode have been written after the inode gets to disk.
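 *
 * (An aside on merge_inode_lists() just above: it folds the "new"
 * allocdirect list into the current one, keeping the result sorted by
 * logical block number and merging the two entries when both lists name
 * the same lbn.  The fragment below is a self-contained, user-space
 * sketch of that merge, offered for illustration only; every name and
 * value in it is hypothetical and nothing in it is part of this file.)
 */
#if 0	/* illustrative sketch, never compiled */
#include <stdio.h>

static int
merge_lbns(const int *cur, int ncur, const int *add, int nadd, int *out)
{
	int i = 0, j = 0, n = 0;

	while (i < ncur && j < nadd) {
		if (cur[i] < add[j])
			out[n++] = cur[i++];
		else if (add[j] < cur[i])
			out[n++] = add[j++];
		else {
			/* Same lbn in both lists: merge into one entry. */
			out[n++] = cur[i++];
			j++;
		}
	}
	while (i < ncur)
		out[n++] = cur[i++];
	while (j < nadd)
		out[n++] = add[j++];
	return (n);
}

int
main(void)
{
	int cur[] = { 0, 2, 5 }, add[] = { 2, 3, 7 }, out[6];
	int i, n;

	n = merge_lbns(cur, 3, add, 3, out);
	for (i = 0; i < n; i++)
		printf("%d ", out[i]);		/* prints: 0 2 3 5 7 */
	printf("\n");
	return (0);
}
#endif
/*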
*/ int softdep_fsync(vp) struct vnode *vp; /* the "in_core" copy of the inode */ { struct diradd *dap, *olddap; struct inodedep *inodedep; struct pagedep *pagedep; struct worklist *wk; struct mount *mnt; struct vnode *pvp; struct inode *ip; struct buf *bp; struct fs *fs; struct proc *p = CURPROC; /* XXX */ int error, ret, flushparent; ino_t parentino; ufs_lbn_t lbn; ip = VTOI(vp); fs = ip->i_fs; for (error = 0, flushparent = 0, olddap = NULL; ; ) { ACQUIRE_LOCK(&lk); if (inodedep_lookup(fs, ip->i_number, 0, &inodedep) == 0) break; if (LIST_FIRST(&inodedep->id_inowait) != NULL || LIST_FIRST(&inodedep->id_bufwait) != NULL || TAILQ_FIRST(&inodedep->id_inoupdt) != NULL || TAILQ_FIRST(&inodedep->id_newinoupdt) != NULL) panic("softdep_fsync: pending ops"); if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL) break; if (wk->wk_type != D_DIRADD) panic("softdep_fsync: Unexpected type %s", TYPENAME(wk->wk_type)); dap = WK_DIRADD(wk); /* * If we have failed to get rid of all the dependencies * then something is seriously wrong. */ if (dap == olddap) panic("softdep_fsync: flush failed"); olddap = dap; /* * Flush our parent if this directory entry * has a MKDIR_PARENT dependency. */ if (dap->da_state & DIRCHG) pagedep = dap->da_previous->dm_pagedep; else pagedep = dap->da_pagedep; mnt = pagedep->pd_mnt; parentino = pagedep->pd_ino; lbn = pagedep->pd_lbn; if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE) panic("softdep_fsync: dirty"); flushparent = dap->da_state & MKDIR_PARENT; /* * If we are being fsync'ed as part of vgone'ing this vnode, * then we will not be able to release and recover the * vnode below, so we just have to give up on writing its * directory entry out. It will eventually be written, just * not now, but then the user was not asking to have it * written, so we are not breaking any promises. */ if (vp->v_flag & VXLOCK) break; /* * We prevent deadlock by always fetching inodes from the * root, moving down the directory tree. Thus, when fetching * our parent directory, we must unlock ourselves before * requesting the lock on our parent. See the comment in * ufs_lookup for details on possible races. */ FREE_LOCK(&lk); VOP_UNLOCK(vp, 0, p); if ((error = VFS_VGET(mnt, parentino, &pvp)) != 0) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (flushparent) { if ((error = UFS_UPDATE(pvp, 1)) != 0) { vput(pvp); return (error); } } /* * Flush directory page containing the inode's name. */ error = bread(pvp, lbn, blksize(fs, VTOI(pvp), lbn), p->p_ucred, &bp); ret = VOP_BWRITE(bp->b_vp, bp); vput(pvp); if (error != 0) return (error); if (ret != 0) return (ret); } FREE_LOCK(&lk); return (0); } /* * Flush all the dirty bitmaps associated with the block device * before flushing the rest of the dirty blocks so as to reduce * the number of dependencies that will have to be rolled back. */ void softdep_fsync_mountdev(vp) struct vnode *vp; { struct buf *bp, *nbp; struct worklist *wk; if (vp->v_type != VBLK) panic("softdep_fsync_mountdev: vnode not VBLK"); ACQUIRE_LOCK(&lk); for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); /* * If it is already scheduled, skip to the next buffer. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("softdep_fsync_mountdev: not dirty"); /* * We are only interested in bitmaps with outstanding * dependencies. 
*/ if ((wk = LIST_FIRST(&bp->b_dep)) == NULL || wk->wk_type != D_BMSAFEMAP) { BUF_UNLOCK(bp); continue; } bremfree(bp); FREE_LOCK(&lk); (void) bawrite(bp); ACQUIRE_LOCK(&lk); /* * Since we may have slept during the I/O, we need * to start from a known point. */ nbp = TAILQ_FIRST(&vp->v_dirtyblkhd); } drain_output(vp, 1); FREE_LOCK(&lk); } /* * This routine is called when we are trying to synchronously flush a * file. This routine must eliminate any filesystem metadata dependencies * so that the syncing routine can succeed by pushing the dirty blocks * associated with the file. If any I/O errors occur, they are returned. */ int softdep_sync_metadata(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct pagedep *pagedep; struct allocdirect *adp; struct allocindir *aip; struct buf *bp, *nbp; struct worklist *wk; int i, error, waitfor; /* * Check whether this vnode is involved in a filesystem * that is doing soft dependency processing. */ if (vp->v_type != VBLK) { if (!DOINGSOFTDEP(vp)) return (0); } else if (vp->v_specmountpoint == NULL || (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP) == 0) return (0); /* * Ensure that any direct block dependencies have been cleared. */ ACQUIRE_LOCK(&lk); if ((error = flush_inodedep_deps(VTOI(vp)->i_fs, VTOI(vp)->i_number))) { FREE_LOCK(&lk); return (error); } /* * For most files, the only metadata dependencies are the * cylinder group maps that allocate their inode or blocks. * The block allocation dependencies can be found by traversing * the dependency lists for any buffers that remain on their * dirty buffer list. The inode allocation dependency will * be resolved when the inode is updated with MNT_WAIT. * This work is done in two passes. The first pass grabs most * of the buffers and begins asynchronously writing them. The * only way to wait for these asynchronous writes is to sleep * on the filesystem vnode which may stay busy for a long time * if the filesystem is active. So, instead, we make a second * pass over the dependencies blocking on each write. In the * usual case we will be blocking against a write that we * initiated, so when it is done the dependency will have been * resolved. Thus the second pass is expected to end quickly. */ waitfor = MNT_NOWAIT; top: if (getdirtybuf(&TAILQ_FIRST(&vp->v_dirtyblkhd), MNT_WAIT) == 0) { FREE_LOCK(&lk); return (0); } bp = TAILQ_FIRST(&vp->v_dirtyblkhd); loop: /* * As we hold the buffer locked, none of its dependencies * will disappear. 
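 *
 * Concretely (hypothetical numbers): with three dirty buffers carrying
 * unsatisfied dependencies, the MNT_NOWAIT pass issues three asynchronous
 * bawrite()s and never sleeps; by the time the MNT_WAIT pass revisits the
 * same dependencies most of those writes have finished, so it either finds
 * them already DEPCOMPLETE or blocks briefly in VOP_BWRITE on the few
 * still in flight.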
*/ for (wk = LIST_FIRST(&bp->b_dep); wk; wk = LIST_NEXT(wk, wk_list)) { switch (wk->wk_type) { case D_ALLOCDIRECT: adp = WK_ALLOCDIRECT(wk); if (adp->ad_state & DEPCOMPLETE) break; nbp = adp->ad_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp->b_vp, nbp)) != 0) { bawrite(bp); return (error); } ACQUIRE_LOCK(&lk); break; case D_ALLOCINDIR: aip = WK_ALLOCINDIR(wk); if (aip->ai_state & DEPCOMPLETE) break; nbp = aip->ai_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp->b_vp, nbp)) != 0) { bawrite(bp); return (error); } ACQUIRE_LOCK(&lk); break; case D_INDIRDEP: restart: for (aip = LIST_FIRST(&WK_INDIRDEP(wk)->ir_deplisthd); aip; aip = LIST_NEXT(aip, ai_next)) { if (aip->ai_state & DEPCOMPLETE) continue; nbp = aip->ai_buf; if (getdirtybuf(&nbp, MNT_WAIT) == 0) goto restart; FREE_LOCK(&lk); if ((error = VOP_BWRITE(nbp->b_vp, nbp)) != 0) { bawrite(bp); return (error); } ACQUIRE_LOCK(&lk); goto restart; } break; case D_INODEDEP: if ((error = flush_inodedep_deps(WK_INODEDEP(wk)->id_fs, WK_INODEDEP(wk)->id_ino)) != 0) { FREE_LOCK(&lk); bawrite(bp); return (error); } break; case D_PAGEDEP: /* * We are trying to sync a directory that may * have dependencies on both its own metadata * and/or dependencies on the inodes of any * recently allocated files. We walk its diradd * lists pushing out the associated inode. */ pagedep = WK_PAGEDEP(wk); for (i = 0; i < DAHASHSZ; i++) { if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0) continue; if ((error = flush_pagedep_deps(vp, pagedep->pd_mnt, &pagedep->pd_diraddhd[i]))) { FREE_LOCK(&lk); bawrite(bp); return (error); } } break; case D_MKDIR: /* * This case should never happen if the vnode has * been properly sync'ed. However, if this function * is used at a place where the vnode has not yet * been sync'ed, this dependency can show up. So, * rather than panic, just flush it. */ nbp = WK_MKDIR(wk)->md_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp->b_vp, nbp)) != 0) { bawrite(bp); return (error); } ACQUIRE_LOCK(&lk); break; case D_BMSAFEMAP: /* * This case should never happen if the vnode has * been properly sync'ed. However, if this function * is used at a place where the vnode has not yet * been sync'ed, this dependency can show up. So, * rather than panic, just flush it. */ nbp = WK_BMSAFEMAP(wk)->sm_buf; if (getdirtybuf(&nbp, waitfor) == 0) break; FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(nbp); } else if ((error = VOP_BWRITE(nbp->b_vp, nbp)) != 0) { bawrite(bp); return (error); } ACQUIRE_LOCK(&lk); break; default: panic("softdep_sync_metadata: Unknown type %s", TYPENAME(wk->wk_type)); /* NOTREACHED */ } } (void) getdirtybuf(&TAILQ_NEXT(bp, b_vnbufs), MNT_WAIT); nbp = TAILQ_NEXT(bp, b_vnbufs); FREE_LOCK(&lk); bawrite(bp); ACQUIRE_LOCK(&lk); if (nbp != NULL) { bp = nbp; goto loop; } /* * We must wait for any I/O in progress to finish so that * all potential buffers on the dirty list will be visible. * Once they are all there, proceed with the second pass * which will wait for the I/O as per above. */ drain_output(vp, 1); /* * The brief unlock is to allow any pent up dependency * processing to be done. 
*/ if (waitfor == MNT_NOWAIT) { waitfor = MNT_WAIT; FREE_LOCK(&lk); ACQUIRE_LOCK(&lk); goto top; } /* * If we have managed to get rid of all the dirty buffers, * then we are done. For certain directories and block * devices, we may need to do further work. */ if (TAILQ_FIRST(&vp->v_dirtyblkhd) == NULL) { FREE_LOCK(&lk); return (0); } FREE_LOCK(&lk); /* * If we are trying to sync a block device, some of its buffers may * contain metadata that cannot be written until the contents of some * partially written files have been written to disk. The only easy * way to accomplish this is to sync the entire filesystem (luckily * this happens rarely). */ if (vp->v_type == VBLK && vp->v_specmountpoint && !VOP_ISLOCKED(vp) && (error = VFS_SYNC(vp->v_specmountpoint, MNT_WAIT, ap->a_cred, ap->a_p)) != 0) return (error); return (0); } /* * Flush the dependencies associated with an inodedep. * Called with splbio blocked. */ static int flush_inodedep_deps(fs, ino) struct fs *fs; ino_t ino; { struct inodedep *inodedep; struct allocdirect *adp; int error, waitfor; struct buf *bp; /* * This work is done in two passes. The first pass grabs most * of the buffers and begins asynchronously writing them. The * only way to wait for these asynchronous writes is to sleep * on the filesystem vnode which may stay busy for a long time * if the filesystem is active. So, instead, we make a second * pass over the dependencies blocking on each write. In the * usual case we will be blocking against a write that we * initiated, so when it is done the dependency will have been * resolved. Thus the second pass is expected to end quickly. * We give a brief window at the top of the loop to allow * any pending I/O to complete. */ for (waitfor = MNT_NOWAIT; ; ) { FREE_LOCK(&lk); ACQUIRE_LOCK(&lk); if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) return (0); for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { if (adp->ad_state & DEPCOMPLETE) continue; bp = adp->ad_buf; if (getdirtybuf(&bp, waitfor) == 0) { if (waitfor == MNT_NOWAIT) continue; break; } FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(bp); } else if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0) { ACQUIRE_LOCK(&lk); return (error); } ACQUIRE_LOCK(&lk); break; } if (adp != NULL) continue; for (adp = TAILQ_FIRST(&inodedep->id_newinoupdt); adp; adp = TAILQ_NEXT(adp, ad_next)) { if (adp->ad_state & DEPCOMPLETE) continue; bp = adp->ad_buf; if (getdirtybuf(&bp, waitfor) == 0) { if (waitfor == MNT_NOWAIT) continue; break; } FREE_LOCK(&lk); if (waitfor == MNT_NOWAIT) { bawrite(bp); } else if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0) { ACQUIRE_LOCK(&lk); return (error); } ACQUIRE_LOCK(&lk); break; } if (adp != NULL) continue; /* * If pass2, we are done, otherwise do pass 2. */ if (waitfor == MNT_WAIT) break; waitfor = MNT_WAIT; } /* * Try freeing inodedep in case all dependencies have been removed. */ if (inodedep_lookup(fs, ino, 0, &inodedep) != 0) (void) free_inodedep(inodedep); return (0); } /* * Eliminate a pagedep dependency by flushing out all its diradd dependencies. * Called with splbio blocked. */ static int flush_pagedep_deps(pvp, mp, diraddhdp) struct vnode *pvp; struct mount *mp; struct diraddhd *diraddhdp; { struct proc *p = CURPROC; /* XXX */ struct inodedep *inodedep; struct ufsmount *ump; struct diradd *dap; struct vnode *vp; int gotit, error = 0; struct buf *bp; ino_t inum; ump = VFSTOUFS(mp); while ((dap = LIST_FIRST(diraddhdp)) != NULL) { /* * Flush ourselves if this directory entry * has a MKDIR_PARENT dependency. 
*/ if (dap->da_state & MKDIR_PARENT) { FREE_LOCK(&lk); if ((error = UFS_UPDATE(pvp, 1)) != 0) break; ACQUIRE_LOCK(&lk); /* * If that cleared dependencies, go on to next. */ if (dap != LIST_FIRST(diraddhdp)) continue; if (dap->da_state & MKDIR_PARENT) panic("flush_pagedep_deps: MKDIR"); } /* * Flush the file on which the directory entry depends. * If the inode has already been pushed out of the cache, * then all the block dependencies will have been flushed * leaving only inode dependencies (e.g., bitmaps). Thus, * we do a ufs_ihashget to check for the vnode in the cache. * If it is there, we do a full flush. If it is no longer * there we need only dispose of any remaining bitmap * dependencies and write the inode to disk. */ inum = dap->da_newinum; FREE_LOCK(&lk); if ((vp = ufs_ihashget(ump->um_dev, inum)) == NULL) { ACQUIRE_LOCK(&lk); if (inodedep_lookup(ump->um_fs, inum, 0, &inodedep) == 0 && dap == LIST_FIRST(diraddhdp)) panic("flush_pagedep_deps: flush 1 failed"); /* * If the inode still has bitmap dependencies, * push them to disk. */ if ((inodedep->id_state & DEPCOMPLETE) == 0) { gotit = getdirtybuf(&inodedep->id_buf,MNT_WAIT); FREE_LOCK(&lk); if (gotit && (error = VOP_BWRITE(inodedep->id_buf->b_vp, inodedep->id_buf)) != 0) break; ACQUIRE_LOCK(&lk); } if (dap != LIST_FIRST(diraddhdp)) continue; /* * If the inode is still sitting in a buffer waiting * to be written, push it to disk. */ FREE_LOCK(&lk); if ((error = bread(ump->um_devvp, fsbtodb(ump->um_fs, ino_to_fsba(ump->um_fs, inum)), (int)ump->um_fs->fs_bsize, NOCRED, &bp)) != 0) break; if ((error = VOP_BWRITE(bp->b_vp, bp)) != 0) break; ACQUIRE_LOCK(&lk); if (dap == LIST_FIRST(diraddhdp)) panic("flush_pagedep_deps: flush 2 failed"); continue; } if (vp->v_type == VDIR) { /* * A newly allocated directory must have its "." and * ".." entries written out before its name can be * committed in its parent. We do not want or need * the full semantics of a synchronous VOP_FSYNC as * that may end up here again, once for each directory * level in the filesystem. Instead, we push the blocks * and wait for them to clear. */ if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) { vput(vp); break; } drain_output(vp, 0); } error = UFS_UPDATE(vp, 1); vput(vp); if (error) break; /* * If we have failed to get rid of all the dependencies * then something is seriously wrong. */ if (dap == LIST_FIRST(diraddhdp)) panic("flush_pagedep_deps: flush 3 failed"); ACQUIRE_LOCK(&lk); } if (error) ACQUIRE_LOCK(&lk); return (error); } /* * A large burst of file addition or deletion activity can drive the * memory load excessively high. Therefore we deliberately slow things * down and speed up the I/O processing if we find ourselves with too * many dependencies in progress. */ static int request_cleanup(resource, islocked) int resource; int islocked; { struct callout_handle handle; struct proc *p = CURPROC; /* * We never hold up the filesystem syncer process. */ if (p == filesys_syncer) return (0); /* * If we are resource constrained on inode dependencies, try * flushing some dirty inodes. Otherwise, we are constrained * by file deletions, so try accelerating flushes of directories * with removal dependencies. We would like to do the cleanup * here, but we probably hold an inode locked at this point and * that might deadlock against one that we try to clean. So, * the best that we can do is request the syncer daemon to do * the cleanup for us. 
*/ switch (resource) { case FLUSH_INODES: stat_ino_limit_push += 1; req_clear_inodedeps = 1; break; case FLUSH_REMOVE: stat_blk_limit_push += 1; req_clear_remove = 1; break; default: panic("request_cleanup: unknown type"); } /* * Hopefully the syncer daemon will catch up and awaken us. * We wait at most tickdelay before proceeding in any case. */ if (islocked == 0) ACQUIRE_LOCK(&lk); if (proc_waiting == 0) { proc_waiting = 1; handle = timeout(pause_timer, NULL, tickdelay > 2 ? tickdelay : 2); } FREE_LOCK_INTERLOCKED(&lk); (void) tsleep((caddr_t)&proc_waiting, PPAUSE | PCATCH, "softupdate", 0); ACQUIRE_LOCK_INTERLOCKED(&lk); if (proc_waiting) { untimeout(pause_timer, NULL, handle); proc_waiting = 0; } else { switch (resource) { case FLUSH_INODES: stat_ino_limit_hit += 1; break; case FLUSH_REMOVE: stat_blk_limit_hit += 1; break; } } if (islocked == 0) FREE_LOCK(&lk); return (1); } /* * Awaken processes pausing in request_cleanup and clear proc_waiting * to indicate that there is no longer a timer running. */ void pause_timer(arg) void *arg; { proc_waiting = 0; wakeup(&proc_waiting); } /* * Flush out a directory with at least one removal dependency in an effort * to reduce the number of freefile and freeblks dependency structures. */ static void clear_remove(p) struct proc *p; { struct pagedep_hashhead *pagedephd; struct pagedep *pagedep; static int next = 0; struct mount *mp; struct vnode *vp; int error, cnt; ino_t ino; ACQUIRE_LOCK(&lk); for (cnt = 0; cnt < pagedep_hash; cnt++) { pagedephd = &pagedep_hashtbl[next++]; if (next >= pagedep_hash) next = 0; for (pagedep = LIST_FIRST(pagedephd); pagedep; pagedep = LIST_NEXT(pagedep, pd_hash)) { if (LIST_FIRST(&pagedep->pd_dirremhd) == NULL) continue; mp = pagedep->pd_mnt; ino = pagedep->pd_ino; FREE_LOCK(&lk); if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_remove: vget", error); return; } if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) softdep_error("clear_remove: fsync", error); drain_output(vp, 0); vput(vp); return; } } FREE_LOCK(&lk); } /* * Clear out a block of dirty inodes in an effort to reduce * the number of inodedep dependency structures. */ static void clear_inodedeps(p) struct proc *p; { struct inodedep_hashhead *inodedephd; struct inodedep *inodedep; static int next = 0; struct mount *mp; struct vnode *vp; struct fs *fs; int error, cnt; ino_t firstino, lastino, ino; ACQUIRE_LOCK(&lk); /* * Pick a random inode dependency to be cleared. * We will then gather up all the inodes in its block * that have dependencies and flush them out. */ for (cnt = 0; cnt < inodedep_hash; cnt++) { inodedephd = &inodedep_hashtbl[next++]; if (next >= inodedep_hash) next = 0; if ((inodedep = LIST_FIRST(inodedephd)) != NULL) break; } /* * Ugly code to find mount point given pointer to superblock. */ fs = inodedep->id_fs; for (mp = CIRCLEQ_FIRST(&mountlist); mp != (void *)&mountlist; mp = CIRCLEQ_NEXT(mp, mnt_list)) if ((mp->mnt_flag & MNT_SOFTDEP) && fs == VFSTOUFS(mp)->um_fs) break; /* * Find the last inode in the block with dependencies. */ firstino = inodedep->id_ino & ~(INOPB(fs) - 1); for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--) if (inodedep_lookup(fs, lastino, 0, &inodedep) != 0) break; /* * Asynchronously push all but the last inode with dependencies. * Synchronously push the last inode with dependencies to ensure * that the inode block gets written to free up the inodedeps. 
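 *
 * For example (hypothetical numbers), with INOPB(fs) == 64 and a victim
 * inodedep for inode 200, firstino becomes 200 & ~63 == 192, and the scan
 * above walks lastino down from 255 to the highest inode in that block
 * which still has an inodedep; the loop below then pushes each inode in
 * 192..lastino that has dependencies, doing only the last one
 * synchronously.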
*/ for (ino = firstino; ino <= lastino; ino++) { if (inodedep_lookup(fs, ino, 0, &inodedep) == 0) continue; FREE_LOCK(&lk); if ((error = VFS_VGET(mp, ino, &vp)) != 0) { softdep_error("clear_inodedeps: vget", error); return; } if (ino == lastino) { if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_WAIT, p))) softdep_error("clear_inodedeps: fsync1", error); } else { if ((error = VOP_FSYNC(vp, p->p_ucred, MNT_NOWAIT, p))) softdep_error("clear_inodedeps: fsync2", error); drain_output(vp, 0); } vput(vp); ACQUIRE_LOCK(&lk); } FREE_LOCK(&lk); } /* * Acquire exclusive access to a buffer. * Must be called with splbio blocked. * Return 1 if buffer was acquired. */ static int getdirtybuf(bpp, waitfor) struct buf **bpp; int waitfor; { struct buf *bp; for (;;) { if ((bp = *bpp) == NULL) return (0); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) == 0) break; if (waitfor != MNT_WAIT) return (0); FREE_LOCK_INTERLOCKED(&lk); if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL) != ENOLCK) panic("getdirtybuf: inconsistent lock"); ACQUIRE_LOCK_INTERLOCKED(&lk); } if ((bp->b_flags & B_DELWRI) == 0) { BUF_UNLOCK(bp); return (0); } bremfree(bp); return (1); } /* * Wait for pending output on a vnode to complete. * Must be called with vnode locked. */ static void drain_output(vp, islocked) struct vnode *vp; int islocked; { if (!islocked) ACQUIRE_LOCK(&lk); while (vp->v_numoutput) { vp->v_flag |= VBWAIT; FREE_LOCK_INTERLOCKED(&lk); tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "drainvp", 0); ACQUIRE_LOCK_INTERLOCKED(&lk); } if (!islocked) FREE_LOCK(&lk); } /* * Called whenever a buffer that is being invalidated or reallocated * contains dependencies. This should only happen if an I/O error has * occurred. The routine is called with the buffer locked. */ void softdep_deallocate_dependencies(bp) struct buf *bp; { if ((bp->b_flags & B_ERROR) == 0) panic("softdep_deallocate_dependencies: dangling deps"); softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error); panic("softdep_deallocate_dependencies: unrecovered I/O error"); } /* * Function to handle asynchronous write errors in the filesystem. */ void softdep_error(func, error) char *func; int error; { /* XXX should do something better! */ printf("%s: got error %d while accessing filesystem\n", func, error); } Index: head/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- head/sys/ufs/ffs/ffs_vfsops.c (revision 49534) +++ head/sys/ufs/ffs/ffs_vfsops.c (revision 49535) @@ -1,1299 +1,1297 @@ /* * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 - * $Id: ffs_vfsops.c,v 1.99 1999/05/31 11:29:24 phk Exp $ + * $Id: ffs_vfsops.c,v 1.100 1999/07/11 19:16:50 phk Exp $ */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include - -#include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part"); static int ffs_sbupdate __P((struct ufsmount *, int)); static int ffs_reload __P((struct mount *,struct ucred *,struct proc *)); static int ffs_oldfscompat __P((struct fs *)); static int ffs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int ffs_init __P((struct vfsconf *)); static struct vfsops ufs_vfsops = { ffs_mount, ufs_start, ffs_unmount, ufs_root, ufs_quotactl, ffs_statfs, ffs_sync, ffs_vget, ffs_fhtovp, ffs_vptofh, ffs_init, }; VFS_SET(ufs_vfsops, ufs, 0); /* * ffs_mount * * Called when mounting local physical media * * PARAMETERS: * mountroot * mp mount point structure * path NULL (flag for root mount!!!) * data * ndp * p process (user credentials check [statfs]) * * mount * mp mount point structure * path path to mount point * data pointer to argument struct in user space * ndp mount point namei() return (used for * credentials on reload), reused to look * up block device. * p process (user credentials check) * * RETURNS: 0 Success * !0 error number (errno.h) * * LOCK STATE: * * ENTRY * mount point is locked * EXIT * mount point is locked * * NOTES: * A NULL path can be used for a flag since the mount * system call will fail with EFAULT in copyinstr in * namei() if it is a genuine NULL from the user. 
*/ static int ffs_mount( mp, path, data, ndp, p) struct mount *mp; /* mount struct pointer*/ char *path; /* path to mount point*/ caddr_t data; /* arguments to FS specific mount*/ struct nameidata *ndp; /* mount point credentials*/ struct proc *p; /* process requesting mount*/ { size_t size; int err = 0; struct vnode *devvp; struct ufs_args args; struct ufsmount *ump = 0; register struct fs *fs; int error, flags, ronly = 0; mode_t accessmode; /* * Use NULL path to flag a root mount */ if( path == NULL) { /* *** * Mounting root file system *** */ if ((err = bdevvp(rootdev, &rootvp))) { printf("ffs_mountroot: can't find rootvp"); return (err); } if (bdevsw(rootdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw(rootdev)->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) { /* fs specific cleanup (if any)*/ goto error_1; } goto dostatfs; /* success*/ } /* *** * Mounting non-root file system or updating a file system *** */ /* copy in user arguments*/ err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)); if (err) goto error_1; /* can't get arguments*/ /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. * Disallow clearing MNT_NOCLUSTERR and MNT_NOCLUSTERW flags, * if block device requests. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; devvp = ump->um_devvp; err = 0; ronly = fs->fs_ronly; /* MNT_RELOAD might change this */ if (bdevsw(ump->um_dev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw(ump->um_dev)->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; if (ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; if (mp->mnt_flag & MNT_SOFTDEP) { err = softdep_flushfiles(mp, flags, p); } else { err = ffs_flushfiles(mp, flags, p); } ronly = 1; } if (!err && (mp->mnt_flag & MNT_RELOAD)) err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); if (err) { goto error_1; } if (ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (p->p_ucred->cr_uid != 0) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p)) != 0) { VOP_UNLOCK(devvp, 0, p); return (error); } VOP_UNLOCK(devvp, 0, p); } if (fs->fs_clean == 0) { if (mp->mnt_flag & MNT_FORCE) { printf( "WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { printf( "WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", fs->fs_fsmnt); err = EPERM; goto error_1; } } /* check to see if we need to start softdep */ if (fs->fs_flags & FS_DOSOFTDEP) { err = softdep_mount(devvp, mp, fs, p->p_ucred); if (err) goto error_1; } ronly = 0; } /* * Soft updates is incompatible with "async", * so if we are doing softupdates stop the user * from setting the async flag in an update. * Softdep_mount() clears it in an initial mount * or ro->rw remount. */ if (mp->mnt_flag & MNT_SOFTDEP) { mp->mnt_flag &= ~MNT_ASYNC; } /* if not updating name...*/ if (args.fspec == 0) { /* * Process export requests. Jumping to "success" * will return the vfs_export() error code. */ err = vfs_export(mp, &ump->um_export, &args.export); goto success; } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. 
*/ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); err = namei(ndp); if (err) { /* can't get devvp!*/ goto error_1; } devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { err = ENOTBLK; goto error_2; } if (bdevsw(devvp->v_rdev) == NULL) { err = ENXIO; goto error_2; } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (p->p_ucred->cr_uid != 0) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) != 0) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); } if (mp->mnt_flag & MNT_UPDATE) { /* ******************** * UPDATE * If it's not the same vnode, or at least the same device * then it's not correct. ******************** */ if (devvp != ump->um_devvp) { if ( devvp->v_rdev == ump->um_devvp->v_rdev) { vrele(devvp); } else { err = EINVAL; /* needs translation */ } } else vrele(devvp); /* * Update device name only on success */ if( !err) { /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); } } else { /* ******************** * NEW MOUNT ******************** */ if (bdevsw(devvp->v_rdev)->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw(devvp->v_rdev)->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; /* * Since this is a new mount, we want the names for * the device and the mount point copied in. If an * error occurs, the mountpoint is discarded by the * upper level code. */ /* Save "last mounted on" info for mount point (NULL pad)*/ copyinstr( path, /* mount point*/ mp->mnt_stat.f_mntonname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size); /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, /* device name*/ mp->mnt_stat.f_mntfromname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); err = ffs_mountfs(devvp, mp, p, M_FFSNODE); } if (err) { goto error_2; } dostatfs: /* * Initialize FS stat information in mount struct; uses both * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname * * This code is common to root and non-root mounts */ (void)VFS_STATFS(mp, &mp->mnt_stat, p); goto success; error_2: /* error with devvp held*/ /* release devvp before failing*/ vrele(devvp); error_1: /* no state to back out*/ success: if (!err && path && (mp->mnt_flag & MNT_UPDATE)) { /* Update clean flag after changing read-onlyness. */ fs = ump->um_fs; if (ronly != fs->fs_ronly) { fs->fs_ronly = ronly; fs->fs_clean = ronly && (fs->fs_flags & FS_UNCLEAN) == 0 ? 1 : 0; ffs_sbupdate(ump, MNT_WAIT); } } return (err); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. 
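 *
 * (Steps 1-3 below operate on the device vnode; steps 4-6 are then carried
 * out in a single pass over the mount's vnode list, restarting that pass
 * whenever the list changes underneath us.)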
*/ static int ffs_reload(mp, cred, p) register struct mount *mp; struct ucred *cred; struct proc *p; { register struct vnode *vp, *nvp, *devvp; struct inode *ip; struct csum *space; struct buf *bp; struct fs *fs, *newfs; struct partinfo dpart; dev_t dev; int i, blks, size, error; int32_t *lp; if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. */ devvp = VFSTOUFS(mp)->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = vinvalbuf(devvp, 0, cred, p, 0, 0); VOP_UNLOCK(devvp, 0, p); if (error) panic("ffs_reload: dirty1"); dev = devvp->v_rdev; /* * Only VMIO the backing device if the backing device is a real * block device. See ffs_mountmfs() for more details. */ if (devvp->v_tag != VT_MFS && devvp->v_type == VBLK) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); vfs_object_create(devvp, p, p->p_ucred); simple_lock(&devvp->v_interlock); VOP_UNLOCK(devvp, LK_INTERLOCK, p); } /* * Step 2: re-read superblock from disk. */ if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) size = DEV_BSIZE; else size = dpart.disklab->d_secsize; if ((error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) != 0) return (error); newfs = (struct fs *)bp->b_data; if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE || newfs->fs_bsize < sizeof(struct fs)) { brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOUFS(mp)->um_fs; /* * Copy pointer fields back into superblock before copying in XXX * new superblock. These should really be in the ufsmount. XXX * Note that important parameters (eg fs_ncg) are unchanged. */ bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp)); newfs->fs_maxcluster = fs->fs_maxcluster; bcopy(newfs, fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; brelse(bp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; ffs_oldfscompat(fs); /* * Step 3: re-read summary information from disk. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = fs->fs_csp[0]; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, NOCRED, &bp); if (error) return (error); bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size); brelse(bp); } /* * We no longer know anything about clusters per cylinder group. */ if (fs->fs_contigsumsize > 0) { lp = fs->fs_maxcluster; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; } loop: simple_lock(&mntvnode_slock); for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { if (vp->v_mount != mp) { simple_unlock(&mntvnode_slock); goto loop; } nvp = vp->v_mntvnodes.le_next; /* * Step 4: invalidate all inactive vnodes. */ if (vrecycle(vp, &mntvnode_slock, p)) goto loop; /* * Step 5: invalidate all cached file data. */ simple_lock(&vp->v_interlock); simple_unlock(&mntvnode_slock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { goto loop; } if (vinvalbuf(vp, 0, cred, p, 0, 0)) panic("ffs_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. 
*/ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { vput(vp); return (error); } ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)); ip->i_effnlink = ip->i_nlink; brelse(bp); vput(vp); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); return (0); } /* * Common code for mount and mountroot */ int ffs_mountfs(devvp, mp, p, malloctype) register struct vnode *devvp; struct mount *mp; struct proc *p; struct malloc_type *malloctype; { register struct ufsmount *ump; struct buf *bp; register struct fs *fs; dev_t dev; struct partinfo dpart; caddr_t base, space; int error, i, blks, size, ronly; int32_t *lp; struct ucred *cred; u_int64_t maxfilesize; /* XXX */ size_t strsize; int ncount; dev = devvp->v_rdev; cred = p ? p->p_ucred : NOCRED; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); ncount = vcount(devvp); if (ncount > 1 && devvp != rootvp) return (EBUSY); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0); VOP_UNLOCK(devvp, 0, p); if (error) return (error); /* * Only VMIO the backing device if the backing device is a real * block device. This excludes the original MFS implementation. * Note that it is optional that the backing device be VMIOed. This * increases the opportunity for metadata caching. */ if (devvp->v_tag != VT_MFS && devvp->v_type == VBLK) { vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); vfs_object_create(devvp, p, p->p_ucred); simple_lock(&devvp->v_interlock); VOP_UNLOCK(devvp, LK_INTERLOCK, p); } ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0) size = DEV_BSIZE; else size = dpart.disklab->d_secsize; bp = NULL; ump = NULL; if ((error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) != 0) goto out; fs = (struct fs *)bp->b_data; if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || fs->fs_bsize < sizeof(struct fs)) { error = EINVAL; /* XXX needs translation */ goto out; } fs->fs_fmod = 0; fs->fs_flags &= ~FS_UNCLEAN; if (fs->fs_clean == 0) { fs->fs_flags |= FS_UNCLEAN; if (ronly || (mp->mnt_flag & MNT_FORCE)) { printf( "WARNING: %s was not properly dismounted\n", fs->fs_fsmnt); } else { printf( "WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck\n", fs->fs_fsmnt); error = EPERM; goto out; } } /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */ if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) { error = EROFS; /* needs translation */ goto out; } ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK); bzero((caddr_t)ump, sizeof *ump); ump->um_malloctype = malloctype; ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK); ump->um_blkatoff = ffs_blkatoff; ump->um_truncate = ffs_truncate; ump->um_update = ffs_update; ump->um_valloc = ffs_valloc; ump->um_vfree = ffs_vfree; bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; brelse(bp); bp = NULL; fs = ump->um_fs; fs->fs_ronly = ronly; if (ronly == 0) { fs->fs_fmod = 1; fs->fs_clean = 0; } size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK); for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; if ((error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, cred, &bp)) != 0) { free(base, M_UFSMNT); goto out; } bcopy(bp->b_data, space, (u_int)size); fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space; space += size; brelse(bp); bp = NULL; } if (fs->fs_contigsumsize > 0) { fs->fs_maxcluster = lp = (int32_t *)space; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; } mp->mnt_data = (qaddr_t)ump; mp->mnt_stat.f_fsid.val[0] = fs->fs_id[0]; mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; if (fs->fs_id[0] == 0 || fs->fs_id[1] == 0 || vfs_getvfs(&mp->mnt_stat.f_fsid)) vfs_getnewfsid(mp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; devvp->v_specmountpoint = mp; ffs_oldfscompat(fs); /* * Set FS local "last mounted on" information (NULL pad) */ copystr( mp->mnt_stat.f_mntonname, /* mount point*/ fs->fs_fsmnt, /* copy area*/ sizeof(fs->fs_fsmnt) - 1, /* max size*/ &strsize); /* real size*/ bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize); if( mp->mnt_flag & MNT_ROOTFS) { /* * Root mount; update timestamp in mount structure. * this will be used by the common root mount code * to update the system clock. */ mp->mnt_time = fs->fs_time; } ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */ maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; /* XXX */ if (fs->fs_maxfilesize > maxfilesize) /* XXX */ fs->fs_maxfilesize = maxfilesize; /* XXX */ if (ronly == 0) { if ((fs->fs_flags & FS_DOSOFTDEP) && (error = softdep_mount(devvp, mp, fs, cred)) != 0) { free(base, M_UFSMNT); goto out; } fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT); } return (0); out: devvp->v_specmountpoint = NULL; if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p); if (ump) { free(ump->um_fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } /* * Sanity checks for old file systems. * * XXX - goes away some day. 
*/ static int ffs_oldfscompat(fs) struct fs *fs; { fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */ fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */ if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ fs->fs_nrpos = 8; /* XXX */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ #if 0 int i; /* XXX */ u_int64_t sizepb = fs->fs_bsize; /* XXX */ /* XXX */ fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */ for (i = 0; i < NIADDR; i++) { /* XXX */ sizepb *= NINDIR(fs); /* XXX */ fs->fs_maxfilesize += sizepb; /* XXX */ } /* XXX */ #endif fs->fs_maxfilesize = (u_quad_t) 1LL << 39; fs->fs_qbmask = ~fs->fs_bmask; /* XXX */ fs->fs_qfmask = ~fs->fs_fmask; /* XXX */ } /* XXX */ return (0); } /* * unmount system call */ int ffs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct ufsmount *ump; register struct fs *fs; int error, flags; flags = 0; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; } if (mp->mnt_flag & MNT_SOFTDEP) { if ((error = softdep_flushfiles(mp, flags, p)) != 0) return (error); } else { if ((error = ffs_flushfiles(mp, flags, p)) != 0) return (error); } ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_ronly == 0) { fs->fs_clean = fs->fs_flags & FS_UNCLEAN ? 0 : 1; error = ffs_sbupdate(ump, MNT_WAIT); if (error) { fs->fs_clean = 0; return (error); } } ump->um_devvp->v_specmountpoint = NULL; vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0); error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, NOCRED, p); vrele(ump->um_devvp); free(fs->fs_csp[0], M_UFSMNT); free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Flush out all the files in a filesystem. */ int ffs_flushfiles(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { register struct ufsmount *ump; int error; ump = VFSTOUFS(mp); #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { int i; error = vflush(mp, NULLVP, SKIPSYSTEM|flags); if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP) continue; quotaoff(p, mp, i); } /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } #endif /* * Flush all the files. */ if ((error = vflush(mp, NULL, flags)) != 0) return (error); /* * Flush filesystem metadata. */ vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p); VOP_UNLOCK(ump->um_devvp, 0, p); return (error); } /* * Get file system statistics. */ int ffs_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct ufsmount *ump; register struct fs *fs; ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_magic != FS_MAGIC) panic("ffs_statfs"); sbp->f_bsize = fs->fs_fsize; sbp->f_iosize = fs->fs_bsize; sbp->f_blocks = fs->fs_dsize; sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree; sbp->f_bavail = freespace(fs, fs->fs_minfree); sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree; if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. 
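 *
 * Roughly, the pass below walks mp->mnt_vnodelist under mntvnode_slock,
 * skipping vnodes whose inode has none of IN_ACCESS, IN_CHANGE,
 * IN_MODIFIED or IN_UPDATE set and whose dirty buffer list is empty.
 * Each remaining vnode gets vget()/VOP_FSYNC()/vrele(); character-device
 * vnodes are just pushed with UFS_UPDATE().  After the loop the
 * filesystem's own device vnode is fsync'ed (unless MNT_LAZY) and a
 * modified superblock is rewritten via ffs_sbupdate().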
* * Note: we are always called with the filesystem marked `MPBUSY'. */ int ffs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *nvp, *vp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; int error, allerror = 0; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ printf("fs = %s\n", fs->fs_fsmnt); panic("ffs_sync: rofs mod"); } /* * Write back each (modified) inode. */ simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; ip = VTOI(vp); if ((vp->v_type == VNON) || (((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) && (TAILQ_EMPTY(&vp->v_dirtyblkhd) || (waitfor == MNT_LAZY)))) { simple_unlock(&vp->v_interlock); continue; } if (vp->v_type != VCHR) { simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; } if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); simple_lock(&mntvnode_slock); } else { simple_unlock(&mntvnode_slock); simple_unlock(&vp->v_interlock); /* UFS_UPDATE(vp, waitfor == MNT_WAIT); */ UFS_UPDATE(vp, 0); simple_lock(&mntvnode_slock); } } simple_unlock(&mntvnode_slock); /* * Force stale file system control information to be flushed. */ if (waitfor != MNT_LAZY) { if (ump->um_mountp->mnt_flag & MNT_SOFTDEP) waitfor = MNT_NOWAIT; vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) allerror = error; VOP_UNLOCK(ump->um_devvp, 0, p); } #ifdef QUOTA qsync(mp); #endif /* * Write back modified superblock. */ if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) allerror = error; return (allerror); } /* * Look up a FFS dinode number to find its incore vnode, otherwise read it * in from disk. If it is in core, wait for the lock bit to clear, then * return the inode locked. Detection and handling of mount points must be * done by the calling routine. */ static int ffs_inode_hash_lock; int ffs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { struct fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; dev_t dev; int error; ump = VFSTOUFS(mp); dev = ump->um_dev; restart: if ((*vpp = ufs_ihashget(dev, ino)) != NULL) { return (0); } /* * Lock out the creation of new entries in the FFS hash table in * case getnewvnode() or MALLOC() blocks, otherwise a duplicate * may occur! */ if (ffs_inode_hash_lock) { while (ffs_inode_hash_lock) { ffs_inode_hash_lock = -1; tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0); } goto restart; } ffs_inode_hash_lock = 1; /* * If this MALLOC() is performed after the getnewvnode() * it might block, leaving a vnode with a NULL v_data to be * found by ffs_sync() if a sync happens to fire right then, * which will cause a panic because ffs_sync() blindly * dereferences vp->v_data (as well it should). */ MALLOC(ip, struct inode *, sizeof(struct inode), ump->um_malloctype, M_WAITOK); /* Allocate a new vnode/inode. 
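 *
 * ffs_inode_hash_lock doubles as a wakeup flag: a waiter stores -1
 * before sleeping, so whoever clears the lock knows whether a wakeup()
 * on &ffs_inode_hash_lock is required.  A rough sketch of the release
 * sequence used on every exit path below:
 *
 *	if (ffs_inode_hash_lock < 0)
 *		wakeup(&ffs_inode_hash_lock);
 *	ffs_inode_hash_lock = 0;
 *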
*/ error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp); if (error) { if (ffs_inode_hash_lock < 0) wakeup(&ffs_inode_hash_lock); ffs_inode_hash_lock = 0; *vpp = NULL; FREE(ip, ump->um_malloctype); return (error); } bzero((caddr_t)ip, sizeof(struct inode)); lockinit(&ip->i_lock, PINOD, "inode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_fs = fs = ump->um_fs; ip->i_dev = dev; ip->i_number = ino; #ifdef QUOTA { int i; for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; } #endif /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ ufs_ihashins(ip); if (ffs_inode_hash_lock < 0) wakeup(&ffs_inode_hash_lock); ffs_inode_hash_lock = 0; /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ brelse(bp); vput(vp); *vpp = NULL; return (error); } ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); if (DOINGSOFTDEP(vp)) softdep_load_inodeblock(ip); else ip->i_effnlink = ip->i_nlink; bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); if (error) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization now that aliasing has been resolved. */ ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { ip->i_gen = random() / 2 + 1; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ ip->i_uid = ip->i_din.di_ouid; /* XXX */ ip->i_gid = ip->i_din.di_ogid; /* XXX */ } /* XXX */ *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - call ffs_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via. exflagsp and credanonp */ int ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { register struct ufid *ufhp; struct fs *fs; ufhp = (struct ufid *)fhp; fs = VFSTOUFS(mp)->um_fs; if (ufhp->ufid_ino < ROOTINO || ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int ffs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct inode *ip; register struct ufid *ufhp; ip = VTOI(vp); ufhp = (struct ufid *)fhp; ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; return (0); } /* * Initialize the filesystem; just use ufs_init. 
*/ static int ffs_init(vfsp) struct vfsconf *vfsp; { softdep_initialize(); return (ufs_init(vfsp)); } /* * Write a superblock and associated information back to disk. */ static int ffs_sbupdate(mp, waitfor) struct ufsmount *mp; int waitfor; { register struct fs *dfs, *fs = mp->um_fs; register struct buf *bp; int blks; caddr_t space; int i, size, error, allerror = 0; /* * First write back the summary information. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = (caddr_t)fs->fs_csp[0]; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 0, 0); bcopy(space, bp->b_data, (u_int)size); space += size; if (waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) allerror = error; } /* * Now write back the superblock itself. If any errors occurred * up to this point, then fail so that the superblock avoids * being written out as clean. */ if (allerror) return (allerror); bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); fs->fs_fmod = 0; fs->fs_time = time_second; bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); /* Restore compatibility to old file systems. XXX */ dfs = (struct fs *)bp->b_data; /* XXX */ if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ dfs->fs_nrpos = -1; /* XXX */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ int32_t *lp, tmp; /* XXX */ /* XXX */ lp = (int32_t *)&dfs->fs_qbmask; /* XXX */ tmp = lp[4]; /* XXX */ for (i = 4; i > 0; i--) /* XXX */ lp[i] = lp[i-1]; /* XXX */ lp[0] = tmp; /* XXX */ } /* XXX */ dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */ if (waitfor != MNT_WAIT) bawrite(bp); else if ((error = bwrite(bp)) != 0) allerror = error; return (allerror); } Index: head/sys/ufs/ffs/ffs_vnops.c =================================================================== --- head/sys/ufs/ffs/ffs_vnops.c (revision 49534) +++ head/sys/ufs/ffs/ffs_vnops.c (revision 49535) @@ -1,266 +1,265 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 - * $Id: ffs_vnops.c,v 1.57 1999/06/18 05:49:46 mckusick Exp $ + * $Id: ffs_vnops.c,v 1.58 1999/06/26 02:46:39 mckusick Exp $ */ #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include - -#include static int ffs_fsync __P((struct vop_fsync_args *)); static int ffs_getpages __P((struct vop_getpages_args *)); static int ffs_putpages __P((struct vop_putpages_args *)); static int ffs_read __P((struct vop_read_args *)); static int ffs_write __P((struct vop_write_args *)); /* Global vfs data structures for ufs. */ vop_t **ffs_vnodeop_p; static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperate }, { &vop_fsync_desc, (vop_t *) ffs_fsync }, { &vop_getpages_desc, (vop_t *) ffs_getpages }, { &vop_putpages_desc, (vop_t *) ffs_putpages }, { &vop_read_desc, (vop_t *) ffs_read }, { &vop_balloc_desc, (vop_t *) ffs_balloc }, { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks }, { &vop_write_desc, (vop_t *) ffs_write }, { NULL, NULL } }; static struct vnodeopv_desc ffs_vnodeop_opv_desc = { &ffs_vnodeop_p, ffs_vnodeop_entries }; vop_t **ffs_specop_p; static struct vnodeopv_entry_desc ffs_specop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperatespec }, { &vop_fsync_desc, (vop_t *) ffs_fsync }, { NULL, NULL } }; static struct vnodeopv_desc ffs_specop_opv_desc = { &ffs_specop_p, ffs_specop_entries }; vop_t **ffs_fifoop_p; static struct vnodeopv_entry_desc ffs_fifoop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperatefifo }, { &vop_fsync_desc, (vop_t *) ffs_fsync }, { NULL, NULL } }; static struct vnodeopv_desc ffs_fifoop_opv_desc = { &ffs_fifoop_p, ffs_fifoop_entries }; VNODEOP_SET(ffs_vnodeop_opv_desc); VNODEOP_SET(ffs_specop_opv_desc); VNODEOP_SET(ffs_fifoop_opv_desc); #include /* * Synch an open file. */ /* ARGSUSED */ static int ffs_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct buf *bp; struct buf *nbp; int s, error, passes, skipmeta; daddr_t lbn; if (vp->v_type == VBLK) { lbn = INT_MAX; if (vp->v_specmountpoint != NULL && (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) softdep_fsync_mountdev(vp); } else { struct inode *ip; ip = VTOI(vp); lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); } /* * Flush all dirty buffers associated with a vnode. 
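 *
 * Roughly: for an MNT_WAIT fsync the scan below runs twice, the first
 * pass (skipmeta == 1) skipping metadata buffers (b_lblkno < 0) so file
 * data reaches disk before the metadata picked up on the second pass.
 * B_SCANNED marks buffers already handled, since the dirty list must be
 * rescanned from the head after any sleep.  Up to NIADDR + 1 passes are
 * made, the last of them using synchronous bwrite() so that write
 * errors can be reported to the caller.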
*/ passes = NIADDR + 1; skipmeta = 0; if (ap->a_waitfor == MNT_WAIT) skipmeta = 1; s = splbio(); loop: for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = TAILQ_NEXT(bp, b_vnbufs)) bp->b_flags &= ~B_SCANNED; for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { nbp = TAILQ_NEXT(bp, b_vnbufs); /* * First time through on a synchronous call, * or if it's already scheduled, skip to the next * buffer */ if ((bp->b_flags & B_SCANNED) || ((skipmeta == 1) && (bp->b_lblkno < 0)) || BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); /* * If data is outstanding to another vnode, or we were * asked to wait for everything, or it's not a file or BDEV, * start the IO on this buffer immediatly. */ bp->b_flags |= B_SCANNED; if (((bp->b_vp != vp) || (ap->a_waitfor == MNT_WAIT)) || ((vp->v_type != VREG) && (vp->v_type != VBLK))) { /* * On our final pass through, do all I/O synchronously * so that we can find out if our flush is failing * because of write errors. */ if (passes > 0 || (ap->a_waitfor != MNT_WAIT)) { if ((bp->b_flags & B_CLUSTEROK) && ap->a_waitfor != MNT_WAIT) { BUF_UNLOCK(bp); (void) vfs_bio_awrite(bp); } else { bremfree(bp); splx(s); (void) bawrite(bp); s = splbio(); } } else { bremfree(bp); splx(s); if ((error = bwrite(bp)) != 0) return (error); s = splbio(); } } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) { /* * If the buffer is for data that has been truncated * off the file, then throw it away. */ bremfree(bp); bp->b_flags |= B_INVAL | B_NOCACHE; splx(s); brelse(bp); s = splbio(); } else { BUF_UNLOCK(bp); vfs_bio_awrite(bp); } /* * Since we may have slept during the I/O, we need * to start from a known point. */ nbp = TAILQ_FIRST(&vp->v_dirtyblkhd); } /* * If we were asked to do this synchronously, then go back for * another pass, this time doing the metadata. */ if (skipmeta) { skipmeta = 0; goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 4, "ffsfsn", 0); } /* * Ensure that any filesystem metatdata associated * with the vnode has been written. */ splx(s); if ((error = softdep_sync_metadata(ap)) != 0) return (error); s = splbio(); if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { /* * Block devices associated with filesystems may * have new I/O requests posted for them even if * the vnode is locked, so no amount of trying will * get them clean. Thus we give block devices a * good effort, then just give up. For all other file * types, go around and try again until it is clean. */ if (passes > 0) { passes -= 1; goto loop; } #ifdef DIAGNOSTIC if (vp->v_type != VBLK) vprint("ffs_fsync: dirty", vp); #endif } } splx(s); return (UFS_UPDATE(vp, ap->a_waitfor == MNT_WAIT)); } Index: head/sys/ufs/mfs/mfs_vnops.c =================================================================== --- head/sys/ufs/mfs/mfs_vnops.c (revision 49534) +++ head/sys/ufs/mfs/mfs_vnops.c (revision 49535) @@ -1,429 +1,428 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mfs_vnops.c 8.11 (Berkeley) 5/22/95 - * $Id: mfs_vnops.c,v 1.44 1999/05/02 23:56:57 alc Exp $ + * $Id: mfs_vnops.c,v 1.45 1999/06/26 02:46:41 mckusick Exp $ */ #include #include #include #include #include #include #include #include #include - -#include +#include #include #include static int mfs_badop __P((struct vop_generic_args *)); static int mfs_bmap __P((struct vop_bmap_args *)); static int mfs_close __P((struct vop_close_args *)); static int mfs_fsync __P((struct vop_fsync_args *)); static int mfs_freeblks __P((struct vop_freeblks_args *)); static int mfs_inactive __P((struct vop_inactive_args *)); /* XXX */ static int mfs_open __P((struct vop_open_args *)); static int mfs_reclaim __P((struct vop_reclaim_args *)); /* XXX */ static int mfs_print __P((struct vop_print_args *)); /* XXX */ static int mfs_strategy __P((struct vop_strategy_args *)); /* XXX */ static int mfs_getpages __P((struct vop_getpages_args *)); /* XXX */ /* * mfs vnode operations. */ vop_t **mfs_vnodeop_p; static struct vnodeopv_entry_desc mfs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) mfs_badop }, { &vop_bmap_desc, (vop_t *) mfs_bmap }, { &vop_bwrite_desc, (vop_t *) vop_defaultop }, { &vop_close_desc, (vop_t *) mfs_close }, { &vop_freeblks_desc, (vop_t *) mfs_freeblks }, { &vop_fsync_desc, (vop_t *) mfs_fsync }, { &vop_getpages_desc, (vop_t *) mfs_getpages }, { &vop_inactive_desc, (vop_t *) mfs_inactive }, { &vop_ioctl_desc, (vop_t *) vop_enotty }, { &vop_islocked_desc, (vop_t *) vop_defaultop }, { &vop_lock_desc, (vop_t *) vop_defaultop }, { &vop_open_desc, (vop_t *) mfs_open }, { &vop_print_desc, (vop_t *) mfs_print }, { &vop_reclaim_desc, (vop_t *) mfs_reclaim }, { &vop_strategy_desc, (vop_t *) mfs_strategy }, { &vop_unlock_desc, (vop_t *) vop_defaultop }, { NULL, NULL } }; static struct vnodeopv_desc mfs_vnodeop_opv_desc = { &mfs_vnodeop_p, mfs_vnodeop_entries }; VNODEOP_SET(mfs_vnodeop_opv_desc); /* * Vnode Operations. * * Open called to allow memory filesystem to initialize and * validate before actual IO. Record our process identifier * so we can tell when we are doing I/O to ourself. 
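 *
 * mfs_strategy() below uses that recorded pid to choose one of three
 * paths, roughly:
 *
 *	if (mfsp->mfs_pid == 0)             bcopy to/from mfs_baseoff (mini-root)
 *	else if (mfsp->mfs_pid == p->p_pid) mfs_doio(bp, mfsp)  (I/O to self)
 *	else                                queue bp and wakeup() the server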
*/ /* ARGSUSED */ static int mfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { if (ap->a_vp->v_type != VBLK) { panic("mfs_open not VBLK"); /* NOTREACHED */ } return (0); } static int mfs_fsync(ap) struct vop_fsync_args *ap; { return (VOCALL(spec_vnodeop_p, VOFFSET(vop_fsync), ap)); } /* * mfs_freeblks() - hook to allow us to free physical memory. * * We implement the B_FREEBUF strategy. We can't just madvise() * here because we have to do it in the correct order vs other bio * requests, so we queue it. * * Note: geteblk() sets B_INVAL. We leave it set to guarentee buffer * throw-away on brelse()? XXX */ static int mfs_freeblks(ap) struct vop_freeblks_args /* { struct vnode *a_vp; daddr_t a_addr; daddr_t a_length; } */ *ap; { struct buf *bp; struct vnode *vp; if (!vfinddev(ap->a_vp->v_rdev, VBLK, &vp) || vp->v_usecount == 0) panic("mfs_freeblks: bad dev"); bp = geteblk(ap->a_length); bp->b_flags |= B_FREEBUF | B_ASYNC; bp->b_dev = ap->a_vp->v_rdev; bp->b_blkno = ap->a_addr; bp->b_offset = dbtob(ap->a_addr); bp->b_bcount = ap->a_length; BUF_KERNPROC(bp); VOP_STRATEGY(vp, bp); return(0); } /* * Pass I/O requests to the memory filesystem process. */ static int mfs_strategy(ap) struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap; { register struct buf *bp = ap->a_bp; register struct mfsnode *mfsp; struct vnode *vp; struct proc *p = curproc; /* XXX */ int s; if (!vfinddev(bp->b_dev, VBLK, &vp) || vp->v_usecount == 0) panic("mfs_strategy: bad dev"); mfsp = VTOMFS(vp); /* * splbio required for queueing/dequeueing, in case of forwarded * BPs from bio interrupts (?). It may not be necessary. */ s = splbio(); if (mfsp->mfs_pid == 0) { /* * mini-root. Note: B_FREEBUF not supported at the moment, * I'm not sure what kind of dataspace b_data is in. */ caddr_t base; base = mfsp->mfs_baseoff + (bp->b_blkno << DEV_BSHIFT); if (bp->b_flags & B_FREEBUF) ; if (bp->b_flags & B_READ) bcopy(base, bp->b_data, bp->b_bcount); else bcopy(bp->b_data, base, bp->b_bcount); biodone(bp); } else if (mfsp->mfs_pid == p->p_pid) { /* * VOP to self */ splx(s); mfs_doio(bp, mfsp); s = splbio(); } else { /* * VOP from some other process, queue to MFS process and * wake it up. */ bufq_insert_tail(&mfsp->buf_queue, bp); wakeup((caddr_t)vp); } splx(s); return (0); } /* * Memory file system I/O. * * Trivial on the HP since buffer has already been mapping into KVA space. * * Read and Write are handled with a simple copyin and copyout. * * We also partially support VOP_FREEBLKS() via B_FREEBUF. We can't implement * completely -- for example, on fragments or inode metadata, but we can * implement it for page-aligned requests. */ void mfs_doio(bp, mfsp) register struct buf *bp; struct mfsnode *mfsp; { caddr_t base = mfsp->mfs_baseoff + (bp->b_blkno << DEV_BSHIFT); if (bp->b_flags & B_FREEBUF) { /* * Implement B_FREEBUF, which allows the filesystem to tell * a block device when blocks are no longer needed (like when * a file is deleted). We use the hook to MADV_FREE the VM. * This makes an MFS filesystem work as well or better then * a sun-style swap-mounted filesystem. 
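 *
 * Only whole pages can be given back: the code below first advances
 * base to the next page boundary, then rounds the byte count down to a
 * page multiple before issuing madvise(MADV_FREE).  For example, an
 * 8192-byte buffer whose base sits 512 bytes into a 4K page loses 3584
 * bytes at the front and 512 at the tail, so exactly one 4096-byte page
 * is freed.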
*/ int bytes = bp->b_bcount; if ((vm_offset_t)base & PAGE_MASK) { int n = PAGE_SIZE - ((vm_offset_t)base & PAGE_MASK); bytes -= n; base += n; } if (bytes > 0) { struct madvise_args uap; bytes &= ~PAGE_MASK; if (bytes != 0) { bzero(&uap, sizeof(uap)); uap.addr = base; uap.len = bytes; uap.behav = MADV_FREE; madvise(curproc, &uap); } } bp->b_error = 0; } else if (bp->b_flags & B_READ) { /* * Read data from our 'memory' disk */ bp->b_error = copyin(base, bp->b_data, bp->b_bcount); } else { /* * Write data to our 'memory' disk */ bp->b_error = copyout(bp->b_data, base, bp->b_bcount); } if (bp->b_error) bp->b_flags |= B_ERROR; biodone(bp); } /* * This is a noop, simply returning what one has been given. */ static int mfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; ufs_daddr_t a_bn; struct vnode **a_vpp; ufs_daddr_t *a_bnp; int *a_runp; } */ *ap; { if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; return (0); } /* * Memory filesystem close routine */ /* ARGSUSED */ static int mfs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct mfsnode *mfsp = VTOMFS(vp); register struct buf *bp; int error; /* * Finish any pending I/O requests. */ while ((bp = bufq_first(&mfsp->buf_queue)) != NULL) { bufq_remove(&mfsp->buf_queue, bp); mfs_doio(bp, mfsp); wakeup((caddr_t)bp); } /* * On last close of a memory filesystem * we must invalidate any in core blocks, so that * we can, free up its vnode. */ if ((error = vinvalbuf(vp, 1, ap->a_cred, ap->a_p, 0, 0)) != 0) return (error); /* * There should be no way to have any more uses of this * vnode, so if we find any other uses, it is a panic. */ if (vp->v_usecount > 1) printf("mfs_close: ref count %d > 1\n", vp->v_usecount); if (vp->v_usecount > 1 || (bufq_first(&mfsp->buf_queue) != NULL)) panic("mfs_close"); /* * Send a request to the filesystem server to exit. */ mfsp->mfs_active = 0; wakeup((caddr_t)vp); return (0); } /* * Memory filesystem inactive routine */ /* ARGSUSED */ static int mfs_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct mfsnode *mfsp = VTOMFS(vp); if (bufq_first(&mfsp->buf_queue) != NULL) panic("mfs_inactive: not inactive (next buffer %p)", bufq_first(&mfsp->buf_queue)); VOP_UNLOCK(vp, 0, ap->a_p); return (0); } /* * Reclaim a memory filesystem devvp so that it can be reused. */ static int mfs_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; FREE(vp->v_data, M_MFSNODE); vp->v_data = NULL; return (0); } /* * Print out the contents of an mfsnode. 
*/ static int mfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { register struct mfsnode *mfsp = VTOMFS(ap->a_vp); printf("tag VT_MFS, pid %ld, base %p, size %ld\n", (long)mfsp->mfs_pid, (void *)mfsp->mfs_baseoff, mfsp->mfs_size); return (0); } /* * Block device bad operation */ static int mfs_badop(struct vop_generic_args *ap) { int i; printf("mfs_badop[%s]\n", ap->a_desc->vdesc_name); i = vop_defaultop(ap); printf("mfs_badop[%s] = %d\n", ap->a_desc->vdesc_name,i); return (i); } static int mfs_getpages(ap) struct vop_getpages_args *ap; { return (VOCALL(spec_vnodeop_p, VOFFSET(vop_getpages), ap)); } Index: head/sys/ufs/ufs/ufs_bmap.c =================================================================== --- head/sys/ufs/ufs/ufs_bmap.c (revision 49534) +++ head/sys/ufs/ufs/ufs_bmap.c (revision 49535) @@ -1,355 +1,354 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 - * $Id: ufs_bmap.c,v 1.27 1999/05/07 10:11:36 phk Exp $ + * $Id: ufs_bmap.c,v 1.28 1999/05/08 06:40:25 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include -#include /* * Bmap converts a the logical block number of a file to its physical block * number on the disk. The conversion is done by using the logical block * number to index into the array of block pointers described by the dinode. 
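 *
 * For scale: with an 8K block size and 4-byte disk addresses,
 * NINDIR(fs) is 8192 / 4 = 2048, so the NDADDR (12) direct pointers
 * cover the first 96KB of a file, the single indirect block the next
 * 2048 * 8K = 16MB, the double indirect 2048^2 * 8K = 32GB, and the
 * triple indirect 2048^3 * 8K = 64TB.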
*/ int ufs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; ufs_daddr_t a_bn; struct vnode **a_vpp; ufs_daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { /* * Check for underlying vnode requests and ensure that logical * to physical mapping is requested. */ if (ap->a_vpp != NULL) *ap->a_vpp = VTOI(ap->a_vp)->i_devvp; if (ap->a_bnp == NULL) return (0); return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL, ap->a_runp, ap->a_runb)); } /* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ufs_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). */ int ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) struct vnode *vp; ufs_daddr_t bn; ufs_daddr_t *bnp; struct indir *ap; int *nump; int *runp; int *runb; { register struct inode *ip; struct buf *bp; struct ufsmount *ump; struct mount *mp; struct vnode *devvp; struct indir a[NIADDR+1], *xap; ufs_daddr_t daddr; long metalbn; int error, maxrun = 0, num; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); #ifdef DIAGNOSTIC if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL)) panic("ufs_bmaparray: invalid arguments"); #endif if (runp) { *runp = 0; } if (runb) { *runb = 0; } maxrun = 0; if (runp || runb || (vp->v_maxio == 0)) { struct vnode *devvp; int blksize; blksize = mp->mnt_stat.f_iosize; /* * XXX * If MAXPHYS is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. */ devvp = ip->i_devvp; if (devvp != NULL && devvp->v_tag != VT_MFS && devvp->v_type == VBLK) { if (bdevsw(devvp->v_rdev)->d_maxio > MAXPHYS) { maxrun = MAXPHYS; vp->v_maxio = MAXPHYS; } else { maxrun = bdevsw(devvp->v_rdev)->d_maxio; vp->v_maxio = bdevsw(devvp->v_rdev)->d_maxio; } maxrun = maxrun / blksize; maxrun -= 1; } if (maxrun <= 0) { vp->v_maxio = DFLTPHYS; maxrun = DFLTPHYS / blksize; maxrun -= 1; } } xap = ap == NULL ? a : ap; if (!nump) nump = # error = ufs_getlbns(vp, bn, xap, nump); if (error) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); if (*bnp == 0) *bnp = -1; else if (runp) { daddr_t bnb = bn; for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); ++bn, ++*runp); bn = bnb; if (runb && (bn > 0)) { for (--bn; (bn >= 0) && (*runb < maxrun) && is_sequential(ump, ip->i_db[bn], ip->i_db[bn+1]); --bn, ++*runb); } } return (0); } /* Get disk address out of indirect block array */ daddr = ip->i_ib[xap->in_off]; devvp = VFSTOUFS(vp->v_mount)->um_devvp; for (bp = NULL, ++xap; --num; ++xap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = xap->in_lbn; if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. 
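 *
 * Note that indirect blocks live in the buffer cache under the file
 * vnode itself, at the negative logical block numbers (metalbn)
 * described above; that is why incore(vp, metalbn) and
 * getblk(vp, metalbn, ...) are used here rather than the device vnode.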
*/ if (bp) bqrelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); if ((bp->b_flags & B_CACHE) == 0) { #ifdef DIAGNOSTIC if (!daddr) panic("ufs_bmaparray: indirect block not in cache"); #endif bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; bp->b_flags &= ~(B_INVAL|B_ERROR); vfs_busy_pages(bp, 0); VOP_STRATEGY(bp->b_vp, bp); curproc->p_stats->p_ru.ru_inblock++; /* XXX */ error = biowait(bp); if (error) { brelse(bp); return (error); } } daddr = ((ufs_daddr_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) { for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((ufs_daddr_t *)bp->b_data)[bn - 1], ((ufs_daddr_t *)bp->b_data)[bn]); ++bn, ++*runp); bn = xap->in_off; if (runb && bn) { for(--bn; bn > 0 && *runb < maxrun && is_sequential(ump, ((daddr_t *)bp->b_data)[bn], ((daddr_t *)bp->b_data)[bn+1]); --bn, ++*runb); } } } if (bp) bqrelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; return (0); } /* * Create an array of logical block number/offset pairs which represent the * path of indirect blocks required to access a data block. The first "pair" * contains the logical block number of the appropriate single, double or * triple indirect block and the offset into the inode indirect block array. * Note, the logical block number of the inode single/double/triple indirect * block appears twice in the array, once with the offset into the i_ib and * once with the offset into the page itself. */ int ufs_getlbns(vp, bn, ap, nump) struct vnode *vp; ufs_daddr_t bn; struct indir *ap; int *nump; { long blockcnt, metalbn, realbn; struct ufsmount *ump; int i, numlevels, off; int64_t qblockcnt; ump = VFSTOUFS(vp->v_mount); if (nump) *nump = 0; numlevels = 0; realbn = bn; if ((long)bn < 0) bn = -(long)bn; /* The first NDADDR blocks are direct blocks. */ if (bn < NDADDR) return (0); /* * Determine the number of levels of indirection. After this loop * is done, blockcnt indicates the number of data blocks possible * at the previous level of indirection, and NIADDR - i is the number * of levels of indirection needed to locate the requested block. */ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); /* * Use int64_t's here to avoid overflow for triple indirect * blocks when longs have 32 bits and the block size is more * than 4K. */ qblockcnt = (int64_t)blockcnt * MNINDIR(ump); if (bn < qblockcnt) break; blockcnt = qblockcnt; } /* Calculate the address of the first meta-block. */ if (realbn >= 0) metalbn = -(realbn - bn + NIADDR - i); else metalbn = -(-realbn - bn + NIADDR - i); /* * At each iteration, off is the offset into the bap array which is * an array of disk addresses at the current level of indirection. * The logical block number and the offset in that block are stored * into the argument array. */ ap->in_lbn = metalbn; ap->in_off = off = NIADDR - i; ap->in_exists = 0; ap++; for (++numlevels; i <= NIADDR; i++) { /* If searching for a meta-data block, quit when found. 
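 *
 * A worked example, with the standard NDADDR of 12: for data block 17
 * (the sixth block past the direct pointers) ufs_getlbns() returns two
 * entries, both naming the single indirect block at logical block -12;
 * the first carries in_off 0 (its slot in i_ib[]), the second in_off 5
 * (the slot inside the indirect block that points at block 17).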
*/ if (metalbn == realbn) break; off = (bn / blockcnt) % MNINDIR(ump); ++numlevels; ap->in_lbn = metalbn; ap->in_off = off; ap->in_exists = 0; ++ap; metalbn -= -1 + off * blockcnt; blockcnt /= MNINDIR(ump); } if (nump) *nump = numlevels; return (0); } Index: head/sys/ufs/ufs/ufs_vnops.c =================================================================== --- head/sys/ufs/ufs/ufs_vnops.c (revision 49534) +++ head/sys/ufs/ufs/ufs_vnops.c (revision 49535) @@ -1,2337 +1,2337 @@ /* * Copyright (c) 1982, 1986, 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 - * $Id: ufs_vnops.c,v 1.115 1999/06/16 23:27:53 mckusick Exp $ + * $Id: ufs_vnops.c,v 1.116 1999/07/13 18:20:13 mckusick Exp $ */ #include "opt_quota.h" #include "opt_suiddir.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include -#include #include #include #include #include #include #include static int ufs_abortop __P((struct vop_abortop_args *)); static int ufs_access __P((struct vop_access_args *)); static int ufs_advlock __P((struct vop_advlock_args *)); static int ufs_chmod __P((struct vnode *, int, struct ucred *, struct proc *)); static int ufs_chown __P((struct vnode *, uid_t, gid_t, struct ucred *, struct proc *)); static int ufs_close __P((struct vop_close_args *)); static int ufs_create __P((struct vop_create_args *)); static int ufs_getattr __P((struct vop_getattr_args *)); static int ufs_link __P((struct vop_link_args *)); static int ufs_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *)); static int ufs_missingop __P((struct vop_generic_args *ap)); static int ufs_mkdir __P((struct vop_mkdir_args *)); static int ufs_mknod __P((struct vop_mknod_args *)); static int ufs_mmap __P((struct vop_mmap_args *)); static int ufs_open __P((struct vop_open_args *)); static int ufs_pathconf __P((struct vop_pathconf_args *)); static int ufs_print __P((struct vop_print_args *)); static int ufs_readdir __P((struct vop_readdir_args *)); static int ufs_readlink __P((struct vop_readlink_args *)); static int ufs_remove __P((struct vop_remove_args *)); static int ufs_rename __P((struct vop_rename_args *)); static int ufs_rmdir __P((struct vop_rmdir_args *)); static int ufs_setattr __P((struct vop_setattr_args *)); static int ufs_strategy __P((struct vop_strategy_args *)); static int ufs_symlink __P((struct vop_symlink_args *)); static int ufs_whiteout __P((struct vop_whiteout_args *)); static int ufsfifo_close __P((struct vop_close_args *)); static int ufsfifo_read __P((struct vop_read_args *)); static int ufsfifo_write __P((struct vop_write_args *)); static int ufsspec_close __P((struct vop_close_args *)); static int ufsspec_read __P((struct vop_read_args *)); static int ufsspec_write __P((struct vop_write_args *)); union _qcvt { int64_t qcvt; int32_t val[2]; }; #define SETHIGH(q, h) { \ union _qcvt tmp; \ tmp.qcvt = (q); \ tmp.val[_QUAD_HIGHWORD] = (h); \ (q) = tmp.qcvt; \ } #define SETLOW(q, l) { \ union _qcvt tmp; \ tmp.qcvt = (q); \ tmp.val[_QUAD_LOWWORD] = (l); \ (q) = tmp.qcvt; \ } /* * A virgin directory (no blushing please). */ static struct dirtemplate mastertemplate = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; static struct odirtemplate omastertemplate = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." 
}; void ufs_itimes(vp) struct vnode *vp; { struct inode *ip; time_t tv_sec; ip = VTOI(vp); if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) return; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { tv_sec = time_second; if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp)) ip->i_flag |= IN_LAZYMOD; else ip->i_flag |= IN_MODIFIED; if (ip->i_flag & IN_ACCESS) ip->i_atime = tv_sec; if (ip->i_flag & IN_UPDATE) { ip->i_mtime = tv_sec; ip->i_modrev++; } if (ip->i_flag & IN_CHANGE) ip->i_ctime = tv_sec; } ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); } /* * Create a regular file */ int ufs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { int error; error = ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); return (0); } /* * Mknod vnode call */ /* ARGSUSED */ int ufs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; int error; error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); ip = VTOI(*vpp); ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ ip->i_rdev = vap->va_rdev; } /* * Remove inode so that it will be reloaded by VFS_VGET and * checked to see if it is an alias of an existing entry in * the inode cache. */ vput(*vpp); (*vpp)->v_type = VNON; vgone(*vpp); *vpp = 0; return (0); } /* * Open called. * * Nothing to do. */ /* ARGSUSED */ int ufs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { /* * Files marked append-only must be opened for appending. */ if ((VTOI(ap->a_vp)->i_flags & APPEND) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); return (0); } /* * Close called. * * Update the times on the inode. */ /* ARGSUSED */ int ufs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) ufs_itimes(vp); simple_unlock(&vp->v_interlock); return (0); } int ufs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; mode_t mask, mode = ap->a_mode; register gid_t *gp; int i; #ifdef QUOTA int error; #endif /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ if (mode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #ifdef QUOTA if ((error = getinoquota(ip)) != 0) return (error); #endif break; default: break; } } /* If immutable bit set, nobody gets to write it. */ if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE)) return (EPERM); /* Otherwise, user id 0 always gets access. */ if (cred->cr_uid == 0) return (0); mask = 0; /* Otherwise, check the owner. 
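 *
 * For example, on a mode 0640 file the owner asking for VREAD|VWRITE
 * builds mask = S_IRUSR|S_IWUSR = 0600 and passes, while a group
 * member asking for VWRITE builds mask = S_IWGRP = 0020 and gets
 * EACCES, since 0640 & 0020 != 0020.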
*/ if (cred->cr_uid == ip->i_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (ip->i_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* ARGSUSED */ int ufs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); register struct vattr *vap = ap->a_vap; ufs_itimes(vp); /* * Copy from inode table */ vap->va_fsid = dev2udev(ip->i_dev); vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode & ~IFMT; vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; vap->va_rdev = ip->i_rdev; vap->va_size = ip->i_din.di_size; vap->va_atime.tv_sec = ip->i_atime; vap->va_atime.tv_nsec = ip->i_atimensec; vap->va_mtime.tv_sec = ip->i_mtime; vap->va_mtime.tv_nsec = ip->i_mtimensec; vap->va_ctime.tv_sec = ip->i_ctime; vap->va_ctime.tv_nsec = ip->i_ctimensec; vap->va_flags = ip->i_flags; vap->va_gen = ip->i_gen; /* * Use the information contained in v_specinfo for VBLK and VCHR * vnodes, and in the underlying mount point for (typically) VREG * vnodes. Note that vp->v_specmountpoint can be NULL. */ if (vp->v_type == VBLK) { vap->va_blocksize = vp->v_specinfo->si_bsize_best; } else if (vp->v_type == VCHR) { vap->va_blocksize = vp->v_specinfo->si_bsize_max; } else { vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; } vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); vap->va_type = IFTOVT(ip->i_mode); vap->va_filerev = ip->i_modrev; return (0); } /* * Set attribute vnode op. called from several syscalls */ int ufs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; int error; /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } if (vap->va_flags != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != ip->i_uid && (error = suser_xxx(cred, p, PRISON_ROOT))) return (error); if (cred->cr_uid == 0) { if ((ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) && securelevel > 0) return (EPERM); ip->i_flags = vap->va_flags; } else { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || (vap->va_flags & UF_SETTABLE) != vap->va_flags) return (EPERM); ip->i_flags &= SF_SETTABLE; ip->i_flags |= (vap->va_flags & UF_SETTABLE); } ip->i_flag |= IN_CHANGE; if (vap->va_flags & (IMMUTABLE | APPEND)) return (0); } if (ip->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* * Go through the fields and update iff not VNOVAL. 
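 *
 * VNOVAL is the "not requested" sentinel, so each block below runs only
 * for attributes the caller actually supplied: ownership first
 * (ufs_chown), then size (UFS_TRUNCATE), then the timestamps, and mode
 * last (ufs_chmod).  A utimes()-style caller, for instance, arrives
 * with only va_atime and va_mtime set and touches nothing else.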
*/ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p)) != 0) return (error); } if (vap->va_size != VNOVAL) { /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; default: break; } if ((error = UFS_TRUNCATE(vp, vap->va_size, 0, cred, p)) != 0) return (error); } ip = VTOI(vp); if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != ip->i_uid && (error = suser_xxx(cred, p, PRISON_ROOT)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(vp, VWRITE, cred, p)))) return (error); if (vap->va_atime.tv_sec != VNOVAL) ip->i_flag |= IN_ACCESS; if (vap->va_mtime.tv_sec != VNOVAL) ip->i_flag |= IN_CHANGE | IN_UPDATE; ufs_itimes(vp); if (vap->va_atime.tv_sec != VNOVAL) ip->i_atime = vap->va_atime.tv_sec; if (vap->va_mtime.tv_sec != VNOVAL) ip->i_mtime = vap->va_mtime.tv_sec; error = UFS_UPDATE(vp, 0); if (error) return (error); } error = 0; if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); error = ufs_chmod(vp, (int)vap->va_mode, cred, p); } VN_POLLEVENT(vp, POLLATTRIB); return (error); } /* * Change the mode on a file. * Inode must be locked before calling. */ static int ufs_chmod(vp, mode, cred, p) register struct vnode *vp; register int mode; register struct ucred *cred; struct proc *p; { register struct inode *ip = VTOI(vp); int error; if (cred->cr_uid != ip->i_uid) { error = suser_xxx(cred, p, PRISON_ROOT); if (error) return (error); } if (cred->cr_uid) { if (vp->v_type != VDIR && (mode & S_ISTXT)) return (EFTYPE); if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) return (EPERM); } ip->i_mode &= ~ALLPERMS; ip->i_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; return (0); } /* * Perform chown operation on inode ip; * inode must be locked prior to call. */ static int ufs_chown(vp, uid, gid, cred, p) register struct vnode *vp; uid_t uid; gid_t gid; struct ucred *cred; struct proc *p; { register struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; #ifdef QUOTA register int i; long change; #endif if (uid == (uid_t)VNOVAL) uid = ip->i_uid; if (gid == (gid_t)VNOVAL) gid = ip->i_gid; /* * If we don't own the file, are trying to change the owner * of the file, or are not a member of the target group, * the caller must be superuser or the call fails. 
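 *
 * With QUOTA the id change is bracketed by a charge/uncharge dance:
 * the old uid/gid give back i_blocks blocks and one inode (chkdq/chkiq
 * with negative counts), the new ids are installed and charged, and if
 * either charge fails the original ids are restored and recharged so
 * the quota files stay balanced.  A non-root chown that actually
 * changes the ids also clears the setuid and setgid bits at the end.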
*/ if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid || (gid != ip->i_gid && !groupmember((gid_t)gid, cred))) && (error = suser_xxx(cred, p, PRISON_ROOT))) return (error); ogid = ip->i_gid; ouid = ip->i_uid; #ifdef QUOTA if ((error = getinoquota(ip)) != 0) return (error); if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } change = ip->i_blocks; (void) chkdq(ip, -change, cred, CHOWN); (void) chkiq(ip, -1, cred, CHOWN); for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } #endif ip->i_gid = gid; ip->i_uid = uid; #ifdef QUOTA if ((error = getinoquota(ip)) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) goto good; else (void) chkdq(ip, -change, cred, CHOWN|FORCE); } for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } ip->i_gid = ogid; ip->i_uid = ouid; if (getinoquota(ip) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } (void) chkdq(ip, change, cred, FORCE|CHOWN); (void) chkiq(ip, 1, cred, FORCE|CHOWN); (void) getinoquota(ip); } return (error); good: if (getinoquota(ip)) panic("ufs_chown: lost quota"); #endif /* QUOTA */ ip->i_flag |= IN_CHANGE; if (cred->cr_uid != 0 && (ouid != uid || ogid != gid)) ip->i_mode &= ~(ISUID | ISGID); return (0); } /* * Mmap a file * * NB Currently unsupported. 
*/ /* ARGSUSED */ int ufs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } int ufs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & APPEND)) { error = EPERM; goto out; } error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); VN_POLLEVENT(vp, POLLNLINK); VN_POLLEVENT(dvp, POLLWRITE); out: return (error); } /* * link vnode call */ int ufs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; struct inode *ip; struct direct newdir; int error; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (tdvp->v_mount != vp->v_mount) { VOP_ABORTOP(tdvp, cnp); error = EXDEV; goto out2; } if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { VOP_ABORTOP(tdvp, cnp); goto out2; } ip = VTOI(vp); if ((nlink_t)ip->i_nlink >= LINK_MAX) { VOP_ABORTOP(tdvp, cnp); error = EMLINK; goto out1; } if (ip->i_flags & (IMMUTABLE | APPEND)) { VOP_ABORTOP(tdvp, cnp); error = EPERM; goto out1; } ip->i_effnlink++; ip->i_nlink++; ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) softdep_increase_linkcnt(ip); error = UFS_UPDATE(vp, !(DOINGSOFTDEP(vp) | DOINGASYNC(vp))); if (!error) { ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL); } if (error) { ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; } zfree(namei_zone, cnp->cn_pnbuf); out1: if (tdvp != vp) VOP_UNLOCK(vp, 0, p); out2: VN_POLLEVENT(vp, POLLNLINK); VN_POLLEVENT(tdvp, POLLWRITE); return (error); } /* * whiteout vnode call */ int ufs_whiteout(ap) struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct direct newdir; int error = 0; switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (dvp->v_mount->mnt_maxsymlinklen > 0) return (0); return (EOPNOTSUPP); case CREATE: /* create a new directory whiteout */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & SAVENAME) == 0) panic("ufs_whiteout: missing name"); if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif newdir.d_ino = WINO; newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL); break; case DELETE: /* remove an existing directory whiteout */ #ifdef DIAGNOSTIC if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif cnp->cn_flags &= ~DOWHITEOUT; error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; default: panic("ufs_whiteout: unknown op"); } if (cnp->cn_flags & HASBUF) { zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_flags &= ~HASBUF; } return (error); } /* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. 
Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensure the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ int ufs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tvp = ap->a_tvp; register struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct inode *ip, *xp, *dp; struct direct newdir; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0, ioflag; #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif /* * Check for cross-device rename. */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); vrele(fdvp); vrele(fvp); return (error); } if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto abortit; } /* * Check if just deleting a link name or if we've lost a race. * If another process completes the same rename after we've looked * up the source and have blocked looking up the target, then the * source and target inodes may be identical now although the * names were never linked. */ if (fvp == tvp) { if (fvp->v_type == VDIR) { /* * Linked directories are impossible, so we must * have lost the race. Pretend that the rename * completed before the lookup. */ #ifdef UFS_RENAME_DEBUG printf("ufs_rename: fvp == tvp for directories\n"); #endif error = ENOENT; goto abortit; } /* Release destination completely. */ VOP_ABORTOP(tdvp, tcnp); vput(tdvp); vput(tvp); /* * Delete source. There is another race now that everything * is unlocked, but this doesn't cause any new complications. * Relookup() may find a file that is unrelated to the * original one, or it may fail. Too bad. */ vrele(fdvp); vrele(fvp); fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); fcnp->cn_nameiop = DELETE; VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp == NULL) { #ifdef UFS_RENAME_DEBUG printf("ufs_rename: from name disappeared\n"); #endif return (ENOENT); } error = VOP_REMOVE(fdvp, fvp, fcnp); if (fdvp == fvp) vrele(fdvp); else vput(fdvp); vput(fvp); return (error); } if ((error = vn_lock(fvp, LK_EXCLUSIVE, p)) != 0) goto abortit; dp = VTOI(fdvp); ip = VTOI(fvp); if (ip->i_nlink >= LINK_MAX) { VOP_UNLOCK(fvp, 0, p); error = EMLINK; goto abortit; } if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) { VOP_UNLOCK(fvp, 0, p); error = EPERM; goto abortit; } if ((ip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. 
*/ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || (ip->i_flag & IN_RENAME)) { VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } ip->i_flag |= IN_RENAME; oldparent = dp->i_number; doingdirectory = 1; } VN_POLLEVENT(fdvp, POLLWRITE); vrele(fdvp); /* * When the target exists, both the directory * and target vnodes are returned locked. */ dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ ip->i_effnlink++; ip->i_nlink++; ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(fvp)) softdep_increase_linkcnt(ip); if ((error = UFS_UPDATE(fvp, !(DOINGSOFTDEP(fvp) | DOINGASYNC(fvp)))) != 0) { VOP_UNLOCK(fvp, 0, p); goto bad; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to checkpath(). */ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); VOP_UNLOCK(fvp, 0, p); if (oldparent != dp->i_number) newparent = dp->i_number; if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); error = ufs_checkpath(ip, dp, tcnp->cn_cred); if (error) goto out; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); VREF(tdvp); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; vrele(tdvp); dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); } /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (xp == NULL) { if (dp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Account for ".." in new directory. * When source and destination have the same * parent we don't fool with the link count. */ if (doingdirectory && newparent) { if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto bad; } dp->i_effnlink++; dp->i_nlink++; dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(tdvp)) softdep_increase_linkcnt(dp); error = UFS_UPDATE(tdvp, !(DOINGSOFTDEP(tdvp) | DOINGASYNC(tdvp))); if (error) goto bad; } ufs_makedirentry(ip, tcnp, &newdir); error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); if (error) { if (doingdirectory && newparent) { dp->i_effnlink--; dp->i_nlink--; dp->i_flag |= IN_CHANGE; (void)UFS_UPDATE(tdvp, 1); } goto bad; } VN_POLLEVENT(tdvp, POLLWRITE); vput(tdvp); } else { if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (xp->i_number == ip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. */ if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && tcnp->cn_cred->cr_uid != dp->i_uid && xp->i_uid != tcnp->cn_cred->cr_uid) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. 
Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((xp->i_mode&IFMT) == IFDIR) { if ((xp->i_effnlink > 2) || !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } error = ufs_dirrewrite(dp, xp, ip->i_number, IFTODT(ip->i_mode), (doingdirectory && newparent) ? newparent : doingdirectory); if (error) goto bad; if (doingdirectory) { if (!newparent) { dp->i_effnlink--; dp->i_flag |= IN_CHANGE; } xp->i_effnlink--; xp->i_flag |= IN_CHANGE; } VN_POLLEVENT(tdvp, POLLWRITE); if (doingdirectory && !DOINGSOFTDEP(tvp)) { /* * Truncate inode. The only stuff left in the directory * is "." and "..". The "." reference is inconsequential * since we are quashing it. We have removed the "." * reference and the reference in the parent directory, * but there may be other hard links. The soft * dependency code will arrange to do these operations * after the parent directory entry has been deleted on * disk, so when running with that code we avoid doing * them now. */ if (!newparent) dp->i_nlink--; xp->i_nlink--; ioflag = DOINGASYNC(tvp) ? 0 : IO_SYNC; if ((error = UFS_TRUNCATE(tvp, (off_t)0, ioflag, tcnp->cn_cred, tcnp->cn_proc)) != 0) goto bad; } vput(tdvp); VN_POLLEVENT(tvp, POLLNLINK); /* XXX this right? */ vput(tvp); xp = NULL; } /* * 3) Unlink the source. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp != NULL) { xp = VTOI(fvp); dp = VTOI(fdvp); } else { /* * From name has disappeared. */ if (doingdirectory) panic("ufs_rename: lost dir entry"); vrele(ap->a_fvp); return (0); } /* * Ensure that the directory entry still exists and has not * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source * is a directory then it cannot have been rmdir'ed; the IN_RENAME * flag ensures that it cannot be moved by another rename or removed * by a rmdir. */ if (xp != ip) { if (doingdirectory) panic("ufs_rename: lost dir entry"); } else { /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. 
*/ if (doingdirectory && newparent) { xp->i_offset = mastertemplate.dot_reclen; ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); cache_purge(fdvp); } error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); xp->i_flag &= ~IN_RENAME; } if (dp) vput(fdvp); if (xp) vput(fvp); vrele(ap->a_fvp); return (error); bad: if (xp) vput(ITOV(xp)); vput(ITOV(dp)); out: if (doingdirectory) ip->i_flag &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; ip->i_flag &= ~IN_RENAME; vput(fvp); } else vrele(fvp); return (error); } /* * Mkdir system call */ int ufs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct inode *ip, *dp; struct vnode *tvp; struct buf *bp; struct dirtemplate dirtemplate, *dtp; struct direct newdir; int error, dmode; long blkoff; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ufs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. */ error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; ip = VTOI(tvp); ip->i_gid = dp->i_gid; #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; ucp = cnp->cn_cred; #endif I /* * If we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; #ifdef QUOTA if (dp->i_uid != cnp->cn_cred->cr_uid) { /* * Make sure the correct user gets charged * for the space. * Make a dummy credential for the victim. * XXX This seems to never be accessed out of * our context so a stack variable is ok. */ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups[0] = dp->i_gid; ucp = &ucred; } #endif } else ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = dmode; tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 2; ip->i_nlink = 2; if (DOINGSOFTDEP(tvp)) softdep_increase_linkcnt(ip); if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; /* * Bump link count in parent directory to reflect work done below. * Should be done before reference is created so cleanup is * possible if we crash. 
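 * A newly made directory contributes one ".." reference back to the
 * parent, so a parent whose link count was 2 before this mkdir will sit
 * at 3 once the operation completes.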
*/ dp->i_effnlink++; dp->i_nlink++; dp->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(dvp)) softdep_increase_linkcnt(dp); error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(dvp) | DOINGASYNC(dvp))); if (error) goto bad; /* * Initialize directory with "." and ".." from static template. */ if (dvp->v_mount->mnt_maxsymlinklen > 0 ) dtp = &mastertemplate; else dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, B_CLRBUF, &bp)) != 0) goto bad; ip->i_size = DIRBLKSIZ; ip->i_flag |= IN_CHANGE | IN_UPDATE; vnode_pager_setsize(tvp, (u_long)ip->i_size); bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); if (DOINGSOFTDEP(tvp)) { /* * Ensure that the entire newly allocated block is a * valid directory so that future growth within the * block does not have to ensure that the block is * written before the inode. */ blkoff = DIRBLKSIZ; while (blkoff < bp->b_bcount) { ((struct direct *) (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; blkoff += DIRBLKSIZ; } } if ((error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp)))) != 0) { (void)VOP_BWRITE(bp->b_vp, bp); goto bad; } VN_POLLEVENT(dvp, POLLWRITE); /* XXX right place? */ /* * Directory set up, now install its entry in the parent directory. * * If we are not doing soft dependencies, then we must write out the * buffer containing the new directory body before entering the new * name in the parent. If we are doing soft dependencies, then the * buffer containing the new directory body will be passed to and * released in the soft dependency code after the code has attached * an appropriate ordering dependency to the buffer which ensures that * the buffer is written before the new name is written in the parent. */ if (DOINGASYNC(dvp)) bdwrite(bp); else if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp->b_vp, bp)))) goto bad; ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); bad: if (error == 0) { *ap->a_vpp = tvp; } else { dp->i_effnlink--; dp->i_nlink--; dp->i_flag |= IN_CHANGE; /* * No need to do an explicit VOP_TRUNCATE here, vrele will * do this for us because we set the link count to 0. */ ip->i_effnlink = 0; ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); } out: zfree(namei_zone, cnp->cn_pnbuf); return (error); } /* * Rmdir system call. */ int ufs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error, ioflag; ip = VTOI(vp); dp = VTOI(dvp); /* * Do not remove a directory that is in the process of being renamed. * Verify the directory is empty (and valid). Rmdir ".." will not be * valid since ".." will contain a reference to the current directory * and thus be non-empty. Do not allow the removal of mounted on * directories (this can happen when an NFS exported filesystem * tries to remove a locally mounted on directory). */ error = 0; if (ip->i_flag & IN_RENAME) { error = EINVAL; goto out; } if (ip->i_effnlink != 2 || !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } if (vp->v_mountedhere != 0) { error = EINVAL; goto out; } /* * Delete reference to directory before purging * inode. 
If we crash in between, the directory * will be reattached to lost+found, */ error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) goto out; VN_POLLEVENT(dvp, POLLWRITE|POLLNLINK); cache_purge(dvp); /* * Truncate inode. The only stuff left in the directory is "." and * "..". The "." reference is inconsequential since we are quashing * it. We have removed the "." reference and the reference in the * parent directory, but there may be other hard links. So, * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no * new entries are made. The soft dependency code will arrange to * do these operations after the parent directory entry has been * deleted on disk, so when running with that code we avoid doing * them now. */ dp->i_effnlink--; dp->i_flag |= IN_CHANGE; ip->i_effnlink--; ip->i_flag |= IN_CHANGE; if (!DOINGSOFTDEP(vp)) { dp->i_nlink--; ip->i_nlink--; ioflag = DOINGASYNC(vp) ? 0 : IO_SYNC; error = UFS_TRUNCATE(vp, (off_t)0, ioflag, cnp->cn_cred, cnp->cn_proc); } cache_purge(vp); out: VN_POLLEVENT(vp, POLLNLINK); return (error); } /* * symlink -- make a symbolic link */ int ufs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *vp, **vpp = ap->a_vpp; register struct inode *ip; int len, error; error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); vp = *vpp; len = strlen(ap->a_target); if (len < vp->v_mount->mnt_maxsymlinklen) { ip = VTOI(vp); bcopy(ap->a_target, (char *)ip->i_shortlink, len); ip->i_size = len; ip->i_flag |= IN_CHANGE | IN_UPDATE; } else error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0, (struct proc *)0); vput(vp); return (error); } /* * Vnode op for reading directories. * * The routine below assumes that the on-disk format of a directory * is the same as that defined by . If the on-disk * format changes, then it will be necessary to do a conversion * from the on-disk format that read returns to the format defined * by . */ int ufs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *ncookies; u_long **a_cookies; } */ *ap; { register struct uio *uio = ap->a_uio; int error; size_t count, lost; off_t off; if (ap->a_ncookies != NULL) /* * Ensure that the block is aligned. The caller can use * the cookies to determine where in the block to start. */ uio->uio_offset &= ~(DIRBLKSIZ - 1); off = uio->uio_offset; count = uio->uio_resid; /* Make sure we don't return partial entries. 
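 * For example, with DIRBLKSIZ at its usual 512, an offset of 100 and a
 * resid of 1000 trims (100 + 1000) & 511 = 76 bytes, so the transfer
 * ends exactly on a directory-block boundary.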
*/ if (count <= ((uio->uio_offset + count) & (DIRBLKSIZ -1))) return (EINVAL); count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); lost = uio->uio_resid - count; uio->uio_resid = count; uio->uio_iov->iov_len = count; # if (BYTE_ORDER == LITTLE_ENDIAN) if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) { error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); } else { struct dirent *dp, *edp; struct uio auio; struct iovec aiov; caddr_t dirbuf; int readcnt; u_char tmp; auio = *uio; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; aiov.iov_len = count; MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); aiov.iov_base = dirbuf; error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); if (error == 0) { readcnt = count - auio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { tmp = dp->d_namlen; dp->d_namlen = dp->d_type; dp->d_type = tmp; if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, uio); } FREE(dirbuf, M_TEMP); } # else error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); # endif if (!error && ap->a_ncookies != NULL) { struct dirent* dpStart; struct dirent* dpEnd; struct dirent* dp; int ncookies; u_long *cookies; u_long *cookiep; if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) panic("ufs_readdir: unexpected uio from NFS server"); dpStart = (struct dirent *) (uio->uio_iov->iov_base - (uio->uio_offset - off)); dpEnd = (struct dirent *) uio->uio_iov->iov_base; for (dp = dpStart, ncookies = 0; dp < dpEnd; dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) ncookies++; MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); for (dp = dpStart, cookiep = cookies; dp < dpEnd; dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) { off += dp->d_reclen; *cookiep++ = (u_long) off; } *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } uio->uio_resid += lost; if (ap->a_eofflag) *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset; return (error); } /* * Return target name of a symbolic link */ int ufs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); int isize; isize = ip->i_size; if ((isize < vp->v_mount->mnt_maxsymlinklen) || (ip->i_din.di_blocks == 0)) { /* XXX - for old fastlink support */ uiomove((char *)ip->i_shortlink, isize, ap->a_uio); return (0); } return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); } /* * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually * done. If a buffer has been saved in anticipation of a CREATE, delete it. */ /* ARGSUSED */ int ufs_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (0); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. * * In order to be able to swap to a file, the VOP_BMAP operation may not * deadlock on memory. See ufs_bmap() for details. 
*/ int ufs_strategy(ap) struct vop_strategy_args /* { struct vnode *a_vp; struct buf *a_bp; } */ *ap; { register struct buf *bp = ap->a_bp; register struct vnode *vp = ap->a_vp; register struct inode *ip; int error; ip = VTOI(vp); if (vp->v_type == VBLK || vp->v_type == VCHR) panic("ufs_strategy: spec"); if (bp->b_blkno == bp->b_lblkno) { error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); if (error) { bp->b_error = error; bp->b_flags |= B_ERROR; biodone(bp); return (error); } if ((long)bp->b_blkno == -1) vfs_bio_clrbuf(bp); } if ((long)bp->b_blkno == -1) { biodone(bp); return (0); } vp = ip->i_devvp; bp->b_dev = vp->v_rdev; VOP_STRATEGY(vp, bp); return (0); } /* * Print out the contents of an inode. */ int ufs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); printf("tag VT_UFS, ino %lu, on dev %#lx (%d, %d)", (u_long)ip->i_number, (u_long)ip->i_dev, major(ip->i_dev), minor(ip->i_dev)); if (vp->v_type == VFIFO) fifo_printinfo(vp); lockmgr_printinfo(&ip->i_lock); printf("\n"); return (0); } /* * Read wrapper for special devices. */ int ufsspec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error, resid; struct inode *ip; struct uio *uio; uio = ap->a_uio; resid = uio->uio_resid; error = VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap); /* * The inode may have been revoked during the call, so it must not * be accessed blindly here or in the other wrapper functions. */ ip = VTOI(ap->a_vp); if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0))) ip->i_flag |= IN_ACCESS; return (error); } /* * Write wrapper for special devices. */ int ufsspec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error, resid; struct inode *ip; struct uio *uio; uio = ap->a_uio; resid = uio->uio_resid; error = VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap); ip = VTOI(ap->a_vp); if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0))) VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } /* * Close wrapper for special devices. * * Update the times on the inode then do device close. */ int ufsspec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) ufs_itimes(vp); simple_unlock(&vp->v_interlock); return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Read wrapper for fifos. */ int ufsfifo_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error, resid; struct inode *ip; struct uio *uio; uio = ap->a_uio; resid = uio->uio_resid; error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap); ip = VTOI(ap->a_vp); if ((ap->a_vp->v_mount->mnt_flag & MNT_NOATIME) == 0 && ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0))) VTOI(ap->a_vp)->i_flag |= IN_ACCESS; return (error); } /* * Write wrapper for fifos. 
*/ int ufsfifo_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { int error, resid; struct inode *ip; struct uio *uio; uio = ap->a_uio; resid = uio->uio_resid; error = VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap); ip = VTOI(ap->a_vp); if (ip != NULL && (uio->uio_resid != resid || (error == 0 && resid != 0))) VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } /* * Close wrapper for fifos. * * Update the times on the inode then do device close. */ int ufsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) ufs_itimes(vp); simple_unlock(&vp->v_interlock); return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Return POSIX pathconf information applicable to ufs filesystems. */ int ufs_pathconf(ap) struct vop_pathconf_args /* { struct vnode *a_vp; int a_name; int *a_retval; } */ *ap; { switch (ap->a_name) { case _PC_LINK_MAX: *ap->a_retval = LINK_MAX; return (0); case _PC_NAME_MAX: *ap->a_retval = NAME_MAX; return (0); case _PC_PATH_MAX: *ap->a_retval = PATH_MAX; return (0); case _PC_PIPE_BUF: *ap->a_retval = PIPE_BUF; return (0); case _PC_CHOWN_RESTRICTED: *ap->a_retval = 1; return (0); case _PC_NO_TRUNC: *ap->a_retval = 1; return (0); default: return (EINVAL); } /* NOTREACHED */ } /* * Advisory record locking support */ int ufs_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct inode *ip = VTOI(ap->a_vp); return (lf_advlock(ap, &(ip->i_lockf), ip->i_size)); } /* * Initialize the vnode associated with a new inode, handle aliased * vnodes. */ int ufs_vinit(mntp, specops, fifoops, vpp) struct mount *mntp; vop_t **specops; vop_t **fifoops; struct vnode **vpp; { struct inode *ip; struct vnode *vp, *nvp; struct timeval tv; vp = *vpp; ip = VTOI(vp); switch(vp->v_type = IFTOVT(ip->i_mode)) { case VCHR: case VBLK: vp->v_op = specops; nvp = checkalias(vp, ip->i_rdev, mntp); if (nvp) { /* * Discard unneeded vnode, but save its inode. * Note that the lock is carried over in the inode * to the replacement vnode. */ nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased inode. */ vp = nvp; ip->i_vnode = vp; } break; case VFIFO: vp->v_op = fifoops; break; default: break; } if (ip->i_number == ROOTINO) vp->v_flag |= VROOT; /* * Initialize modrev times */ getmicrouptime(&tv); SETHIGH(ip->i_modrev, tv.tv_sec); SETLOW(ip->i_modrev, tv.tv_usec * 4294); *vpp = vp; return (0); } /* * Allocate a new inode. 
*/ int ufs_makeinode(mode, dvp, vpp, cnp) int mode; struct vnode *dvp; struct vnode **vpp; struct componentname *cnp; { register struct inode *ip, *pdir; struct direct newdir; struct vnode *tvp; int error; pdir = VTOI(dvp); #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_makeinode: no name"); #endif *vpp = NULL; if ((mode & IFMT) == 0) mode |= IFREG; error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp); if (error) { zfree(namei_zone, cnp->cn_pnbuf); return (error); } ip = VTOI(tvp); ip->i_gid = pdir->i_gid; #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; ucp = cnp->cn_cred; #endif I /* * If we are not the owner of the directory, * and we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * Note that this drops off the execute bits for security. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (pdir->i_mode & ISUID) && (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { ip->i_uid = pdir->i_uid; mode &= ~07111; #ifdef QUOTA /* * Make sure the correct user gets charged * for the space. * Quickly knock up a dummy credential for the victim. * XXX This seems to never be accessed out of our * context so a stack variable is ok. */ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups[0] = pdir->i_gid; ucp = &ucred; #endif } else ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, mode); vput(tvp); return (error); } #endif #endif /* !SUIDDIR */ ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = mode; tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ ip->i_effnlink = 1; ip->i_nlink = 1; if (DOINGSOFTDEP(tvp)) softdep_increase_linkcnt(ip); if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) && suser_xxx(cnp->cn_cred, 0, 0)) ip->i_mode &= ~ISGID; if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; /* * Make sure inode goes to disk before directory entry. */ error = UFS_UPDATE(tvp, !(DOINGSOFTDEP(tvp) | DOINGASYNC(tvp))); if (error) goto bad; ufs_makedirentry(ip, cnp, &newdir); error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL); if (error) goto bad; if ((cnp->cn_flags & SAVESTART) == 0) zfree(namei_zone, cnp->cn_pnbuf); *vpp = tvp; return (0); bad: /* * Write error occurred trying to update the inode * or the directory so must deallocate the inode. */ zfree(namei_zone, cnp->cn_pnbuf); ip->i_effnlink = 0; ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); return (error); } static int ufs_missingop(ap) struct vop_generic_args *ap; { panic("no vop function for %s in ufs child", ap->a_desc->vdesc_name); return (EOPNOTSUPP); } /* Global vfs data structures for ufs. 
*/ static vop_t **ufs_vnodeop_p; static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_fsync_desc, (vop_t *) ufs_missingop }, { &vop_read_desc, (vop_t *) ufs_missingop }, { &vop_reallocblks_desc, (vop_t *) ufs_missingop }, { &vop_write_desc, (vop_t *) ufs_missingop }, { &vop_abortop_desc, (vop_t *) ufs_abortop }, { &vop_access_desc, (vop_t *) ufs_access }, { &vop_advlock_desc, (vop_t *) ufs_advlock }, { &vop_bmap_desc, (vop_t *) ufs_bmap }, { &vop_cachedlookup_desc, (vop_t *) ufs_lookup }, { &vop_close_desc, (vop_t *) ufs_close }, { &vop_create_desc, (vop_t *) ufs_create }, { &vop_getattr_desc, (vop_t *) ufs_getattr }, { &vop_inactive_desc, (vop_t *) ufs_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_link_desc, (vop_t *) ufs_link }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, { &vop_mkdir_desc, (vop_t *) ufs_mkdir }, { &vop_mknod_desc, (vop_t *) ufs_mknod }, { &vop_mmap_desc, (vop_t *) ufs_mmap }, { &vop_open_desc, (vop_t *) ufs_open }, { &vop_pathconf_desc, (vop_t *) ufs_pathconf }, { &vop_poll_desc, (vop_t *) vop_stdpoll }, { &vop_print_desc, (vop_t *) ufs_print }, { &vop_readdir_desc, (vop_t *) ufs_readdir }, { &vop_readlink_desc, (vop_t *) ufs_readlink }, { &vop_reclaim_desc, (vop_t *) ufs_reclaim }, { &vop_remove_desc, (vop_t *) ufs_remove }, { &vop_rename_desc, (vop_t *) ufs_rename }, { &vop_rmdir_desc, (vop_t *) ufs_rmdir }, { &vop_setattr_desc, (vop_t *) ufs_setattr }, { &vop_strategy_desc, (vop_t *) ufs_strategy }, { &vop_symlink_desc, (vop_t *) ufs_symlink }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { &vop_whiteout_desc, (vop_t *) ufs_whiteout }, { NULL, NULL } }; static struct vnodeopv_desc ufs_vnodeop_opv_desc = { &ufs_vnodeop_p, ufs_vnodeop_entries }; static vop_t **ufs_specop_p; static struct vnodeopv_entry_desc ufs_specop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_fsync_desc, (vop_t *) ufs_missingop }, { &vop_access_desc, (vop_t *) ufs_access }, { &vop_close_desc, (vop_t *) ufsspec_close }, { &vop_getattr_desc, (vop_t *) ufs_getattr }, { &vop_inactive_desc, (vop_t *) ufs_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_print_desc, (vop_t *) ufs_print }, { &vop_read_desc, (vop_t *) ufsspec_read }, { &vop_reclaim_desc, (vop_t *) ufs_reclaim }, { &vop_setattr_desc, (vop_t *) ufs_setattr }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { &vop_write_desc, (vop_t *) ufsspec_write }, { NULL, NULL } }; static struct vnodeopv_desc ufs_specop_opv_desc = { &ufs_specop_p, ufs_specop_entries }; static vop_t **ufs_fifoop_p; static struct vnodeopv_entry_desc ufs_fifoop_entries[] = { { &vop_default_desc, (vop_t *) fifo_vnoperate }, { &vop_fsync_desc, (vop_t *) ufs_missingop }, { &vop_access_desc, (vop_t *) ufs_access }, { &vop_close_desc, (vop_t *) ufsfifo_close }, { &vop_getattr_desc, (vop_t *) ufs_getattr }, { &vop_inactive_desc, (vop_t *) ufs_inactive }, { &vop_islocked_desc, (vop_t *) vop_stdislocked }, { &vop_lock_desc, (vop_t *) vop_stdlock }, { &vop_print_desc, (vop_t *) ufs_print }, { &vop_read_desc, (vop_t *) ufsfifo_read }, { &vop_reclaim_desc, (vop_t *) ufs_reclaim }, { &vop_setattr_desc, (vop_t *) ufs_setattr }, { &vop_unlock_desc, (vop_t *) vop_stdunlock }, { &vop_write_desc, (vop_t *) ufsfifo_write }, { NULL, NULL } }; static struct vnodeopv_desc ufs_fifoop_opv_desc = { &ufs_fifoop_p, ufs_fifoop_entries }; VNODEOP_SET(ufs_vnodeop_opv_desc); 
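/*
 * Illustrative aside, not part of the UFS sources: a minimal userland
 * sketch (the names demo_*, op_table are invented for this sketch) of
 * the table-driven dispatch that the vnodeopv tables above express --
 * each operation descriptor maps to a handler, and anything without an
 * explicit entry falls back to the default operation, much as
 * ufs_vnoperate() below resolves a vop through VOCALL().  Kept under
 * "#if 0" so it is never compiled into the kernel.
 */
#if 0
#include <stdio.h>

enum demo_op { DEMO_OPEN, DEMO_READ, DEMO_CLOSE, DEMO_NOPS };

static int
demo_default(const char *name)
{
	printf("default handler: %s\n", name);
	return (0);
}

static int
demo_read(const char *name)
{
	printf("read handler: %s\n", name);
	return (0);
}

/* One slot per operation; empty slots fall back to the default handler. */
static int (*op_table[DEMO_NOPS])(const char *) = {
	[DEMO_READ] = demo_read,
};

static int
demo_vocall(enum demo_op op, const char *name)
{
	int (*fn)(const char *);

	fn = op_table[op] != NULL ? op_table[op] : demo_default;
	return ((*fn)(name));
}

int
main(void)
{
	demo_vocall(DEMO_OPEN, "somefile");	/* no entry -> demo_default */
	demo_vocall(DEMO_READ, "somefile");	/* entry -> demo_read */
	return (0);
}
#endif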
VNODEOP_SET(ufs_specop_opv_desc); VNODEOP_SET(ufs_fifoop_opv_desc); int ufs_vnoperate(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap)); } int ufs_vnoperatefifo(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { return (VOCALL(ufs_fifoop_p, ap->a_desc->vdesc_offset, ap)); } int ufs_vnoperatespec(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { return (VOCALL(ufs_specop_p, ap->a_desc->vdesc_offset, ap)); } Index: head/sys/vm/vm_mmap.c =================================================================== --- head/sys/vm/vm_mmap.c (revision 49534) +++ head/sys/vm/vm_mmap.c (revision 49535) @@ -1,1093 +1,1091 @@ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$ * * @(#)vm_mmap.c 8.4 (Berkeley) 1/12/94 - * $Id: vm_mmap.c,v 1.99 1999/05/17 00:53:56 alc Exp $ + * $Id: vm_mmap.c,v 1.100 1999/06/05 18:21:53 alc Exp $ */ /* * Mapped file (mmap) interface to VM */ #include "opt_compat.h" #include "opt_rlimit.h" #include #include #include #include #include #include #include #include #include #include #include #include - -#include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef _SYS_SYSPROTO_H_ struct sbrk_args { int incr; }; #endif /* ARGSUSED */ int sbrk(p, uap) struct proc *p; struct sbrk_args *uap; { /* Not yet implemented */ return (EOPNOTSUPP); } #ifndef _SYS_SYSPROTO_H_ struct sstk_args { int incr; }; #endif /* ARGSUSED */ int sstk(p, uap) struct proc *p; struct sstk_args *uap; { /* Not yet implemented */ return (EOPNOTSUPP); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #ifndef _SYS_SYSPROTO_H_ struct getpagesize_args { int dummy; }; #endif /* ARGSUSED */ int ogetpagesize(p, uap) struct proc *p; struct getpagesize_args *uap; { p->p_retval[0] = PAGE_SIZE; return (0); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Memory Map (mmap) system call. Note that the file offset * and address are allowed to be NOT page aligned, though if * the MAP_FIXED flag it set, both must have the same remainder * modulo the PAGE_SIZE (POSIX 1003.1b). If the address is not * page-aligned, the actual mapping starts at trunc_page(addr) * and the return value is adjusted up by the page offset. */ #ifndef _SYS_SYSPROTO_H_ struct mmap_args { void *addr; size_t len; int prot; int flags; int fd; long pad; off_t pos; }; #endif int mmap(p, uap) struct proc *p; register struct mmap_args *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vnode *vp; vm_offset_t addr; vm_size_t size, pageoff; vm_prot_t prot, maxprot; void *handle; int flags, error; int disablexworkaround; off_t pos; addr = (vm_offset_t) uap->addr; size = uap->len; prot = uap->prot & VM_PROT_ALL; flags = uap->flags; pos = uap->pos; /* make sure mapping fits into numeric range etc */ if ((ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1)) return (EINVAL); if (flags & MAP_STACK) { if ((uap->fd != -1) || ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE))) return (EINVAL); flags |= MAP_ANON; pos = 0; } /* * Align the file position to a page boundary, * and save its page offset component. */ pageoff = (pos & PAGE_MASK); pos -= pageoff; /* Adjust size for rounding (on both ends). */ size += pageoff; /* low end... */ size = (vm_size_t) round_page(size); /* hi end */ /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (flags & MAP_FIXED) { /* * The specified address must have the same remainder * as the file offset taken modulo PAGE_SIZE, so it * should be aligned after adjustment by pageoff. */ addr -= pageoff; if (addr & PAGE_MASK) return (EINVAL); /* Address range must be all in user VM space. */ if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) return (EINVAL); #ifndef i386 if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) return (EINVAL); #endif if (addr + size < addr) return (EINVAL); } /* * XXX for non-fixed mappings where no hint is provided or * the hint would fall in the potential heap space, * place it after the end of the largest possible heap. * * There should really be a pmap call to determine a reasonable * location. 
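 * (Illustrative consequence: a NULL hint is bumped to the first page
 * past vm_daddr + MAXDSIZ, i.e. just beyond where the data segment
 * could ever grow; the exact address depends on the executable.)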
*/ else if (addr == 0 || (addr >= round_page((vm_offset_t)p->p_vmspace->vm_taddr) && addr < round_page((vm_offset_t)p->p_vmspace->vm_daddr + MAXDSIZ))) addr = round_page((vm_offset_t)p->p_vmspace->vm_daddr + MAXDSIZ); if (flags & MAP_ANON) { /* * Mapping blank space is trivial. */ handle = NULL; maxprot = VM_PROT_ALL; pos = 0; } else { /* * Mapping file, get fp for validation. Obtain vnode and make * sure it is of appropriate type. */ if (((unsigned) uap->fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (EINVAL); vp = (struct vnode *) fp->f_data; if (vp->v_type != VREG && vp->v_type != VCHR) return (EINVAL); /* * XXX hack to handle use of /dev/zero to map anon memory (ala * SunOS). */ if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) { handle = NULL; maxprot = VM_PROT_ALL; flags |= MAP_ANON; pos = 0; } else { /* * cdevs does not provide private mappings of any kind. */ /* * However, for XIG X server to continue to work, * we should allow the superuser to do it anyway. * We only allow it at securelevel < 1. * (Because the XIG X server writes directly to video * memory via /dev/mem, it should never work at any * other securelevel. * XXX this will have to go */ if (securelevel >= 1) disablexworkaround = 1; else disablexworkaround = suser(p); if (vp->v_type == VCHR && disablexworkaround && (flags & (MAP_PRIVATE|MAP_COPY))) return (EINVAL); /* * Ensure that file and memory protections are * compatible. Note that we only worry about * writability if mapping is shared; in this case, * current and max prot are dictated by the open file. * XXX use the vnode instead? Problem is: what * credentials do we use for determination? What if * proc does a setuid? */ maxprot = VM_PROT_EXECUTE; /* ??? */ if (fp->f_flag & FREAD) maxprot |= VM_PROT_READ; else if (prot & PROT_READ) return (EACCES); /* * If we are sharing potential changes (either via * MAP_SHARED or via the implicit sharing of character * device mappings), and we are trying to get write * permission although we opened it without asking * for it, bail out. Check for superuser, only if * we're at securelevel < 1, to allow the XIG X server * to continue to work. 
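 * (Concretely: a descriptor opened read-only and mapped MAP_SHARED ends
 * up with a maxprot lacking VM_PROT_WRITE, and an explicit PROT_WRITE
 * request on it is refused with EACCES.)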
*/ if ((flags & MAP_SHARED) != 0 || (vp->v_type == VCHR && disablexworkaround)) { if ((fp->f_flag & FWRITE) != 0) { struct vattr va; if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p))) return (error); if ((va.va_flags & (IMMUTABLE|APPEND)) == 0) maxprot |= VM_PROT_WRITE; else if (prot & PROT_WRITE) return (EPERM); } else if ((prot & PROT_WRITE) != 0) return (EACCES); } else maxprot |= VM_PROT_WRITE; handle = (void *)vp; } } error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot, flags, handle, pos); if (error == 0) p->p_retval[0] = (register_t) (addr + pageoff); return (error); } #ifdef COMPAT_43 #ifndef _SYS_SYSPROTO_H_ struct ommap_args { caddr_t addr; int len; int prot; int flags; int fd; long pos; }; #endif int ommap(p, uap) struct proc *p; register struct ommap_args *uap; { struct mmap_args nargs; static const char cvtbsdprot[8] = { 0, PROT_EXEC, PROT_WRITE, PROT_EXEC | PROT_WRITE, PROT_READ, PROT_EXEC | PROT_READ, PROT_WRITE | PROT_READ, PROT_EXEC | PROT_WRITE | PROT_READ, }; #define OMAP_ANON 0x0002 #define OMAP_COPY 0x0020 #define OMAP_SHARED 0x0010 #define OMAP_FIXED 0x0100 #define OMAP_INHERIT 0x0800 nargs.addr = uap->addr; nargs.len = uap->len; nargs.prot = cvtbsdprot[uap->prot & 0x7]; nargs.flags = 0; if (uap->flags & OMAP_ANON) nargs.flags |= MAP_ANON; if (uap->flags & OMAP_COPY) nargs.flags |= MAP_COPY; if (uap->flags & OMAP_SHARED) nargs.flags |= MAP_SHARED; else nargs.flags |= MAP_PRIVATE; if (uap->flags & OMAP_FIXED) nargs.flags |= MAP_FIXED; if (uap->flags & OMAP_INHERIT) nargs.flags |= MAP_INHERIT; nargs.fd = uap->fd; nargs.pos = uap->pos; return (mmap(p, &nargs)); } #endif /* COMPAT_43 */ #ifndef _SYS_SYSPROTO_H_ struct msync_args { void *addr; int len; int flags; }; #endif int msync(p, uap) struct proc *p; struct msync_args *uap; { vm_offset_t addr; vm_size_t size, pageoff; int flags; vm_map_t map; int rv; addr = (vm_offset_t) uap->addr; size = uap->len; flags = uap->flags; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return(EINVAL); if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE)) return (EINVAL); map = &p->p_vmspace->vm_map; /* * XXX Gak! If size is zero we are supposed to sync "all modified * pages with the region containing addr". Unfortunately, we don't * really keep track of individual mmaps so we approximate by flushing * the range of the map entry containing addr. This can be incorrect * if the region splits or is coalesced with a neighbor. */ if (size == 0) { vm_map_entry_t entry; vm_map_lock_read(map); rv = vm_map_lookup_entry(map, addr, &entry); vm_map_unlock_read(map); if (rv == FALSE) return (EINVAL); addr = entry->start; size = entry->end - entry->start; } /* * Clean the pages and interpret the return value. */ rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0, (flags & MS_INVALIDATE) != 0); switch (rv) { case KERN_SUCCESS: break; case KERN_INVALID_ADDRESS: return (EINVAL); /* Sun returns ENOMEM? */ case KERN_FAILURE: return (EIO); default: return (EINVAL); } return (0); } #ifndef _SYS_SYSPROTO_H_ struct munmap_args { void *addr; size_t len; }; #endif int munmap(p, uap) register struct proc *p; register struct munmap_args *uap; { vm_offset_t addr; vm_size_t size, pageoff; vm_map_t map; addr = (vm_offset_t) uap->addr; size = uap->len; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return(EINVAL); if (size == 0) return (0); /* * Check for illegal addresses. 
Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS) return (EINVAL); #ifndef i386 if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS) return (EINVAL); #endif map = &p->p_vmspace->vm_map; /* * Make sure entire range is allocated. */ if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) return (EINVAL); /* returns nothing but KERN_SUCCESS anyway */ (void) vm_map_remove(map, addr, addr + size); return (0); } void munmapfd(p, fd) struct proc *p; int fd; { /* * XXX should unmap any regions mapped to this file */ p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED; } #ifndef _SYS_SYSPROTO_H_ struct mprotect_args { const void *addr; size_t len; int prot; }; #endif int mprotect(p, uap) struct proc *p; struct mprotect_args *uap; { vm_offset_t addr; vm_size_t size, pageoff; register vm_prot_t prot; addr = (vm_offset_t) uap->addr; size = uap->len; prot = uap->prot & VM_PROT_ALL; #if defined(VM_PROT_READ_IS_EXEC) if (prot & VM_PROT_READ) prot |= VM_PROT_EXECUTE; #endif pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return(EINVAL); switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot, FALSE)) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: return (EACCES); } return (EINVAL); } #ifndef _SYS_SYSPROTO_H_ struct minherit_args { void *addr; size_t len; int inherit; }; #endif int minherit(p, uap) struct proc *p; struct minherit_args *uap; { vm_offset_t addr; vm_size_t size, pageoff; register vm_inherit_t inherit; addr = (vm_offset_t)uap->addr; size = uap->len; inherit = uap->inherit; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); if (addr + size < addr) return(EINVAL); switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size, inherit)) { case KERN_SUCCESS: return (0); case KERN_PROTECTION_FAILURE: return (EACCES); } return (EINVAL); } #ifndef _SYS_SYSPROTO_H_ struct madvise_args { void *addr; size_t len; int behav; }; #endif /* ARGSUSED */ int madvise(p, uap) struct proc *p; struct madvise_args *uap; { vm_offset_t start, end; /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh). */ if (VM_MAXUSER_ADDRESS > 0 && ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS) return (EINVAL); #ifndef i386 if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS) return (EINVAL); #endif if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr) return (EINVAL); /* * Since this routine is only advisory, we default to conservative * behavior. */ start = trunc_page((vm_offset_t) uap->addr); end = round_page((vm_offset_t) uap->addr + uap->len); vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav); return (0); } #ifndef _SYS_SYSPROTO_H_ struct mincore_args { const void *addr; size_t len; char *vec; }; #endif /* ARGSUSED */ int mincore(p, uap) struct proc *p; struct mincore_args *uap; { vm_offset_t addr, first_addr; vm_offset_t end, cend; pmap_t pmap; vm_map_t map; char *vec; int error; int vecindex, lastvecindex; register vm_map_entry_t current; vm_map_entry_t entry; int mincoreinfo; unsigned int timestamp; /* * Make sure that the addresses presented are valid for user * mode. 
*/ first_addr = addr = trunc_page((vm_offset_t) uap->addr); end = addr + (vm_size_t)round_page(uap->len); if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS) return (EINVAL); if (end < addr) return (EINVAL); /* * Address of byte vector */ vec = uap->vec; map = &p->p_vmspace->vm_map; pmap = vmspace_pmap(p->p_vmspace); vm_map_lock_read(map); RestartScan: timestamp = map->timestamp; if (!vm_map_lookup_entry(map, addr, &entry)) entry = entry->next; /* * Do this on a map entry basis so that if the pages are not * in the current processes address space, we can easily look * up the pages elsewhere. */ lastvecindex = -1; for(current = entry; (current != &map->header) && (current->start < end); current = current->next) { /* * ignore submaps (for now) or null objects */ if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) || current->object.vm_object == NULL) continue; /* * limit this scan to the current map entry and the * limits for the mincore call */ if (addr < current->start) addr = current->start; cend = current->end; if (cend > end) cend = end; /* * scan this entry one page at a time */ while(addr < cend) { /* * Check pmap first, it is likely faster, also * it can provide info as to whether we are the * one referencing or modifying the page. */ mincoreinfo = pmap_mincore(pmap, addr); if (!mincoreinfo) { vm_pindex_t pindex; vm_ooffset_t offset; vm_page_t m; /* * calculate the page index into the object */ offset = current->offset + (addr - current->start); pindex = OFF_TO_IDX(offset); m = vm_page_lookup(current->object.vm_object, pindex); /* * if the page is resident, then gather information about * it. */ if (m) { mincoreinfo = MINCORE_INCORE; if (m->dirty || pmap_is_modified(VM_PAGE_TO_PHYS(m))) mincoreinfo |= MINCORE_MODIFIED_OTHER; if ((m->flags & PG_REFERENCED) || pmap_ts_referenced(VM_PAGE_TO_PHYS(m))) { vm_page_flag_set(m, PG_REFERENCED); mincoreinfo |= MINCORE_REFERENCED_OTHER; } } } /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * calculate index into user supplied byte vector */ vecindex = OFF_TO_IDX(addr - first_addr); /* * If we have skipped map entries, we need to make sure that * the byte vector is zeroed for those skipped entries. */ while((lastvecindex + 1) < vecindex) { error = subyte( vec + lastvecindex, 0); if (error) { return (EFAULT); } ++lastvecindex; } /* * Pass the page information to the user */ error = subyte( vec + vecindex, mincoreinfo); if (error) { return (EFAULT); } /* * If the map has changed, due to the subyte, the previous * output may be invalid. */ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; lastvecindex = vecindex; addr += PAGE_SIZE; } } /* * subyte may page fault. In case it needs to modify * the map, we release the lock. */ vm_map_unlock_read(map); /* * Zero the last entries in the byte vector. */ vecindex = OFF_TO_IDX(end - first_addr); while((lastvecindex + 1) < vecindex) { error = subyte( vec + lastvecindex, 0); if (error) { return (EFAULT); } ++lastvecindex; } /* * If the map has changed, due to the subyte, the previous * output may be invalid. 
*/ vm_map_lock_read(map); if (timestamp != map->timestamp) goto RestartScan; vm_map_unlock_read(map); return (0); } #ifndef _SYS_SYSPROTO_H_ struct mlock_args { const void *addr; size_t len; }; #endif int mlock(p, uap) struct proc *p; struct mlock_args *uap; { vm_offset_t addr; vm_size_t size, pageoff; int error; addr = (vm_offset_t) uap->addr; size = uap->len; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); /* disable wrap around */ if (addr + size < addr) return (EINVAL); if (atop(size) + cnt.v_wire_count > vm_page_max_wired) return (EAGAIN); #ifdef pmap_wired_count if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) > p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) return (ENOMEM); #else error = suser(p); if (error) return (error); #endif error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE); return (error == KERN_SUCCESS ? 0 : ENOMEM); } #ifndef _SYS_SYSPROTO_H_ struct mlockall_args { int how; }; #endif int mlockall(p, uap) struct proc *p; struct mlockall_args *uap; { return 0; } #ifndef _SYS_SYSPROTO_H_ struct mlockall_args { int how; }; #endif int munlockall(p, uap) struct proc *p; struct munlockall_args *uap; { return 0; } #ifndef _SYS_SYSPROTO_H_ struct munlock_args { const void *addr; size_t len; }; #endif int munlock(p, uap) struct proc *p; struct munlock_args *uap; { vm_offset_t addr; vm_size_t size, pageoff; int error; addr = (vm_offset_t) uap->addr; size = uap->len; pageoff = (addr & PAGE_MASK); addr -= pageoff; size += pageoff; size = (vm_size_t) round_page(size); /* disable wrap around */ if (addr + size < addr) return (EINVAL); #ifndef pmap_wired_count error = suser(p); if (error) return (error); #endif error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE); return (error == KERN_SUCCESS ? 0 : ENOMEM); } /* * Internal version of mmap. * Currently used by mmap, exec, and sys5 shared memory. * Handle is either a vnode pointer or NULL for MAP_ANON. */ int vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t maxprot, int flags, void *handle, vm_ooffset_t foff) { boolean_t fitit; vm_object_t object; struct vnode *vp = NULL; objtype_t type; int rv = KERN_SUCCESS; vm_ooffset_t objsize; int docow; struct proc *p = curproc; if (size == 0) return (0); objsize = size = round_page(size); /* * We currently can only deal with page aligned file offsets. * The check is here rather than in the syscall because the * kernel calls this function internally for other mmaping * operations (such as in exec) and non-aligned offsets will * cause pmap inconsistencies...so we want to be sure to * disallow this in all cases. */ if (foff & PAGE_MASK) return (EINVAL); if ((flags & MAP_FIXED) == 0) { fitit = TRUE; *addr = round_page(*addr); } else { if (*addr != trunc_page(*addr)) return (EINVAL); fitit = FALSE; (void) vm_map_remove(map, *addr, *addr + size); } /* * Lookup/allocate object. */ if (flags & MAP_ANON) { type = OBJT_DEFAULT; /* * Unnamed anonymous regions always start at 0. */ if (handle == 0) foff = 0; } else { vp = (struct vnode *) handle; if (vp->v_type == VCHR) { type = OBJT_DEVICE; handle = (void *)(intptr_t)vp->v_rdev; } else { struct vattr vat; int error; error = VOP_GETATTR(vp, &vat, p->p_ucred, p); if (error) return (error); objsize = round_page(vat.va_size); type = OBJT_VNODE; } } if (handle == NULL) { object = NULL; docow = 0; } else { object = vm_pager_allocate(type, handle, objsize, prot, foff); if (object == NULL) return (type == OBJT_DEVICE ? 
			    EINVAL : ENOMEM);
		docow = MAP_PREFAULT_PARTIAL;
	}

	/*
	 * Force device mappings to be shared.
	 */
	if (type == OBJT_DEVICE) {
		flags &= ~(MAP_PRIVATE|MAP_COPY);
		flags |= MAP_SHARED;
	}

	if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
		docow |= MAP_COPY_ON_WRITE;
	}

#if defined(VM_PROT_READ_IS_EXEC)
	if (prot & VM_PROT_READ)
		prot |= VM_PROT_EXECUTE;

	if (maxprot & VM_PROT_READ)
		maxprot |= VM_PROT_EXECUTE;
#endif

	if (fitit) {
		*addr = pmap_addr_hint(object, *addr, size);
	}

	if (flags & MAP_STACK)
		rv = vm_map_stack (map, *addr, size, prot,
				   maxprot, docow);
	else
		rv = vm_map_find(map, object, foff, addr, size, fitit,
				 prot, maxprot, docow);

	if (rv != KERN_SUCCESS) {
		/*
		 * Lose the object reference. Will destroy the
		 * object if it's an unnamed anonymous mapping
		 * or named anonymous without other references.
		 */
		vm_object_deallocate(object);
		goto out;
	}

	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & (MAP_SHARED|MAP_INHERIT)) {
		rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_map_remove(map, *addr, *addr + size);
			goto out;
		}
	}
out:
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
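A hypothetical userland sketch, not part of this change, showing the consumer side of the two paths above: mmap() ends up in vm_mmap(), and mincore() fills one status byte per page via the subyte() loop shown earlier. The MINCORE_INCORE test and the char-typed vector follow the FreeBSD <sys/mman.h> definitions; the buffer sizes are arbitrary.

/*
 * Hypothetical example (assumptions: FreeBSD-style mincore() taking a
 * char *vec, MINCORE_INCORE from <sys/mman.h>).  Maps four anonymous
 * pages, touches the first, and reports which pages are resident.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	size_t pagesz = (size_t)sysconf(_SC_PAGESIZE);
	size_t len = 4 * pagesz;
	char *p, *vec;
	size_t i;

	/* Anonymous private mapping; pages become resident only when touched. */
	p = mmap(NULL, len, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap");

	p[0] = 1;		/* touch page 0 only */

	vec = malloc(len / pagesz);
	if (vec == NULL)
		err(1, "malloc");

	/* One status byte per page, filled in by the kernel loop above. */
	if (mincore(p, len, vec) == -1)
		err(1, "mincore");

	for (i = 0; i < len / pagesz; i++)
		printf("page %lu: %s\n", (unsigned long)i,
		    (vec[i] & MINCORE_INCORE) ? "resident" : "not resident");

	free(vec);
	munmap(p, len);
	return (0);
}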
Index: head/sys/vm/vm_swap.c
===================================================================
--- head/sys/vm/vm_swap.c	(revision 49534)
+++ head/sys/vm/vm_swap.c	(revision 49535)
@@ -1,388 +1,386 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vm_swap.c	8.5 (Berkeley) 2/17/94
- * $Id: vm_swap.c,v 1.78 1999/07/17 19:59:55 phk Exp $
+ * $Id: vm_swap.c,v 1.79 1999/07/20 21:29:11 green Exp $
 */

#include "opt_devfs.h"
#include "opt_swap.h"
#include
#include
#include
#include
-#include
#ifdef DEVFS
#include
#endif
#include
#include
#include	/* XXX */
#include
#include
#include
#include
#include
+#include
#include
#include
#include
-
-#include

/*
 * "sw" is a fake device implemented
 * in vm_swap.c and used only internally to get to swstrategy.
 * It cannot be provided to the users, because the
 * swstrategy routine munches the b_dev and b_blkno entries
 * before calling the appropriate driver. This would horribly
 * confuse, e.g. the hashing routines. Instead, /dev/drum is
 * provided as a character (raw) device.
 */

static d_strategy_t swstrategy;

#define CDEV_MAJOR 4
#define BDEV_MAJOR 26

static struct cdevsw sw_cdevsw = {
	/* open */	nullopen,
	/* close */	nullclose,
	/* read */	physread,
	/* write */	physwrite,
	/* ioctl */	noioctl,
	/* stop */	nostop,
	/* reset */	noreset,
	/* devtotty */	nodevtotty,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	swstrategy,
	/* name */	"sw",
	/* parms */	noparms,
	/* maj */	CDEV_MAJOR,
	/* dump */	nodump,
	/* psize */	nopsize,
	/* flags */	0,
	/* maxio */	0,
	/* bmaj */	BDEV_MAJOR
};

/*
 * Indirect driver for multi-controller paging.
 */

#ifndef NSWAPDEV
#define NSWAPDEV	4
#endif
static struct swdevt should_be_malloced[NSWAPDEV];
static struct swdevt *swdevt = should_be_malloced;
struct vnode *swapdev_vp;
static int nswap;		/* first block after the interleaved devs */
static int nswdev = NSWAPDEV;
int vm_swap_size;

/*
 *	swstrategy:
 *
 *	Perform swap strategy interleave device selection
 *
 *	The bp is expected to be locked and *not* B_DONE on call.
 */

static void
swstrategy(bp)
	register struct buf *bp;
{
	int s, sz, off, seg, index;
	register struct swdevt *sp;
	struct vnode *vp;

	sz = howmany(bp->b_bcount, PAGE_SIZE);

	/*
	 * Convert interleaved swap into per-device swap.  Note that
	 * the block size is left in PAGE_SIZE'd chunks (for the newswap)
	 * here.
	 */
	if (nswdev > 1) {
		off = bp->b_blkno % dmmax;
		if (off + sz > dmmax) {
			bp->b_error = EINVAL;
			bp->b_flags |= B_ERROR;
			biodone(bp);
			return;
		}
		seg = bp->b_blkno / dmmax;
		index = seg % nswdev;
		seg /= nswdev;
		bp->b_blkno = seg * dmmax + off;
	} else {
		index = 0;
	}
	sp = &swdevt[index];
	if (bp->b_blkno + sz > sp->sw_nblks) {
		bp->b_error = EINVAL;
		bp->b_flags |= B_ERROR;
		biodone(bp);
		return;
	}
	bp->b_dev = sp->sw_device;
	if (sp->sw_vp == NULL) {
		bp->b_error = ENODEV;
		bp->b_flags |= B_ERROR;
		biodone(bp);
		return;
	}

	/*
	 * Convert from PAGE_SIZE'd to DEV_BSIZE'd chunks for the actual I/O
	 */
	bp->b_blkno = ctodb(bp->b_blkno);

	vhold(sp->sw_vp);
	s = splvm();
	if ((bp->b_flags & B_READ) == 0) {
		vp = bp->b_vp;
		if (vp) {
			vp->v_numoutput--;
			if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
				vp->v_flag &= ~VBWAIT;
				wakeup(&vp->v_numoutput);
			}
		}
		sp->sw_vp->v_numoutput++;
	}
	pbreassignbuf(bp, sp->sw_vp);
	splx(s);
	VOP_STRATEGY(bp->b_vp, bp);
}
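A hypothetical standalone sketch, not from the file above, pulling the interleave arithmetic of swstrategy() out into a plain function so the mapping from a logical swap block to a (device index, per-device block) pair can be checked in userland. The dmmax and nswdev values are assumed constants chosen only for illustration.

/*
 * Hypothetical sketch (assumptions: DMMAX and NSWDEV are stand-ins for the
 * kernel's dmmax and nswdev).  Mirrors the nswdev > 1 branch of swstrategy().
 */
#include <stdio.h>

#define DMMAX	1024	/* pages per interleave stripe (assumed) */
#define NSWDEV	4	/* number of interleaved swap devices (assumed) */

static void
swap_interleave(long blkno, int *index, long *devblk)
{
	long off, seg;

	off = blkno % DMMAX;		/* offset within the stripe */
	seg = blkno / DMMAX;		/* stripe number */
	*index = seg % NSWDEV;		/* stripes rotate across the devices */
	seg /= NSWDEV;			/* stripe number on that device */
	*devblk = seg * DMMAX + off;	/* block number on the device */
}

int
main(void)
{
	long blkno, devblk;
	int index;

	/* Walk stripe boundaries: consecutive stripes land on consecutive devices. */
	for (blkno = 0; blkno < 8 * DMMAX; blkno += DMMAX) {
		swap_interleave(blkno, &index, &devblk);
		printf("logical %6ld -> device %d, block %6ld\n",
		    blkno, index, devblk);
	}
	return (0);
}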
/*
 * System call swapon(name) enables swapping on device name,
 * which must be in the swdevsw.  Return EBUSY
 * if already swapping on this device.
 */
#ifndef _SYS_SYSPROTO_H_
struct swapon_args {
	char *name;
};
#endif

/* ARGSUSED */
int
swapon(p, uap)
	struct proc *p;
	struct swapon_args *uap;
{
	register struct vnode *vp;
	dev_t dev;
	struct nameidata nd;
	int error;

	error = suser(p);
	if (error)
		return (error);

	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, p);
	error = namei(&nd);
	if (error)
		return (error);

	vp = nd.ni_vp;

	switch (vp->v_type) {
	case VBLK:
		dev = vp->v_rdev;
		if (bdevsw(dev) == NULL) {
			error = ENXIO;
			break;
		}
		error = swaponvp(p, vp, dev, 0);
		break;
	case VCHR:
		/*
		 * For now, we disallow swapping to regular files.
		 * It requires logical->physical block translation
		 * support in the swap pager before it will work.
		 */
		error = ENOTBLK;
		break;
#if 0
		error = VOP_GETATTR(vp, &attr, p->p_ucred, p);
		if (!error)
			error = swaponvp(p, vp, NODEV,
					 attr.va_size / DEV_BSIZE);
		break;
#endif
	default:
		error = EINVAL;
		break;
	}

	if (error)
		vrele(vp);

	return (error);
}

/*
 * swaponvp() enables swapping on the given vnode, which occupies one
 * slot in the swap map.  Each of the nswdev devices provides 1/nswdev'th
 * of the swap space, which is laid out with blocks of dmmax pages
 * circularly among the devices.
 *
 * The new swap code uses page-sized blocks.  The old swap code used
 * DEV_BSIZE'd chunks.
 *
 * XXX locking when multiple swapon's run in parallel
 */
int
swaponvp(p, vp, dev, nblks)
	struct proc *p;
	struct vnode *vp;
	dev_t dev;
	u_long nblks;
{
	int index;
	register struct swdevt *sp;
	register swblk_t vsbase;
	register long blk;
	swblk_t dvbase;
	int error;

	ASSERT_VOP_UNLOCKED(vp, "swaponvp");
	for (sp = swdevt, index = 0 ; index < nswdev; index++, sp++) {
		if (sp->sw_vp == vp)
			return EBUSY;
		if (!sp->sw_vp)
			goto found;
	}
	return EINVAL;
found:
	(void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
	error = VOP_OPEN(vp, FREAD | FWRITE, p->p_ucred, p);
	(void) VOP_UNLOCK(vp, 0, p);
	if (error)
		return (error);

	if (nblks == 0 && dev != NODEV && (bdevsw(dev)->d_psize == 0 ||
	    (nblks = (*bdevsw(dev)->d_psize) (dev)) == -1)) {
		(void) VOP_CLOSE(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENXIO);
	}
	if (nblks == 0) {
		(void) VOP_CLOSE(vp, FREAD | FWRITE, p->p_ucred, p);
		return (ENXIO);
	}

	/*
	 * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks.
	 * First chop nblks off to page-align it, then convert.
	 *
	 * sw->sw_nblks is in page-sized chunks now too.
	 */
	nblks &= ~(ctodb(1) - 1);
	nblks = dbtoc(nblks);

	sp->sw_vp = vp;
	sp->sw_dev = dev2budev(dev);
	sp->sw_device = dev;
	sp->sw_flags |= SW_FREED;
	sp->sw_nblks = nblks;

	/*
	 * nblks, nswap, and dmmax are PAGE_SIZE'd parameters now, not
	 * DEV_BSIZE'd.
	 */
	if (nblks * nswdev > nswap)
		nswap = (nblks+1) * nswdev;

	if (swapblist == NULL)
		swapblist = blist_create(nswap);
	else
		blist_resize(&swapblist, nswap, 0);

	for (dvbase = dmmax; dvbase < nblks; dvbase += dmmax) {
		blk = min(nblks - dvbase, dmmax);
		vsbase = index * dmmax + dvbase * nswdev;
		blist_free(swapblist, vsbase, blk);
		vm_swap_size += blk;
	}

	if (!swapdev_vp) {
		struct vnode *vp1;
		struct vnode *nvp;

		error = getnewvnode(VT_NON, (struct mount *) 0,
		    spec_vnodeop_p, &nvp);
		if (error)
			panic("Cannot get vnode for swapdev");
		vp1 = nvp;
		vp1->v_type = VBLK;

		if ((nvp = checkalias(vp1,
		    makeudev(BDEV_MAJOR, 0), (struct mount *) 0))) {
			vput(vp1);
			vp1 = nvp;
		}
		swapdev_vp = vp1;
	}
	return (0);
}
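A hypothetical sketch, not from the file above, spelling out the unit conversion and interleaved placement that swaponvp() performs, with ctodb()/dbtoc() written out for an assumed 4K page and 512-byte sector size. The device size, slot index, and stripe constants are illustrative assumptions only.

/*
 * Hypothetical sketch (assumptions: PAGE_SIZE 4096, DEV_BSIZE 512, and the
 * DMMAX/NSWDEV constants stand in for the kernel's dmmax and nswdev).
 * Converts a device size from disk blocks to pages, then shows which
 * interleaved ranges the blist_free() loop would release.
 */
#include <stdio.h>

#define PAGE_SIZE	4096
#define DEV_BSIZE	512
#define CTODB(pg)	((pg) * (PAGE_SIZE / DEV_BSIZE))  /* pages -> disk blocks */
#define DBTOC(db)	((db) / (PAGE_SIZE / DEV_BSIZE))  /* disk blocks -> pages */
#define DMMAX		1024	/* interleave stripe, in pages (assumed) */
#define NSWDEV		4	/* number of swap devices (assumed) */

int
main(void)
{
	long nblks = 200000;	/* device size in DEV_BSIZE blocks (assumed) */
	int index = 1;		/* swap-map slot this device landed in (assumed) */
	long dvbase, blk, vsbase;

	/* Chop to a page boundary, then convert to page-sized chunks. */
	nblks &= ~(CTODB(1) - 1);
	nblks = DBTOC(nblks);
	printf("device contributes %ld pages of swap\n", nblks);

	/* Each dmmax-sized stripe lands at index*dmmax + dvbase*nswdev. */
	for (dvbase = DMMAX; dvbase < nblks; dvbase += DMMAX) {
		blk = (nblks - dvbase < DMMAX) ? nblks - dvbase : DMMAX;
		vsbase = index * DMMAX + dvbase * NSWDEV;
		printf("free %4ld pages at interleaved block %ld\n",
		    blk, vsbase);
	}
	return (0);
}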
static int sw_devsw_installed;
#ifdef DEVFS
static void *drum_devfs_token;
#endif

static void
sw_drvinit(void *unused)
{

	if( ! sw_devsw_installed ) {
		cdevsw_add(&sw_cdevsw);
		/*
		 * XXX: This is pretty gross, but it will disappear with
		 * the blockdevices RSN.
		 */
		sw_cdevsw.d_open = nullopen;
		sw_cdevsw.d_close = nullclose;
		sw_devsw_installed = 1;
#ifdef DEVFS
		drum_devfs_token = devfs_add_devswf(&sw_cdevsw, 0, DV_CHR,
		    UID_ROOT, GID_KMEM, 0640, "drum");
#endif
	}
}

SYSINIT(swdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,sw_drvinit,NULL)
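Finally, a hypothetical userland sketch, not part of this change, of the page-rounding that the mlock()/munlock() handlers earlier in this diff apply before calling vm_map_user_pageable(), followed by an actual mlock()/munlock() call. The kernel performs the same rounding itself, so doing it here is only to make the arithmetic visible; failures roughly match the kernel paths shown (EAGAIN when the wired-page ceiling would be exceeded, ENOMEM or a privilege error otherwise).

/*
 * Hypothetical example (assumption: page size from sysconf stands in for
 * the kernel's PAGE_MASK/round_page macros).  Locks and unlocks a small
 * malloc'd buffer after extending the range to page boundaries.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	size_t pagesz = (size_t)sysconf(_SC_PAGESIZE);
	uintptr_t addr, pageoff;
	size_t size;
	char *buf;

	buf = malloc(100);
	if (buf == NULL)
		err(1, "malloc");

	/* Mirror the kernel's rounding: pull addr back, push size out. */
	addr = (uintptr_t)buf;
	size = 100;
	pageoff = addr & (pagesz - 1);
	addr -= pageoff;
	size += pageoff;
	size = (size + pagesz - 1) & ~(pagesz - 1);	/* round_page() */
	printf("locking %zu bytes at %p\n", size, (void *)addr);

	if (mlock((void *)addr, size) == -1)
		err(1, "mlock");	/* may fail on wired-page or rlimit checks */
	if (munlock((void *)addr, size) == -1)
		err(1, "munlock");

	free(buf);
	return (0);
}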