Index: head/sys/coda/coda_vnops.c
===================================================================
--- head/sys/coda/coda_vnops.c	(revision 75579)
+++ head/sys/coda/coda_vnops.c	(revision 75580)
@@ -1,1957 +1,1956 @@
 /*
  * 
  *             Coda: an Experimental Distributed File System
  *                              Release 3.1
  * 
  *           Copyright (c) 1987-1998 Carnegie Mellon University
  *                          All Rights Reserved
  * 
  * Permission  to  use, copy, modify and distribute this software and its
  * documentation is hereby granted,  provided  that  both  the  copyright
  * notice  and  this  permission  notice  appear  in  all  copies  of the
  * software, derivative works or  modified  versions,  and  any  portions
  * thereof, and that both notices appear in supporting documentation, and
  * that credit is given to Carnegie Mellon University  in  all  documents
  * and publicity pertaining to direct or indirect use of this code or its
  * derivatives.
  * 
  * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
  * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
  * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
  * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
  * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
  * ANY DERIVATIVE WORK.
  * 
  * Carnegie  Mellon  encourages  users  of  this  software  to return any
  * improvements or extensions that  they  make,  and  to  grant  Carnegie
  * Mellon the rights to redistribute these changes without encumbrance.
  * 
  *  	@(#) src/sys/coda/coda_vnops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
  * $FreeBSD$
  * 
  */
 
 /* 
  * Mach Operating System
  * Copyright (c) 1990 Carnegie-Mellon University
  * Copyright (c) 1989 Carnegie-Mellon University
  * All rights reserved.  The CMU software License Agreement specifies
  * the terms and conditions for use and redistribution.
  */
 
 /*
  * This code was written for the Coda file system at Carnegie Mellon
  * University.  Contributors include David Steere, James Kistler, and
  * M. Satyanarayanan.  
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/errno.h>
 #include <sys/acct.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/uio.h>
 #include <sys/namei.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 
 #include <coda/coda.h>
 #include <coda/cnode.h>
 #include <coda/coda_vnops.h>
 #include <coda/coda_venus.h>
 #include <coda/coda_opstats.h>
 #include <coda/coda_subr.h>
 #include <coda/coda_namecache.h>
 #include <coda/coda_pioctl.h>
 
 /* 
  * These flags select various performance enhancements.
  */
 int coda_attr_cache  = 1;       /* Set to cache attributes in the kernel */
 int coda_symlink_cache = 1;     /* Set to cache symbolic link information */
 int coda_access_cache = 1;      /* Set to handle some access checks directly */
 
 /*
  * Per-operation counters for the vnode operations implemented in this
  * file.  The array is indexed by the CODA_*_STATS opcode that callers
  * hand to the MARK_* macros below.
  */
 
 struct coda_op_stats coda_vnodeopstats[CODA_VNODEOPS_SIZE];
 
 /* Each macro bumps one counter of the entry for operation `op'. */
 #define MARK_ENTRY(op) (coda_vnodeopstats[op].entries++)
 #define MARK_INT_SAT(op) (coda_vnodeopstats[op].sat_intrn++)
 #define MARK_INT_FAIL(op) (coda_vnodeopstats[op].unsat_intrn++)
 #define MARK_INT_GEN(op) (coda_vnodeopstats[op].gen_intrn++)
 
 /* What we are delaying for in printf */
 int coda_printf_delay = 0;  /* in microseconds */
 /* When nonzero, the ENTRY macro logs the name of the function it sits in. */
 int coda_vnop_print_entry = 0;
 static int coda_lockdebug = 0;
 
 /* Definition of the vfs operation vector */
 
 /*
  * Some NetBSD details:
  * 
  *   coda_start is called at the end of the mount syscall.
  *   coda_init is called at boot time.
  */
 
 /*
  * Trace hook for the top of a vnode operation: prints the name of the
  * enclosing function when coda_vnop_print_entry is nonzero.  It expands to
  * a bare `if', so it must not be placed directly in front of an `else'.
  */
 #define ENTRY  if(coda_vnop_print_entry) myprintf(("Entered %s\n",__FUNCTION__))
 
 /* Definition of the vnode operation vector */
 
 /*
  * Table pairing each vnode operation descriptor with the Coda routine that
  * handles it.  Operations Coda does not implement are routed either to
  * coda_vop_error (logs the operation and returns EIO) or to coda_vop_nop
  * (returns 0).  The table is terminated by an all-NULL entry.
  */
 struct vnodeopv_entry_desc coda_vnodeop_entries[] = {
     { &vop_default_desc, coda_vop_error },
     { &vop_lookup_desc, coda_lookup },		/* lookup */
     { &vop_create_desc, coda_create },		/* create */
     { &vop_mknod_desc, coda_vop_error },	/* mknod */
     { &vop_open_desc, coda_open },		/* open */
     { &vop_close_desc, coda_close },		/* close */
     { &vop_access_desc, coda_access },		/* access */
     { &vop_getattr_desc, coda_getattr },	/* getattr */
     { &vop_setattr_desc, coda_setattr },	/* setattr */
     { &vop_read_desc, coda_read },		/* read */
     { &vop_write_desc, coda_write },		/* write */
     { &vop_ioctl_desc, coda_ioctl },		/* ioctl */
     { &vop_fsync_desc, coda_fsync },		/* fsync */
     { &vop_remove_desc, coda_remove },		/* remove */
     { &vop_link_desc, coda_link },		/* link */
     { &vop_rename_desc, coda_rename },		/* rename */
     { &vop_mkdir_desc, coda_mkdir },		/* mkdir */
     { &vop_rmdir_desc, coda_rmdir },		/* rmdir */
     { &vop_symlink_desc, coda_symlink },	/* symlink */
     { &vop_readdir_desc, coda_readdir },	/* readdir */
     { &vop_readlink_desc, coda_readlink },	/* readlink */
     { &vop_inactive_desc, coda_inactive },	/* inactive */
     { &vop_reclaim_desc, coda_reclaim },	/* reclaim */
     { &vop_lock_desc, coda_lock },		/* lock */
     { &vop_unlock_desc, coda_unlock },		/* unlock */
     { &vop_bmap_desc, coda_bmap },		/* bmap */
     { &vop_strategy_desc, coda_strategy },	/* strategy */
     { &vop_print_desc, coda_vop_error },	/* print */
     { &vop_islocked_desc, coda_islocked },	/* islocked */
     { &vop_pathconf_desc, coda_vop_error },	/* pathconf */
     { &vop_advlock_desc, coda_vop_nop },	/* advlock */
-    { &vop_bwrite_desc, coda_vop_error },	/* bwrite */
     { &vop_lease_desc, coda_vop_nop },		/* lease */
     { &vop_poll_desc, (vop_t *) vop_stdpoll },
     { &vop_getpages_desc, coda_fbsd_getpages },	/* pager intf.*/
     { &vop_putpages_desc, coda_fbsd_putpages },	/* pager intf.*/
 
 #if	0
 
     we need to define these someday
 #define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd)
 #define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd)
 #define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc)
 #define UFS_TRUNCATE(aa, bb, cc, dd, ee) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd, ee)
 #define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb)
 
     missing
     { &vop_reallocblks_desc,	(vop_t *) ufs_missingop },
     { &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
     { &vop_whiteout_desc,	(vop_t *) ufs_whiteout },
 #endif
     /* Sentinel: marks the end of the table. */
     { (struct vnodeop_desc*)NULL, (int(*)(void *))NULL }
 };
 
 /*
  * Ties the operation-vector pointer coda_vnodeop_p to the
  * coda_vnodeop_entries table and hands the pair to VNODEOP_SET.
  * NOTE(review): VNODEOP_SET presumably registers the vector with the VFS
  * layer so that coda_vnodeop_p gets filled in -- confirm against
  * <sys/vnode.h>.
  */
 static struct vnodeopv_desc coda_vnodeop_opv_desc =
 		{ &coda_vnodeop_p, coda_vnodeop_entries };
 
 VNODEOP_SET(coda_vnodeop_opv_desc);
 
 /*
  * Catch-all for vnode operations that Coda does not implement.  Logs the
  * name of the operation that was requested and fails the call with EIO.
  * The panic the original authors had here remains disabled.
  */
 int
 coda_vop_error(void *anon)
 {
     /* anon is treated as pointing at the operation's descriptor pointer. */
     struct vnodeop_desc **descp = anon;
 
     myprintf(("coda_vop_error: Vnode operation %s called, but not defined.\n",
 	      (*descp)->vdesc_name));
     return (EIO);
 }
 
 /*
  * Accept-and-ignore handler, wired up for the lease and advlock operations
  * in the operation table.  Always reports success; when codadebug is set it
  * first logs which operation was ignored.
  */
 int
 coda_vop_nop(void *anon)
 {
     struct vnodeop_desc **descp = anon;
 
     if (codadebug != 0) {
 	/* The descriptor is only looked at when we actually log. */
 	myprintf(("Vnode operation %s called, but unsupported\n",
 		  (*descp)->vdesc_name));
     }
     return (0);
 }
 
 int
 coda_vnodeopstats_init(void)
 {
 	register int i;
 	
 	for(i=0;i<CODA_VNODEOPS_SIZE;i++) {
 		coda_vnodeopstats[i].opcode = i;
 		coda_vnodeopstats[i].entries = 0;
 		coda_vnodeopstats[i].sat_intrn = 0;
 		coda_vnodeopstats[i].unsat_intrn = 0;
 		coda_vnodeopstats[i].gen_intrn = 0;
 	}
 	return 0;
 }
 		
 /*
  * VOP_OPEN for Coda vnodes.
  *
  * Asks Venus (venus_open) for the <device, inode> pair of the cache
  * (container) file holding the data, turns that pair into a vnode with
  * coda_grab_vnode(), records it in the cnode and opens it with VOP_OPEN().
  * A reference on the Coda vnode itself is taken and kept until
  * coda_close().
  *
  * Opening the control vnode never reaches Venus: it succeeds unless the
  * flags ask for write/truncate/create access, in which case EACCES is
  * returned.
  *
  * Returns 0 on success, otherwise the error from venus_open(),
  * coda_grab_vnode(), VOP_OPEN() on the container, or vfs_object_create().
  *
  * NOTE(review): the two failure returns after vref()/c_ocount++ do not
  * undo that bookkeeping, and the vfs_object_create() failure path calls
  * vput() on a vnode this function has already unlocked -- confirm whether
  * these paths need cleanup.
  */
 int
 coda_open(v)
     void *v;
 {
     /* 
      * NetBSD can pass the O_EXCL flag in mode, even though the check
      * has already happened.  Venus defensively assumes that if open
      * is passed the EXCL, it must be a bug.  We strip the flag here.
      */
 /* true args */
     struct vop_open_args *ap = v;
     register struct vnode **vpp = &(ap->a_vp);
     struct cnode *cp = VTOC(*vpp);
     int flag = ap->a_mode & (~O_EXCL);
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_p;
 /* locals */
     int error;
     struct vnode *vp;		/* container (cache file) vnode */
     dev_t dev;
     ino_t inode;
 
     MARK_ENTRY(CODA_OPEN_STATS);
 
     /* Check for open of control file. */
     if (IS_CTL_VP(*vpp)) {
 	/* XXX */
 	/* if (WRITEABLE(flag)) */ 
 	/* O_EXCL was already masked out of flag above, so it never matches. */
 	if (flag & (FWRITE | O_TRUNC | O_CREAT | O_EXCL)) {
 	    MARK_INT_FAIL(CODA_OPEN_STATS);
 	    return(EACCES);
 	}
 	MARK_INT_SAT(CODA_OPEN_STATS);
 	return(0);
     }
 
     error = venus_open(vtomi((*vpp)), &cp->c_fid, flag, cred, p, &dev, &inode);
     if (error)
 	return (error);
     /* error is known to be 0 here; the test below is always true. */
     if (!error) {
 	CODADEBUG( CODA_OPEN,myprintf(("open: dev %#lx inode %lu result %d\n",
 				       (u_long)dev2udev(dev), (u_long)inode,
 				       error)); )
     }
 
     /* Translate the <device, inode> pair for the cache file into
        an inode pointer. */
     error = coda_grab_vnode(dev, inode, &vp);
     if (error)
 	return (error);
 
     /* We get the vnode back locked.  Needs unlocked */
     VOP_UNLOCK(vp, 0, p);
     /* Keep a reference until the close comes in. */
     vref(*vpp);                
 
     /* Save the vnode pointer for the cache file. */
     if (cp->c_ovp == NULL) {
 	cp->c_ovp = vp;
     } else {
 	/* A cnode that is already open must map to the same container. */
 	if (cp->c_ovp != vp)
 	    panic("coda_open:  cp->c_ovp != ITOV(ip)");
     }
     cp->c_ocount++;
 
     /* Flush the attribute cached if writing the file. */
     if (flag & FWRITE) {
 	cp->c_owrite++;
 	cp->c_flags &= ~C_VATTR;
     }
 
     /* Save the <device, inode> pair for the cache file to speed
        up subsequent page_read's. */
     cp->c_device = dev;
     cp->c_inode = inode;
 
     /* Open the cache file. */
     error = VOP_OPEN(vp, flag, cred, p); 
     if (error) {
     	printf("coda_open: VOP_OPEN on container failed %d\n", error);
 	return (error);
     }
 /* grab (above) does this when it calls newvnode unless it's in the cache*/
     /* Regular container files also get a VM object created for them. */
     if (vp->v_type == VREG) {
     	error = vfs_object_create(vp, p, cred);
 	if (error != 0) {
 	    printf("coda_open: vfs_object_create() returns %d\n", error);
 	    vput(vp);
 	}
     }
 
     return(error);
 }
 
 /*
  * VOP_CLOSE for Coda vnodes: close the cache (container) file used for
  * I/O, drop the references taken at open time, and notify Venus.
  *
  * Closing the control vnode is a no-op that returns 0.  While the file
  * system is being unmounted the container is closed and released if there
  * is one, Venus is not contacted, and ENODEV is returned.  Otherwise the
  * result of venus_close() is returned.
  */
 int
 coda_close(v)
     void *v;
 {
 /* true args */
     struct vop_close_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     int flag = ap->a_fflag;
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_p;
 /* locals */
     int error;
 
     MARK_ENTRY(CODA_CLOSE_STATS);
 
     /* Check for close of control file. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_SAT(CODA_CLOSE_STATS);
 	return(0);
     }
 
     if (IS_UNMOUNTING(cp)) {
 	/*
 	 * Unmount in progress: only tear down the container.  The open
 	 * counters and the reference on vp itself are left untouched on
 	 * this path.
 	 */
 	if (cp->c_ovp) {
 #ifdef	CODA_VERBOSE
 	    printf("coda_close: destroying container ref %d, ufs vp %p of vp %p/cp %p\n",
 		    vp->v_usecount, cp->c_ovp, vp, cp);
 #endif
 #ifdef	hmm
 	    vgone(cp->c_ovp);
 #else
 	    VOP_CLOSE(cp->c_ovp, flag, cred, p); /* Do errors matter here? */
 	    vrele(cp->c_ovp);
 #endif
 	} else {
 #ifdef	CODA_VERBOSE
 	    printf("coda_close: NO container vp %p/cp %p\n", vp, cp);
 #endif
 	}
 	return ENODEV;
     } else {
 	/* NOTE(review): c_ovp is used here without a NULL check. */
 	VOP_CLOSE(cp->c_ovp, flag, cred, p); /* Do errors matter here? */
 	vrele(cp->c_ovp);
     }
 
     /* Forget the container once the last open has been closed. */
     if (--cp->c_ocount == 0)
 	cp->c_ovp = NULL;
 
     if (flag & FWRITE)                    /* file was opened for write */
 	--cp->c_owrite;
 
     error = venus_close(vtomi(vp), &cp->c_fid, flag, cred, p);
     /* Drop the reference coda_open() took with vref() on the Coda vnode. */
     vrele(CTOV(cp));
 
     CODADEBUG(CODA_CLOSE, myprintf(("close: result %d\n",error)); )
     return(error);
 }
 
 int
 coda_read(v)
     void *v;
 {
     struct vop_read_args *ap = v;
 
     ENTRY;
     return(coda_rdwr(ap->a_vp, ap->a_uio, UIO_READ,
 		    ap->a_ioflag, ap->a_cred, ap->a_uio->uio_procp));
 }
 
 int
 coda_write(v)
     void *v;
 {
     struct vop_write_args *ap = v;
 
     ENTRY;
     return(coda_rdwr(ap->a_vp, ap->a_uio, UIO_WRITE,
 		    ap->a_ioflag, ap->a_cred, ap->a_uio->uio_procp));
 }
 
 /*
  * Shared implementation of coda_read() and coda_write().
  *
  * Redirects the transfer described by uiop to the container (cache) file
  * vnode recorded in the cnode (c_ovp).  When the cnode has no container
  * vnode, one is obtained either from the cached <device, inode> pair via
  * coda_grab_vnode(), or by an internal VOP_OPEN() on vp that is undone by
  * a matching VOP_CLOSE() before returning.
  *
  *	vp	Coda vnode being read or written
  *	uiop	transfer description, handed to the container vnode
  *	rw	UIO_READ or UIO_WRITE
  *	ioflag	I/O flags, passed through unchanged
  *	cred	caller's credentials
  *	p	calling process; may be NULL
  *
  * Returns 0 on success, EINVAL for the control vnode, or the error from
  * obtaining the container or from its VOP_READ()/VOP_WRITE().
  */
 int
 coda_rdwr(vp, uiop, rw, ioflag, cred, p)
     struct vnode *vp;
     struct uio *uiop;
     enum uio_rw rw;
     int ioflag;
     struct ucred *cred;
     struct proc *p;
 { 
   /* NOTE: container file operation!!! */
     struct cnode *cp = VTOC(vp);
     struct vnode *cfvp = cp->c_ovp;
     int opened_internally = 0;
     int dumping_core = 0;
     int error = 0;
 
     MARK_ENTRY(CODA_RDWR_STATS);
 
     CODADEBUG(CODA_RDWR, myprintf(("coda_rdwr(%d, %p, %d, %lld, %d)\n", rw, 
 			      (void *)uiop->uio_iov->iov_base, uiop->uio_resid, 
 			      (long long)uiop->uio_offset, uiop->uio_segflg)); )
 	
     /* Check for rdwr of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_RDWR_STATS);
 	return(EINVAL);
     }
 
     /* 
      * If file is not already open this must be a page
      * {read,write} request.  Iget the cache file's inode
      * pointer if we still have its <device, inode> pair.
      * Otherwise, we must do an internal open to derive the
      * pair. 
      */
     if (cfvp == NULL) {
 	/*
 	 * The accounting flags are read under the process lock.  The
 	 * ACORE test has always allowed for a NULL p, so the lock may
 	 * only be taken when there really is a process; locking first
 	 * and testing afterwards would dereference the NULL pointer.
 	 */
 	if (p != NULL) {
 	    PROC_LOCK(p);
 	    dumping_core = (p->p_acflag & ACORE) != 0;
 	    PROC_UNLOCK(p);
 	}
 
 	/* 
 	 * If we're dumping core, do the internal open. Otherwise
 	 * venus won't have the correct size of the core when
 	 * it's completely written.
 	 */
 	if (cp->c_inode != 0 && !dumping_core) {
 	    /*
 	     * NOTE(review): nothing in this function releases the vnode
 	     * returned by coda_grab_vnode(); confirm whether a vrele()
 	     * is owed on this path.
 	     */
 	    error = coda_grab_vnode(cp->c_device, cp->c_inode, &cfvp);
 	    if (error) {
 		MARK_INT_FAIL(CODA_RDWR_STATS);
 		return(error);
 	    }
 	    /* 
 	     * We get the vnode back locked in both Mach and
 	     * NetBSD.  Needs unlocked 
 	     */
 	    VOP_UNLOCK(cfvp, 0, p);
 	} else {
 	    opened_internally = 1;
 	    MARK_INT_GEN(CODA_OPEN_STATS);
 	    error = VOP_OPEN(vp, (rw == UIO_READ ? FREAD : FWRITE), 
 			     cred, p);
 	    printf("coda_rdwr: Internally Opening %p\n", vp);
 	    if (error) {
 		printf("coda_rdwr: VOP_OPEN on container failed %d\n", error);
 		return (error);
 	    }
 	    if (vp->v_type == VREG) {
 		error = vfs_object_create(vp, p, cred);
 		if (error != 0) {
 		    printf("coda_rdwr: vfs_object_create() returns %d\n", error);
 		    vput(vp);
 		}
 	    }
 	    if (error) {
 		MARK_INT_FAIL(CODA_RDWR_STATS);
 		return(error);
 	    }
 	    /* The internal open went through coda_open(), which set c_ovp. */
 	    cfvp = cp->c_ovp;
 	}
     }
 
     /* Have UFS handle the call. */
     CODADEBUG(CODA_RDWR, myprintf(("indirect rdwr: fid = (%lx.%lx.%lx), refcnt = %d\n",
 			      cp->c_fid.Volume, cp->c_fid.Vnode, 
 			      cp->c_fid.Unique, CTOV(cp)->v_usecount)); )
 
     if (rw == UIO_READ) {
 	error = VOP_READ(cfvp, uiop, ioflag, cred);
     } else {
 	struct vattr attr;
 
 	error = VOP_WRITE(cfvp, uiop, ioflag, cred);
 	/* ufs_write updates the vnode_pager_setsize for the vnode/object */
 
 	/* Mirror the container's new size onto the Coda vnode's pager. */
 	if (VOP_GETATTR(cfvp, &attr, cred, p) == 0) {
 	    vnode_pager_setsize(vp, attr.va_size);
 	}
     }
 
     if (error)
 	MARK_INT_FAIL(CODA_RDWR_STATS);
     else
 	MARK_INT_SAT(CODA_RDWR_STATS);
 
     /* Do an internal close if necessary. */
     if (opened_internally) {
 	MARK_INT_GEN(CODA_CLOSE_STATS);
 	(void)VOP_CLOSE(vp, (rw == UIO_READ ? FREAD : FWRITE), cred, p);
     }
 
     /* Invalidate cached attributes if writing. */
     if (rw == UIO_WRITE)
 	cp->c_flags &= ~C_VATTR;
     return(error);
 }
 
 int
 coda_ioctl(v)
     void *v;
 {
 /* true args */
     struct vop_ioctl_args *ap = v;
     struct vnode *vp = ap->a_vp;
     int com = ap->a_command;
     caddr_t data = ap->a_data;
     int flag = ap->a_fflag;
     struct ucred *cred = ap->a_cred;
     struct proc  *p = ap->a_p;
 /* locals */
     int error;
     struct vnode *tvp;
     struct nameidata ndp;
     struct PioctlData *iap = (struct PioctlData *)data;
 
     MARK_ENTRY(CODA_IOCTL_STATS);
 
     CODADEBUG(CODA_IOCTL, myprintf(("in coda_ioctl on %s\n", iap->path));)
 	
     /* Don't check for operation on a dying object, for ctlvp it
        shouldn't matter */
 	
     /* Must be control object to succeed. */
     if (!IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_IOCTL_STATS);
 	CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: vp != ctlvp"));)
 	    return (EOPNOTSUPP);
     }
     /* Look up the pathname. */
 
     /* Should we use the name cache here? It would get it from
        lookupname sooner or later anyway, right? */
 
     NDINIT(&ndp, LOOKUP, (iap->follow ? FOLLOW : NOFOLLOW), UIO_USERSPACE, iap->path, p);
     error = namei(&ndp);
     tvp = ndp.ni_vp;
 
     if (error) {
 	MARK_INT_FAIL(CODA_IOCTL_STATS);
 	CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: lookup returns %d\n",
 				   error));)
 	return(error);
     }
 
     /* 
      * Make sure this is a coda style cnode, but it may be a
      * different vfsp 
      */
     if (tvp->v_op != coda_vnodeop_p) {
 	vrele(tvp);
 	NDFREE(&ndp, NDF_ONLY_PNBUF);
 	MARK_INT_FAIL(CODA_IOCTL_STATS);
 	CODADEBUG(CODA_IOCTL, 
 		 myprintf(("coda_ioctl error: %s not a coda object\n", 
 			iap->path));)
 	return(EINVAL);
     }
 
     if (iap->vi.in_size > VC_MAXDATASIZE) {
 	NDFREE(&ndp, 0);
 	return(EINVAL);
     }
     error = venus_ioctl(vtomi(tvp), &((VTOC(tvp))->c_fid), com, flag, data, cred, p);
 
     if (error)
 	MARK_INT_FAIL(CODA_IOCTL_STATS);
     else
 	CODADEBUG(CODA_IOCTL, myprintf(("Ioctl returns %d \n", error)); )
 
     vrele(tvp);
     NDFREE(&ndp, NDF_ONLY_PNBUF);
     return(error);
 }
 
 /*
  * To reduce the cost of a user-level venus;we cache attributes in
  * the kernel.  Each cnode has storage allocated for an attribute. If
  * c_vattr is valid, return a reference to it. Otherwise, get the
  * attributes from venus and store them in the cnode.  There is some
  * question if this method is a security leak. But I think that in
  * order to make this call, the user must have done a lookup and
  * opened the file, and therefore should already have access.  
  */
 int
 coda_getattr(v)
     void *v;
 {
 /* true args */
     struct vop_getattr_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     struct vattr *vap = ap->a_vap;
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_p;
 /* locals */
     int error;
 
     MARK_ENTRY(CODA_GETATTR_STATS);
 
     if (IS_UNMOUNTING(cp))
 	return ENODEV;
 
     /* Check for getattr of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_GETATTR_STATS);
 	return(ENOENT);
     }
 
     /* Check to see if the attributes have already been cached */
     if (VALID_VATTR(cp)) { 
 	CODADEBUG(CODA_GETATTR, { myprintf(("attr cache hit: (%lx.%lx.%lx)\n",
 				       cp->c_fid.Volume,
 				       cp->c_fid.Vnode,
 				       cp->c_fid.Unique));});
 	CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR))
 		 print_vattr(&cp->c_vattr); );
 	
 	*vap = cp->c_vattr;
 	MARK_INT_SAT(CODA_GETATTR_STATS);
 	return(0);
     }
 
     error = venus_getattr(vtomi(vp), &cp->c_fid, cred, p, vap);
 
     if (!error) {
 	CODADEBUG(CODA_GETATTR, myprintf(("getattr miss (%lx.%lx.%lx): result %d\n",
 				     cp->c_fid.Volume,
 				     cp->c_fid.Vnode,
 				     cp->c_fid.Unique,
 				     error)); )
 	    
 	CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR))
 		 print_vattr(vap);	);
 	
     {	int size = vap->va_size;
     	struct vnode *convp = cp->c_ovp;
 	if (convp != (struct vnode *)0) {
 	    vnode_pager_setsize(convp, size);
 	}
     }
 	/* If not open for write, store attributes in cnode */   
 	if ((cp->c_owrite == 0) && (coda_attr_cache)) {  
 	    cp->c_vattr = *vap;
 	    cp->c_flags |= C_VATTR; 
 	}
 	
     }
     return(error);
 }
 
 int
 coda_setattr(v)
     void *v;
 {
 /* true args */
     struct vop_setattr_args *ap = v;
     register struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     register struct vattr *vap = ap->a_vap;
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_p;
 /* locals */
     int error;
 
     MARK_ENTRY(CODA_SETATTR_STATS);
 
     /* Check for setattr of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_SETATTR_STATS);
 	return(ENOENT);
     }
 
     if (codadebug & CODADBGMSK(CODA_SETATTR)) {
 	print_vattr(vap);
     }
     error = venus_setattr(vtomi(vp), &cp->c_fid, vap, cred, p);
 
     if (!error)
 	cp->c_flags &= ~C_VATTR;
 
     {	int size = vap->va_size;
     	struct vnode *convp = cp->c_ovp;
 	if (size != VNOVAL && convp != (struct vnode *)0) {
 	    vnode_pager_setsize(convp, size);
 	}
     }
     CODADEBUG(CODA_SETATTR,	myprintf(("setattr %d\n", error)); )
     return(error);
 }
 
 /*
  * VOP_ACCESS for Coda vnodes.
  *
  * The control object permits pure read access only.  A VEXEC check on a
  * directory is granted locally when "." for that directory is found in
  * the name cache (only if coda_access_cache is set).  Everything else is
  * decided by Venus.
  *
  * Returns 0 when access is granted, EACCES for a refused control-object
  * request, or whatever venus_access() reports.
  */
 int
 coda_access(v)
     void *v;
 {
     struct vop_access_args *args = v;
     struct vnode *vp = args->a_vp;
     struct cnode *cp = VTOC(vp);
     int mode = args->a_mode;
     struct ucred *cred = args->a_cred;
     struct proc *p = args->a_p;
 
     MARK_ENTRY(CODA_ACCESS_STATS);
 
     if (IS_CTL_VP(vp)) {
 	/* bogus hack - all will be marked as successes */
 	MARK_INT_SAT(CODA_ACCESS_STATS);
 	if ((mode & VREAD) && !(mode & (VWRITE | VEXEC)))
 	    return (0);
 	return (EACCES);
     }
 
     /*
      * A directory whose "." entry sits in the name cache has already
      * been looked up with these credentials, so lookup (exec) access
      * can be granted without asking Venus.
      */
     if (coda_access_cache && (vp->v_type == VDIR) && (mode & VEXEC) &&
 	coda_nc_lookup(cp, ".", 1, cred)) {
 	MARK_INT_SAT(CODA_ACCESS_STATS);
 	return (0);
     }
 
     return (venus_access(vtomi(vp), &cp->c_fid, mode, cred, p));
 }
 
 int
 coda_readlink(v)
     void *v;
 {
 /* true args */
     struct vop_readlink_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     struct uio *uiop = ap->a_uio;
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_uio->uio_procp;
 /* locals */
     int error;
     char *str;
     int len;
 
     MARK_ENTRY(CODA_READLINK_STATS);
 
     /* Check for readlink of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_READLINK_STATS);
 	return(ENOENT);
     }
 
     if ((coda_symlink_cache) && (VALID_SYMLINK(cp))) { /* symlink was cached */
 	uiop->uio_rw = UIO_READ;
 	error = uiomove(cp->c_symlink, (int)cp->c_symlen, uiop);
 	if (error)
 	    MARK_INT_FAIL(CODA_READLINK_STATS);
 	else
 	    MARK_INT_SAT(CODA_READLINK_STATS);
 	return(error);
     }
 
     error = venus_readlink(vtomi(vp), &cp->c_fid, cred, p, &str, &len);
 
     if (!error) {
 	uiop->uio_rw = UIO_READ;
 	error = uiomove(str, len, uiop);
 
 	if (coda_symlink_cache) {
 	    cp->c_symlink = str;
 	    cp->c_symlen = len;
 	    cp->c_flags |= C_SYMLINK;
 	} else
 	    CODA_FREE(str, len);
     }
 
     CODADEBUG(CODA_READLINK, myprintf(("in readlink result %d\n",error));)
     return(error);
 }
 
 /*
  * VOP_FSYNC for Coda vnodes.
  *
  * Syncs the container vnode, if the cnode has one, and returns 0.  Venus
  * is currently never contacted: the unconditional `return 0' marked
  * "needs research" makes the venus_fsync() call below it unreachable.
  *
  * Returns ENODEV while unmounting, 0 otherwise.
  */
 int
 coda_fsync(v)
     void *v;
 {
 /* true args */
     struct vop_fsync_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_p;
 /* locals */
     struct vnode *convp = cp->c_ovp;
     int error;
    
     MARK_ENTRY(CODA_FSYNC_STATS);
 
     /* Check for fsync on an unmounting object */
     /* The NetBSD kernel, in its infinite wisdom, can try to fsync
      * after an unmount has been initiated.  This is a Bad Thing,
      * which we have to avoid.  Not a legitimate failure for stats.
      */
     if (IS_UNMOUNTING(cp)) {
 	return(ENODEV);
     }
 
     /* Check for fsync of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_SAT(CODA_FSYNC_STATS);
 	return(0);
     }
 
     /* Flush the container file; its result is not examined. */
     if (convp)
     	VOP_FSYNC(convp, cred, MNT_WAIT, p);
 
     /*
      * We see fsyncs with usecount == 1 then usecount == 0.
      * For now we ignore them.
      */
     /*
     if (!vp->v_usecount) {
     	printf("coda_fsync on vnode %p with %d usecount.  c_flags = %x (%x)\n",
 		vp, vp->v_usecount, cp->c_flags, cp->c_flags&C_PURGING);
     }
     */
 
     /*
      * We can expect fsync on any vnode at all if venus is purging it.
      * Venus can't very well answer the fsync request, now can it?
      * Hopefully, it won't have to, because hopefully, venus preserves
      * the (possibly untrue) invariant that it never purges an open
      * vnode.  Hopefully.
      */
     if (cp->c_flags & C_PURGING) {
 	return(0);
     }
 
     /* needs research */
     /* Everything after this return is dead code, kept for reference. */
     return 0;
     error = venus_fsync(vtomi(vp), &cp->c_fid, cred, p);
 
     CODADEBUG(CODA_FSYNC, myprintf(("in fsync result %d\n",error)); );
     return(error);
 }
 
 /*
  * VOP_INACTIVE for Coda vnodes.
  *
  * Frees a cached symlink string, removes the cnode from the lookup table
  * with coda_unsave(), and then either just releases the cnode lock (while
  * unmounting) or unlocks the vnode and disposes of it with vgone().
  * Venus is not notified.  Always returns 0; the control vnode is left
  * untouched.
  */
 int
 coda_inactive(v)
     void *v;
 {
     /* XXX - at the moment, inactive doesn't look at cred, and doesn't
        have a proc pointer.  Oops. */
 /* true args */
     struct vop_inactive_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     struct ucred *cred __attribute__((unused)) = NULL;
     struct proc *p __attribute__((unused)) = curproc;
 /* upcall decl */
 /* locals */
 
     /* We don't need to send inactive to venus - DCS */
     MARK_ENTRY(CODA_INACTIVE_STATS);
 
     if (IS_CTL_VP(vp)) {
 	MARK_INT_SAT(CODA_INACTIVE_STATS);
 	return 0;
     }
 
     CODADEBUG(CODA_INACTIVE, myprintf(("in inactive, %lx.%lx.%lx. vfsp %p\n",
 				  cp->c_fid.Volume, cp->c_fid.Vnode, 
 				  cp->c_fid.Unique, vp->v_mount));)
 
     /* If an array has been allocated to hold the symlink, deallocate it */
     if ((coda_symlink_cache) && (VALID_SYMLINK(cp))) {
 	if (cp->c_symlink == NULL)
 	    panic("coda_inactive: null symlink pointer in cnode");
 	
 	CODA_FREE(cp->c_symlink, cp->c_symlen);
 	cp->c_flags &= ~C_SYMLINK;
 	cp->c_symlen = 0;
     }
 
     /* Remove it from the table so it can't be found. */
     coda_unsave(cp);
     /* A vnode that reaches this point must still have its mount data. */
     if ((struct coda_mntinfo *)(vp->v_mount->mnt_data) == NULL) {
 	myprintf(("Help! vfsp->vfs_data was NULL, but vnode %p wasn't dying\n", vp));
 	panic("badness in coda_inactive\n");
     }
 
     if (IS_UNMOUNTING(cp)) {
 #ifdef	DEBUG
 	printf("coda_inactive: IS_UNMOUNTING use %d: vp %p, cp %p\n", vp->v_usecount, vp, cp);
 	if (cp->c_ovp != NULL)
 	    printf("coda_inactive: cp->ovp != NULL use %d: vp %p, cp %p\n",
 	    	   vp->v_usecount, vp, cp);
 #endif
 	/* Unmounting: only drop the cnode lock; vgone() is not called here. */
 	lockmgr(&cp->c_lock, LK_RELEASE, &vp->v_interlock, p);
     } else {
 #ifdef OLD_DIAGNOSTIC
 	if (CTOV(cp)->v_usecount) {
 	    panic("coda_inactive: nonzero reference count");
 	}
 	if (cp->c_ovp != NULL) {
 	    panic("coda_inactive:  cp->ovp != NULL");
 	}
 #endif
 	/* Normal case: unlock the vnode, then get rid of it. */
 	VOP_UNLOCK(vp, 0, p);
 	vgone(vp);
     }
 
     MARK_INT_SAT(CODA_INACTIVE_STATS);
     return(0);
 }
 
 /*
  * Remote file system operations having to do with directory manipulation.
  */
 
 /*
  * VOP_LOOKUP for Coda directories.
  *
  * Resolves one path component (cnp) in directory dvp and stores the
  * resulting vnode in *ap->a_vpp:
  *   - the control-object name yields coda_ctlvp with an extra reference;
  *   - a name with len + 1 > CODA_MAXNAMLEN fails with EINVAL;
  *   - a name-cache hit yields the cached cnode's vnode with an extra
  *     reference;
  *   - otherwise Venus is asked, a cnode is made for the returned fid, and
  *     the name is entered in the cache unless CODA_NOCACHE is set in the
  *     returned type.
  *
  * ENOENT on the last component of a CREATE or RENAME is turned into
  * EJUSTRETURN with a NULL vnode and SAVENAME set.  A successful lookup of
  * the last component of a DELETE also sets SAVENAME.
  *
  * Locking on success (including EJUSTRETURN): the parent is unlocked
  * unless both LOCKPARENT and ISLASTCN are set, and the child, if any, is
  * locked exclusively -- except when the parent stays locked and the child
  * is the parent itself.  On any other error *ap->a_vpp is NULL and locks
  * are left as they were.
  *
  * It appears that in NetBSD, lookup is supposed to return the vnode locked
  */
 int
 coda_lookup(v)
     void *v;
 {
 /* true args */
     struct vop_lookup_args *ap = v;
     struct vnode *dvp = ap->a_dvp;
     struct cnode *dcp = VTOC(dvp);
     struct vnode **vpp = ap->a_vpp;
     /* 
      * It looks as though ap->a_cnp->ni_cnd->cn_nameptr holds the rest
      * of the string to xlate, and that we must try to get at least
      * ap->a_cnp->ni_cnd->cn_namelen of those characters to match.  I
      * could be wrong. 
      */
     struct componentname  *cnp = ap->a_cnp;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
 /* locals */
     struct cnode *cp;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
     ViceFid VFid;
     int	vtype;
     int error = 0;
 
     MARK_ENTRY(CODA_LOOKUP_STATS);
 
     CODADEBUG(CODA_LOOKUP, myprintf(("lookup: %s in %lx.%lx.%lx\n",
 				   nm, dcp->c_fid.Volume,
 				   dcp->c_fid.Vnode, dcp->c_fid.Unique)););
 
     /* Check for lookup of control object. */
     if (IS_CTL_NAME(dvp, nm, len)) {
 	*vpp = coda_ctlvp;
 	vref(*vpp);
 	MARK_INT_SAT(CODA_LOOKUP_STATS);
 	goto exit;
     }
 
     if (len+1 > CODA_MAXNAMLEN) {
 	MARK_INT_FAIL(CODA_LOOKUP_STATS);
 	CODADEBUG(CODA_LOOKUP, myprintf(("name too long: lookup, %lx.%lx.%lx(%s)\n",
 				    dcp->c_fid.Volume, dcp->c_fid.Vnode,
 				    dcp->c_fid.Unique, nm)););
 	*vpp = (struct vnode *)0;
 	error = EINVAL;
 	goto exit;
     }
     /* First try to look the file up in the cfs name cache */
     /* lock the parent vnode? */
     cp = coda_nc_lookup(dcp, nm, len, cred);
     if (cp) {
 	/* Cache hit: hand out the cached vnode with a new reference. */
 	*vpp = CTOV(cp);
 	vref(*vpp);
 	CODADEBUG(CODA_LOOKUP, 
 		 myprintf(("lookup result %d vpp %p\n",error,*vpp));)
     } else {
 	
 	/* The name wasn't cached, so we need to contact Venus */
 	error = venus_lookup(vtomi(dvp), &dcp->c_fid, nm, len, cred, p, &VFid, &vtype);
 	
 	if (error) {
 	    MARK_INT_FAIL(CODA_LOOKUP_STATS);
 	    CODADEBUG(CODA_LOOKUP, myprintf(("lookup error on %lx.%lx.%lx(%s)%d\n",
 					dcp->c_fid.Volume, dcp->c_fid.Vnode, dcp->c_fid.Unique, nm, error));)
 	    *vpp = (struct vnode *)0;
 	} else {
 	    MARK_INT_SAT(CODA_LOOKUP_STATS);
 	    CODADEBUG(CODA_LOOKUP, 
 		     myprintf(("lookup: vol %lx vno %lx uni %lx type %o result %d\n",
 			    VFid.Volume, VFid.Vnode, VFid.Unique, vtype,
 			    error)); )
 		
 	    cp = make_coda_node(&VFid, dvp->v_mount, vtype);
 	    *vpp = CTOV(cp);
 	    
 	    /* enter the new vnode in the Name Cache only if the top bit isn't set */
 	    /* And don't enter a new vnode for an invalid one! */
 	    if (!(vtype & CODA_NOCACHE))
 		coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
 	}
     }
 
  exit:
     /* 
      * If we are creating, and this was the last name to be looked up,
      * and the error was ENOENT, then there really shouldn't be an
      * error and we can make the leaf NULL and return success.  Since
      * this is supposed to work under Mach as well as NetBSD, we're
      * leaving this fn wrapped.  We also must tell lookup/namei that
      * we need to save the last component of the name.  (Create will
      * have to free the name buffer later...lucky us...)
      */
     if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME))
 	&& (cnp->cn_flags & ISLASTCN)
 	&& (error == ENOENT))
     {
 	error = EJUSTRETURN;
 	cnp->cn_flags |= SAVENAME;
 	*ap->a_vpp = NULL;
     }
 
     /* 
      * If we are removing, and we are at the last element, and we
      * found it, then we need to keep the name around so that the
      * removal will go ahead as planned.  Unfortunately, this will
      * probably also lock the to-be-removed vnode, which may or may
      * not be a good idea.  I'll have to look at the bits of
      * coda_remove to make sure.  We'll only save the name if we did in
      * fact find the name, otherwise coda_remove won't have a chance
      * to free the pathname.  
      */
     if ((cnp->cn_nameiop == DELETE)
 	&& (cnp->cn_flags & ISLASTCN)
 	&& !error)
     {
 	cnp->cn_flags |= SAVENAME;
     }
 
     /* 
      * If the lookup went well, we need to (potentially?) unlock the
      * parent, and lock the child.  We are only responsible for
      * checking to see if the parent is supposed to be unlocked before
      * we return.  We must always lock the child (provided there is
      * one, and (the parent isn't locked or it isn't the same as the
      * parent.)  Simple, huh?  We can never leave the parent locked unless
      * we are ISLASTCN
      */
     if (!error || (error == EJUSTRETURN)) {
 	if (!(cnp->cn_flags & LOCKPARENT) || !(cnp->cn_flags & ISLASTCN)) {
 	    /*
 	     * NOTE(review): if this unlock fails we return with *vpp
 	     * still set and referenced -- confirm the caller copes.
 	     */
 	    if ((error = VOP_UNLOCK(dvp, 0, p))) {
 		return error; 
 	    }	    
 	    /* 
 	     * The parent is unlocked.  As long as there is a child,
 	     * lock it without bothering to check anything else. 
 	     */
 	    if (*ap->a_vpp) {
 		if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
 		    printf("coda_lookup: ");
 		    panic("unlocked parent but couldn't lock child");
 		}
 	    }
 	} else {
 	    /* The parent is locked, and may be the same as the child */
 	    if (*ap->a_vpp && (*ap->a_vpp != dvp)) {
 		/* Different, go ahead and lock it. */
 		if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
 		    printf("coda_lookup: ");
 		    /* The parent is in fact still locked on this path. */
 		    panic("unlocked parent but couldn't lock child");
 		}
 	    }
 	}
     } else {
 	/* If the lookup failed, we need to ensure that the leaf is NULL */
 	/* Don't change any locking? */
 	*ap->a_vpp = NULL;
     }
     return(error);
 }
 
 /*ARGSUSED*/
 int
 coda_create(v)
     void *v;
 {
 /* true args */
     struct vop_create_args *ap = v;
     struct vnode *dvp = ap->a_dvp;
     struct cnode *dcp = VTOC(dvp);
     struct vattr *va = ap->a_vap;
     int exclusive = 1;
     int mode = ap->a_vap->va_mode;
     struct vnode **vpp = ap->a_vpp;
     struct componentname  *cnp = ap->a_cnp;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
 /* locals */
     int error;
     struct cnode *cp;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
     ViceFid VFid;
     struct vattr attr;
 
     MARK_ENTRY(CODA_CREATE_STATS);
 
     /* All creates are exclusive XXX */
     /* I'm assuming the 'mode' argument is the file mode bits XXX */
 
     /* Check for create of control object. */
     if (IS_CTL_NAME(dvp, nm, len)) {
 	*vpp = (struct vnode *)0;
 	MARK_INT_FAIL(CODA_CREATE_STATS);
 	return(EACCES);
     }
 
     error = venus_create(vtomi(dvp), &dcp->c_fid, nm, len, exclusive, mode, va, cred, p, &VFid, &attr);
 
     if (!error) {
 	
 	/* If this is an exclusive create, panic if the file already exists. */
 	/* Venus should have detected the file and reported EEXIST. */
 
 	if ((exclusive == 1) &&
 	    (coda_find(&VFid) != NULL))
 	    panic("cnode existed for newly created file!");
 	
 	cp = make_coda_node(&VFid, dvp->v_mount, attr.va_type);
 	*vpp = CTOV(cp);
 	
 	/* Update va to reflect the new attributes. */
 	(*va) = attr;
 	
 	/* Update the attribute cache and mark it as valid */
 	if (coda_attr_cache) {
 	    VTOC(*vpp)->c_vattr = attr;
 	    VTOC(*vpp)->c_flags |= C_VATTR;       
 	}
 
 	/* Invalidate the parent's attr cache, the modification time has changed */
 	VTOC(dvp)->c_flags &= ~C_VATTR;
 	
 	/* enter the new vnode in the Name Cache */
 	coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
 	
 	CODADEBUG(CODA_CREATE, 
 		 myprintf(("create: (%lx.%lx.%lx), result %d\n",
 			VFid.Volume, VFid.Vnode, VFid.Unique, error)); )
     } else {
 	*vpp = (struct vnode *)0;
 	CODADEBUG(CODA_CREATE, myprintf(("create error %d\n", error));)
     }
 
     if (!error) {
 	if (cnp->cn_flags & LOCKLEAF) {
 	    if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
 		printf("coda_create: ");
 		panic("unlocked parent but couldn't lock child");
 	    }
 	}
 #ifdef OLD_DIAGNOSTIC
 	else {
 	    printf("coda_create: LOCKLEAF not set!\n");
 	}
 #endif
     }
     return(error);
 }
 
 int
 coda_remove(v)
     void *v;
 {
 /* true args */
     struct vop_remove_args *ap = v;
     struct vnode *dvp = ap->a_dvp;
     struct cnode *cp = VTOC(dvp);
     struct componentname  *cnp = ap->a_cnp;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
 /* locals */
     int error;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
     struct cnode *tp;
 
     MARK_ENTRY(CODA_REMOVE_STATS);
 
     CODADEBUG(CODA_REMOVE, myprintf(("remove: %s in %lx.%lx.%lx\n",
 				   nm, cp->c_fid.Volume, cp->c_fid.Vnode,
 				   cp->c_fid.Unique)););
 
     /* Remove the file's entry from the CODA Name Cache */
     /* We're being conservative here, it might be that this person
      * doesn't really have sufficient access to delete the file
      * but we feel zapping the entry won't really hurt anyone -- dcs
      */
     /* I'm gonna go out on a limb here. If a file and a hardlink to it
      * exist, and one is removed, the link count on the other will be
      * off by 1. We could either invalidate the attrs if cached, or
      * fix them. I'll try to fix them. DCS 11/8/94
      */
     tp = coda_nc_lookup(VTOC(dvp), nm, len, cred);
     if (tp) {
 	if (VALID_VATTR(tp)) {	/* If attrs are cached */
 	    if (tp->c_vattr.va_nlink > 1) {	/* If it's a hard link */
 		tp->c_vattr.va_nlink--;
 	    }
 	}
 	
 	coda_nc_zapfile(VTOC(dvp), nm, len); 
 	/* No need to flush it if it doesn't exist! */
     }
     /* Invalidate the parent's attr cache, the modification time has changed */
     VTOC(dvp)->c_flags &= ~C_VATTR;
 
     /* Check for remove of control object. */
     if (IS_CTL_NAME(dvp, nm, len)) {
 	MARK_INT_FAIL(CODA_REMOVE_STATS);
 	return(ENOENT);
     }
 
     error = venus_remove(vtomi(dvp), &cp->c_fid, nm, len, cred, p);
 
     CODADEBUG(CODA_REMOVE, myprintf(("in remove result %d\n",error)); )
 
     return(error);
 }
 
 /*
  * coda_link: VOP_LINK handler.
  *
  * Asks Venus to create a hard link to a_vp under the name given by a_cnp
  * in directory a_tdvp.  Links to or from the control object are refused
  * with EACCES.  The cached attributes of both the directory and the
  * linked file are invalidated whether or not Venus succeeded.
  */
 int
 coda_link(v)
     void *v;
 {
 /* true args */
     struct vop_link_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     struct vnode *tdvp = ap->a_tdvp;
     struct cnode *tdcp = VTOC(tdvp);
     struct componentname *cnp = ap->a_cnp;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
 /* locals */
     int error;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
 
     MARK_ENTRY(CODA_LINK_STATS);
 
     /*
      * Debug dump of both fids.  Each pair is printed twice, once tagged
      * "nb_link" and once tagged "link".
      */
     if (codadebug & CODADBGMSK(CODA_LINK)) {
 	myprintf(("nb_link:   vp fid: (%lx.%lx.%lx)\n",
 		  cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
 	myprintf(("nb_link: tdvp fid: (%lx.%lx.%lx)\n",
 		  tdcp->c_fid.Volume, tdcp->c_fid.Vnode, tdcp->c_fid.Unique));
 	myprintf(("link:   vp fid: (%lx.%lx.%lx)\n",
 		  cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
 	myprintf(("link: tdvp fid: (%lx.%lx.%lx)\n",
 		  tdcp->c_fid.Volume, tdcp->c_fid.Vnode, tdcp->c_fid.Unique));
     }
 
     /* Neither end of the link may be the control object. */
     if (IS_CTL_NAME(tdvp, nm, len) || IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_LINK_STATS);
 	return(EACCES);
     }
 
     error = venus_link(vtomi(vp), &cp->c_fid, &tdcp->c_fid, nm, len, cred, p);
 
     /*
      * The directory's modification time and the file's link count may
      * have changed: drop the cached attributes of both.
      */
     tdcp->c_flags &= ~C_VATTR;
     cp->c_flags &= ~C_VATTR;
 
     CODADEBUG(CODA_LINK,	myprintf(("in link result %d\n",error)); )
 
     return(error);
 }
 
 /*
  * coda_rename: VOP_RENAME handler.
  *
  * Renames the component a_fcnp in directory a_fdvp to the component a_tcnp
  * in directory a_tdvp by upcalling Venus.  Renames that involve the control
  * object are refused with EACCES (returning before any vnode is released).
  * Otherwise the name-cache entries for source and target are zapped, both
  * directories' cached attributes are invalidated, and -- whether or not the
  * rename succeeded -- the references/locks on a_fvp, a_fdvp, a_tvp and
  * a_tdvp are dropped before returning.  Over-long names yield EINVAL.
  */
 int
 coda_rename(v)
     void *v;
 {
 /* true args */
     struct vop_rename_args *ap = v;
     struct vnode *odvp = ap->a_fdvp;
     struct cnode *odcp = VTOC(odvp);
     struct componentname  *fcnp = ap->a_fcnp;
     struct vnode *ndvp = ap->a_tdvp;
     struct cnode *ndcp = VTOC(ndvp);
     struct componentname  *tcnp = ap->a_tcnp;
     struct ucred *cred = fcnp->cn_cred;
     struct proc *p = fcnp->cn_proc;
 /* locals */
     int error;
     const char *fnm = fcnp->cn_nameptr;
     int flen = fcnp->cn_namelen;
     const char *tnm = tcnp->cn_nameptr;
     int tlen = tcnp->cn_namelen;
 
     MARK_ENTRY(CODA_RENAME_STATS);
 
     /* Hmmm.  The vnodes are already looked up.  Perhaps they are locked?
        This could be Bad. XXX */
 #ifdef OLD_DIAGNOSTIC
     /* Credentials and process are taken from fcnp only; check they agree. */
     if ((fcnp->cn_cred != tcnp->cn_cred)
 	|| (fcnp->cn_proc != tcnp->cn_proc))
     {
 	panic("coda_rename: component names don't agree");
     }
 #endif
 
     /* Check for rename involving control object. */
     /* NOTE(review): this return skips the vrele/vput cleanup at "exit". */
     if (IS_CTL_NAME(odvp, fnm, flen) || IS_CTL_NAME(ndvp, tnm, tlen)) {
 	MARK_INT_FAIL(CODA_RENAME_STATS);
 	return(EACCES);
     }
 
     /* Problem with moving directories -- need to flush entry for .. */
     if (odvp != ndvp) {
 	struct cnode *ovcp = coda_nc_lookup(VTOC(odvp), fnm, flen, cred);
 	if (ovcp) {
 	    struct vnode *ovp = CTOV(ovcp);
 	    if ((ovp) &&
 		(ovp->v_type == VDIR)) /* If it's a directory */
 		coda_nc_zapfile(VTOC(ovp),"..", 2);
 	}
     }
 
     /* Remove the entries for both source and target files */
     coda_nc_zapfile(VTOC(odvp), fnm, flen);
     coda_nc_zapfile(VTOC(ndvp), tnm, tlen);
 
     /* Invalidate the parent's attr cache, the modification time has changed */
     VTOC(odvp)->c_flags &= ~C_VATTR;
     VTOC(ndvp)->c_flags &= ~C_VATTR;
 
     /* Reject names too long for Venus; note the caches are already zapped. */
     if (flen+1 > CODA_MAXNAMLEN) {
 	MARK_INT_FAIL(CODA_RENAME_STATS);
 	error = EINVAL;
 	goto exit;
     }
 
     if (tlen+1 > CODA_MAXNAMLEN) {
 	MARK_INT_FAIL(CODA_RENAME_STATS);
 	error = EINVAL;
 	goto exit;
     }
 
     error = venus_rename(vtomi(odvp), &odcp->c_fid, &ndcp->c_fid, fnm, flen, tnm, tlen, cred, p);
 
  exit:
     CODADEBUG(CODA_RENAME, myprintf(("in rename result %d\n",error));)
     /* XXX - do we need to call cache purge on the moved vnode? */
     cache_purge(ap->a_fvp);
 
     /* It seems to be incumbent on us to drop locks on all four vnodes */
     /* From-vnodes are not locked, only ref'd.  To-vnodes are locked. */
 
     vrele(ap->a_fvp);
     vrele(odvp);
 
     /*
      * When the target vnode is the target directory itself, only the
      * reference is dropped here; the final vput(ndvp) below is used for
      * the directory.
      */
     if (ap->a_tvp) {
 	if (ap->a_tvp == ndvp) {
 	    vrele(ap->a_tvp);
 	} else {
 	    vput(ap->a_tvp);
 	}
     }
 
     vput(ndvp);
     return(error);
 }
 
 int
 coda_mkdir(v)
     void *v;
 {
 /* true args */
     struct vop_mkdir_args *ap = v;
     struct vnode *dvp = ap->a_dvp;
     struct cnode *dcp = VTOC(dvp);	
     struct componentname  *cnp = ap->a_cnp;
     register struct vattr *va = ap->a_vap;
     struct vnode **vpp = ap->a_vpp;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
 /* locals */
     int error;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
     struct cnode *cp;
     ViceFid VFid;
     struct vattr ova;
 
     MARK_ENTRY(CODA_MKDIR_STATS);
 
     /* Check for mkdir of target object. */
     if (IS_CTL_NAME(dvp, nm, len)) {
 	*vpp = (struct vnode *)0;
 	MARK_INT_FAIL(CODA_MKDIR_STATS);
 	return(EACCES);
     }
 
     if (len+1 > CODA_MAXNAMLEN) {
 	*vpp = (struct vnode *)0;
 	MARK_INT_FAIL(CODA_MKDIR_STATS);
 	return(EACCES);
     }
 
     error = venus_mkdir(vtomi(dvp), &dcp->c_fid, nm, len, va, cred, p, &VFid, &ova);
 
     if (!error) {
 	if (coda_find(&VFid) != NULL)
 	    panic("cnode existed for newly created directory!");
 	
 	
 	cp =  make_coda_node(&VFid, dvp->v_mount, va->va_type);
 	*vpp = CTOV(cp);
 	
 	/* enter the new vnode in the Name Cache */
 	coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
 
 	/* as a side effect, enter "." and ".." for the directory */
 	coda_nc_enter(VTOC(*vpp), ".", 1, cred, VTOC(*vpp));
 	coda_nc_enter(VTOC(*vpp), "..", 2, cred, VTOC(dvp));
 
 	if (coda_attr_cache) {
 	    VTOC(*vpp)->c_vattr = ova;		/* update the attr cache */
 	    VTOC(*vpp)->c_flags |= C_VATTR;	/* Valid attributes in cnode */
 	}
 
 	/* Invalidate the parent's attr cache, the modification time has changed */
 	VTOC(dvp)->c_flags &= ~C_VATTR;
 	
 	CODADEBUG( CODA_MKDIR, myprintf(("mkdir: (%lx.%lx.%lx) result %d\n",
 				    VFid.Volume, VFid.Vnode, VFid.Unique, error)); )
     } else {
 	*vpp = (struct vnode *)0;
 	CODADEBUG(CODA_MKDIR, myprintf(("mkdir error %d\n",error));)
     }
 
     return(error);
 }
 
 int
 coda_rmdir(v)
     void *v;
 {
 /* true args */
     struct vop_rmdir_args *ap = v;
     struct vnode *dvp = ap->a_dvp;
     struct cnode *dcp = VTOC(dvp);
     struct componentname  *cnp = ap->a_cnp;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
 /* true args */
     int error;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
     struct cnode *cp;
    
     MARK_ENTRY(CODA_RMDIR_STATS);
 
     /* Check for rmdir of control object. */
     if (IS_CTL_NAME(dvp, nm, len)) {
 	MARK_INT_FAIL(CODA_RMDIR_STATS);
 	return(ENOENT);
     }
 
     /* We're being conservative here, it might be that this person
      * doesn't really have sufficient access to delete the file
      * but we feel zapping the entry won't really hurt anyone -- dcs
      */
     /*
      * As a side effect of the rmdir, remove any entries for children of
      * the directory, especially "." and "..".
      */
     cp = coda_nc_lookup(dcp, nm, len, cred);
     if (cp) coda_nc_zapParentfid(&(cp->c_fid), NOT_DOWNCALL);
 
     /* Remove the file's entry from the CODA Name Cache */
     coda_nc_zapfile(dcp, nm, len);
 
     /* Invalidate the parent's attr cache, the modification time has changed */
     dcp->c_flags &= ~C_VATTR;
 
     error = venus_rmdir(vtomi(dvp), &dcp->c_fid, nm, len, cred, p);
 
     CODADEBUG(CODA_RMDIR, myprintf(("in rmdir result %d\n", error)); )
 
     return(error);
 }
 
 int
 coda_symlink(v)
     void *v;
 {
 /* true args */
     struct vop_symlink_args *ap = v;
     struct vnode *tdvp = ap->a_dvp;
     struct cnode *tdcp = VTOC(tdvp);	
     struct componentname *cnp = ap->a_cnp;
     struct vattr *tva = ap->a_vap;
     char *path = ap->a_target;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
     struct vnode **vpp = ap->a_vpp;
 /* locals */
     int error;
     /* 
      * XXX I'm assuming the following things about coda_symlink's
      * arguments: 
      *       t(foo) is the new name/parent/etc being created.
      *       lname is the contents of the new symlink. 
      */
     char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
     int plen = strlen(path);
 
     /* 
      * Here's the strategy for the moment: perform the symlink, then
      * do a lookup to grab the resulting vnode.  I know this requires
      * two communications with Venus for a new sybolic link, but
      * that's the way the ball bounces.  I don't yet want to change
      * the way the Mach symlink works.  When Mach support is
      * deprecated, we should change symlink so that the common case
      * returns the resultant vnode in a vpp argument.
      */
 
     MARK_ENTRY(CODA_SYMLINK_STATS);
 
     /* Check for symlink of control object. */
     if (IS_CTL_NAME(tdvp, nm, len)) {
 	MARK_INT_FAIL(CODA_SYMLINK_STATS);
 	return(EACCES);
     }
 
     if (plen+1 > CODA_MAXPATHLEN) {
 	MARK_INT_FAIL(CODA_SYMLINK_STATS);
 	return(EINVAL);
     }
 
     if (len+1 > CODA_MAXNAMLEN) {
 	MARK_INT_FAIL(CODA_SYMLINK_STATS);
 	error = EINVAL;
 	goto exit;
     }
 
     error = venus_symlink(vtomi(tdvp), &tdcp->c_fid, path, plen, nm, len, tva, cred, p);
 
     /* Invalidate the parent's attr cache, the modification time has changed */
     tdcp->c_flags &= ~C_VATTR;
 
     if (error == 0)
 	error = VOP_LOOKUP(tdvp, vpp, cnp);
 
  exit:    
     CODADEBUG(CODA_SYMLINK, myprintf(("in symlink result %d\n",error)); )
     return(error);
 }
 
 /*
  * Read directory entries.
  *
  * The read itself is delegated to VOP_READDIR on the container vnode
  * (cp->c_ovp).  If the directory has no container vnode yet, it is opened
  * with VOP_OPEN for the duration of the call and closed again afterwards.
  * Reading the control object fails with ENOENT.  Returns 0 or an errno
  * value from the open, the object creation, or the underlying readdir.
  */
 int
 coda_readdir(v)
     void *v;
 {
 /* true args */
     struct vop_readdir_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     register struct uio *uiop = ap->a_uio;
     struct ucred *cred = ap->a_cred;
     int *eofflag = ap->a_eofflag;
     u_long **cookies = ap->a_cookies;
     int *ncookies = ap->a_ncookies;
     struct proc *p = ap->a_uio->uio_procp;
 /* upcall decl */
 /* locals */
     int error = 0;
 
     MARK_ENTRY(CODA_READDIR_STATS);
 
     CODADEBUG(CODA_READDIR, myprintf(("coda_readdir(%p, %d, %lld, %d)\n",
 				      (void *)uiop->uio_iov->iov_base,
 				      uiop->uio_resid,
 				      (long long)uiop->uio_offset,
 				      uiop->uio_segflg)); )
 	
     /* Check for readdir of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_READDIR_STATS);
 	return(ENOENT);
     }
 
     {
 	/* If directory is not already open do an "internal open" on it. */
 	int opened_internally = 0;
 	if (cp->c_ovp == NULL) {
 	    opened_internally = 1;
 	    MARK_INT_GEN(CODA_OPEN_STATS);
 	    error = VOP_OPEN(vp, FREAD, cred, p);
 	    /* NOTE(review): printed unconditionally, even if the open failed. */
printf("coda_readdir: Internally Opening %p\n", vp);
 	    if (error) {
 		printf("coda_readdir: VOP_OPEN on container failed %d\n", error);
 		return (error);
 	    }
 	    if (vp->v_type == VREG) {
 		error = vfs_object_create(vp, p, cred);
 		if (error != 0) {
 		    printf("coda_readdir: vfs_object_create() returns %d\n", error);
 		    /*
 		     * NOTE(review): this vput() releases vp although this
 		     * function did not acquire it, and the internal open is
 		     * not closed on this path -- confirm both are intended.
 		     */
 		    vput(vp);
 		}
 	    }
 	    if (error) return(error);
 	}
 	
 	/* Have UFS handle the call. */
 	/* Presumably VOP_OPEN filled in cp->c_ovp; it is not set here. */
 	CODADEBUG(CODA_READDIR, myprintf(("indirect readdir: fid = (%lx.%lx.%lx), refcnt = %d\n",cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique, vp->v_usecount)); )
 	error = VOP_READDIR(cp->c_ovp, uiop, cred, eofflag, ncookies,
 			       cookies);
 	
 	if (error)
 	    MARK_INT_FAIL(CODA_READDIR_STATS);
 	else
 	    MARK_INT_SAT(CODA_READDIR_STATS);
 	
 	/* Do an "internal close" if necessary. */
 	/* The close result is deliberately ignored; the readdir error wins. */
 	if (opened_internally) {
 	    MARK_INT_GEN(CODA_CLOSE_STATS);
 	    (void)VOP_CLOSE(vp, FREAD, cred, p);
 	}
     }
 
     return(error);
 }
 
 /*
  * Convert from file system blocks to device blocks
  *
  * coda_bmap: VOP_BMAP handler.  Block mapping is not supported for Coda
  * vnodes.  The call returns EINVAL when the cnode has a container vnode
  * (c_ovp) and EOPNOTSUPP when it has none.
  *
  * The previous body contained a pass-through to VOP_BMAP on the container
  * vnode, but it sat behind an unconditional "return EINVAL" and could never
  * execute.  That unreachable code, and the locals only it used, have been
  * removed; the values returned to callers are unchanged.
  */
 int
 coda_bmap(v)
     void *v;
 {
 /* true args */
     struct vop_bmap_args *ap = v;
     struct vnode *vp = ap->a_vp;	/* file's vnode */
 /* locals */
     struct cnode *cp = VTOC(vp);
 
     if (cp->c_ovp) {
 	/* A container exists, but mapping through it is disabled. */
 	return(EINVAL);
     }
 
     /* No container file to map against. */
     return(EOPNOTSUPP);
 }
 
 /*
  * I don't think the following two things are used anywhere, so I've
  * commented them out 
  * 
  * struct buf *async_bufhead; 
  * int async_daemon_count;
  */
 int
 coda_strategy(v)
     void *v;
 {
 /* true args */
     struct vop_strategy_args *ap = v;
     register struct buf *bp __attribute__((unused)) = ap->a_bp;
     struct proc *p __attribute__((unused)) = curproc;
 /* upcall decl */
 /* locals */
 
 	printf("coda_strategy: called ???\n");
 	return(EOPNOTSUPP);
 }
 
 int
 coda_reclaim(v) 
     void *v;
 {
 /* true args */
     struct vop_reclaim_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
 /* upcall decl */
 /* locals */
 
 /*
  * Forced unmount/flush will let vnodes with non zero use be destroyed!
  */
     ENTRY;
 
     if (IS_UNMOUNTING(cp)) {
 #ifdef	DEBUG
 	if (VTOC(vp)->c_ovp) {
 	    if (IS_UNMOUNTING(cp))
 		printf("coda_reclaim: c_ovp not void: vp %p, cp %p\n", vp, cp);
 	}
 #endif
     } else {
 #ifdef OLD_DIAGNOSTIC
 	if (vp->v_usecount != 0) 
 	    print("coda_reclaim: pushing active %p\n", vp);
 	if (VTOC(vp)->c_ovp) {
 	    panic("coda_reclaim: c_ovp not void");
     }
 #endif
     }	
     cache_purge(vp);
     lockdestroy(&(VTOC(vp)->c_lock));
     coda_free(VTOC(vp));
     VTOC(vp) = NULL;
     return (0);
 }
 
 /*
  * coda_lock: VOP_LOCK handler.  Passes the caller's flags to lockmgr (or
  * debuglockmgr when DEBUG_LOCKS is defined) on the cnode's c_lock, using
  * the vnode interlock, and returns its result.
  */
 int
 coda_lock(v)
     void *v;
 {
 /* true args */
     struct vop_lock_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     struct proc  *p = ap->a_p;
 /* locals */
     int rv;
 
     ENTRY;
 
     if (coda_lockdebug) {
 	myprintf(("Attempting lock on %lx.%lx.%lx\n",
 		  cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
     }
 
 #ifdef	DEBUG_LOCKS
     rv = debuglockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, p,
 		      "coda_lock", vp->filename, vp->line);
 #else
     rv = lockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, p);
 #endif
     return (rv);
 }
 
 /*
  * coda_unlock: VOP_UNLOCK handler.  Releases the cnode's c_lock by calling
  * lockmgr with the caller's flags plus LK_RELEASE, and returns its result.
  */
 int
 coda_unlock(v)
     void *v;
 {
 /* true args */
     struct vop_unlock_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     struct proc  *p = ap->a_p;
 /* locals */
     int rv;
 
     ENTRY;
 
     if (coda_lockdebug) {
 	myprintf(("Attempting unlock on %lx.%lx.%lx\n",
 		  cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
     }
 
     rv = lockmgr(&cp->c_lock, ap->a_flags | LK_RELEASE, &vp->v_interlock, p);
     return (rv);
 }
 
 /*
  * coda_islocked: VOP_ISLOCKED handler.  Returns lockstatus() of the
  * cnode's c_lock for the process given in the arguments.
  */
 int
 coda_islocked(v)
     void *v;
 {
 /* true args */
     struct vop_islocked_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
 
     ENTRY;
 
     return (lockstatus(&cp->c_lock, ap->a_p));
 }
 
 /*
  * How one looks up a vnode given a device/inode pair.
  *
  * This plays the role of VFS_VGET() or igetinode(): the mount point is
  * found from the device and asked for the vnode of inode 'ino', which is
  * returned through vpp.  Returns 0 on success, ENXIO when the device has
  * no mount point, and ENOENT when VFS_VGET fails (its own error code is
  * only logged).
  */
 int
 coda_grab_vnode(dev_t dev, ino_t ino, struct vnode **vpp)
 {
     struct mount *mp = devtomp(dev);
     int           error;
 
     if (mp == NULL) {
 	myprintf(("coda_grab_vnode: devtomp(%#lx) returns NULL\n",
 		  (u_long)dev2udev(dev)));
 	return(ENXIO);
     }
 
     /* XXX - ensure that nonzero-return means failure */
     error = VFS_VGET(mp,ino,vpp);
     if (!error)
 	return(0);
 
     myprintf(("coda_grab_vnode: iget/vget(%lx, %lu) returns %p, err %d\n", 
 	      (u_long)dev2udev(dev), (u_long)ino, (void *)*vpp, error));
     return(ENOENT);
 }
 
 /*
  * print_vattr: debugging aid that dumps the fields of a struct vattr with
  * myprintf.  The vnode type is shown as a four-character tag ("????" for
  * an unrecognised value); most numeric fields are cast to int for printing.
  */
 void
 print_vattr( attr )
 	struct vattr *attr;
 {
     char *typestr = "????";
 
     /* Translate the vnode type; unknown values keep the "????" tag. */
     switch (attr->va_type) {
     case VNON:	typestr = "VNON"; break;
     case VREG:	typestr = "VREG"; break;
     case VDIR:	typestr = "VDIR"; break;
     case VBLK:	typestr = "VBLK"; break;
     case VCHR:	typestr = "VCHR"; break;
     case VLNK:	typestr = "VLNK"; break;
     case VSOCK:	typestr = "VSCK"; break;
     case VFIFO:	typestr = "VFFO"; break;
     case VBAD:	typestr = "VBAD"; break;
     default:	break;
     }
 
     /* Identity and ownership. */
     myprintf(("attr: type %s mode %d uid %d gid %d fsid %d rdev %d\n",
 	      typestr, (int)attr->va_mode, (int)attr->va_uid,
 	      (int)attr->va_gid, (int)attr->va_fsid, (int)attr->va_rdev));
 
     /* Size and link information. */
     myprintf(("      fileid %d nlink %d size %d blocksize %d bytes %d\n",
 	      (int)attr->va_fileid, (int)attr->va_nlink, 
 	      (int)attr->va_size,
 	      (int)attr->va_blocksize,(int)attr->va_bytes));
 
     /* Generation number and flag words (printed without casts). */
     myprintf(("      gen %ld flags %ld vaflags %d\n",
 	      attr->va_gen, attr->va_flags, attr->va_vaflags));
 
     /* Access, modification and change times. */
     myprintf(("      atime sec %d nsec %d\n",
 	      (int)attr->va_atime.tv_sec, (int)attr->va_atime.tv_nsec));
     myprintf(("      mtime sec %d nsec %d\n",
 	      (int)attr->va_mtime.tv_sec, (int)attr->va_mtime.tv_nsec));
     myprintf(("      ctime sec %d nsec %d\n",
 	      (int)attr->va_ctime.tv_sec, (int)attr->va_ctime.tv_nsec));
 }
 
 /*
  * How to print a ucred: dumps the reference count and uid, then one line
  * per entry of cr_groups (cr_ngroups entries), followed by a blank line.
  */
 void
 print_cred(cred)
 	struct ucred *cred;
 {
 	int grp = 0;
 
 	myprintf(("ref %d\tuid %d\n",cred->cr_ref,cred->cr_uid));
 
 	while (grp < cred->cr_ngroups) {
 		myprintf(("\tgroup %d: (%d)\n",grp,cred->cr_groups[grp]));
 		grp++;
 	}
 	myprintf(("\n"));
 }
 
 /*
  * Return a cnode (and thereby a vnode) for the given fid.
  *
  * If coda_find() already knows a cnode for this fid, an extra reference is
  * taken on its vnode with vref() and that cnode is returned.  Otherwise a
  * new cnode is allocated, its lock initialised, a fresh vnode of the
  * requested type is obtained with getnewvnode() and tied to it, and the
  * cnode is recorded with coda_save().  Failure to get a vnode panics.
  *
  * Per the original note, cnodes live in a table hashed by fid.Volume and
  * fid.Vnode and leave it when coda_inactive calls coda_unsave (neither is
  * visible from this function).
  */
 struct cnode *
 make_coda_node(fid, vfsp, type)
      ViceFid *fid;
      struct mount *vfsp;
      short type;
 {
     struct cnode *cp;
     struct vnode *vp;
     int          err;
 
     /* Fast path: the fid is already known, just take another reference. */
     cp = coda_find(fid);
     if (cp != NULL) {
 	vref(CTOV(cp));
 	return cp;
     }
 
     /* Slow path: build a new cnode/vnode pair. */
     cp = coda_alloc();
     lockinit(&cp->c_lock, PINOD, "cnode", 0, 0);
     cp->c_fid = *fid;
 
     err = getnewvnode(VT_CODA, vfsp, coda_vnodeop_p, &vp);
     if (err) {
 	panic("coda: getnewvnode returned error %d\n", err);
     }
 
     /* Cross-link vnode and cnode, then record the cnode. */
     vp->v_data = cp;
     vp->v_type = type;
     cp->c_vnode = vp;
     coda_save(cp);
 
     return cp;
 }
Index: head/sys/fs/coda/coda_vnops.c
===================================================================
--- head/sys/fs/coda/coda_vnops.c	(revision 75579)
+++ head/sys/fs/coda/coda_vnops.c	(revision 75580)
@@ -1,1957 +1,1956 @@
 /*
  * 
  *             Coda: an Experimental Distributed File System
  *                              Release 3.1
  * 
  *           Copyright (c) 1987-1998 Carnegie Mellon University
  *                          All Rights Reserved
  * 
  * Permission  to  use, copy, modify and distribute this software and its
  * documentation is hereby granted,  provided  that  both  the  copyright
  * notice  and  this  permission  notice  appear  in  all  copies  of the
  * software, derivative works or  modified  versions,  and  any  portions
  * thereof, and that both notices appear in supporting documentation, and
  * that credit is given to Carnegie Mellon University  in  all  documents
  * and publicity pertaining to direct or indirect use of this code or its
  * derivatives.
  * 
  * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
  * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
  * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
  * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
  * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
  * ANY DERIVATIVE WORK.
  * 
  * Carnegie  Mellon  encourages  users  of  this  software  to return any
  * improvements or extensions that  they  make,  and  to  grant  Carnegie
  * Mellon the rights to redistribute these changes without encumbrance.
  * 
  *  	@(#) src/sys/coda/coda_vnops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
  * $FreeBSD$
  * 
  */
 
 /* 
  * Mach Operating System
  * Copyright (c) 1990 Carnegie-Mellon University
  * Copyright (c) 1989 Carnegie-Mellon University
  * All rights reserved.  The CMU software License Agreement specifies
  * the terms and conditions for use and redistribution.
  */
 
 /*
  * This code was written for the Coda file system at Carnegie Mellon
  * University.  Contributors include David Steere, James Kistler, and
  * M. Satyanarayanan.  
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/errno.h>
 #include <sys/acct.h>
 #include <sys/file.h>
 #include <sys/fcntl.h>
 #include <sys/uio.h>
 #include <sys/namei.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 
 #include <coda/coda.h>
 #include <coda/cnode.h>
 #include <coda/coda_vnops.h>
 #include <coda/coda_venus.h>
 #include <coda/coda_opstats.h>
 #include <coda/coda_subr.h>
 #include <coda/coda_namecache.h>
 #include <coda/coda_pioctl.h>
 
 /* 
  * These flags select various performance enhancements.
  */
 /* Each flag below defaults to 1 (enabled); the vnode ops test them at run time. */
 int coda_attr_cache  = 1;       /* Set to cache attributes in the kernel */
 int coda_symlink_cache = 1;     /* Set to cache symbolic link information */
 int coda_access_cache = 1;      /* Set to handle some access checks directly */
 
 /* structure to keep track of vfs calls */
 
 /* One statistics record per vnode operation, indexed by the CODA_*_STATS opcode. */
 struct coda_op_stats coda_vnodeopstats[CODA_VNODEOPS_SIZE];
 
 /*
  * Counter helpers; `op' is an index into coda_vnodeopstats:
  *   MARK_ENTRY    - bumps the `entries' counter
  *   MARK_INT_SAT  - bumps `sat_intrn'
  *   MARK_INT_FAIL - bumps `unsat_intrn'
  *   MARK_INT_GEN  - bumps `gen_intrn'
  */
 #define MARK_ENTRY(op) (coda_vnodeopstats[op].entries++)
 #define MARK_INT_SAT(op) (coda_vnodeopstats[op].sat_intrn++)
 #define MARK_INT_FAIL(op) (coda_vnodeopstats[op].unsat_intrn++)
 #define MARK_INT_GEN(op) (coda_vnodeopstats[op].gen_intrn++)
 
 /* What we are delaying for in printf */
 int coda_printf_delay = 0;  /* in microseconds */
 /* Nonzero makes the ENTRY macro print the name of each function it is used in. */
 int coda_vnop_print_entry = 0;
 static int coda_lockdebug = 0;
 
 /* Definition of the vfs operation vector */
 
 /*
  * Some NetBSD details:
  * 
  *   coda_start is called at the end of the mount syscall.
  *   coda_init is called at boot time.
  */
 
 /*
  * Trace helper: when coda_vnop_print_entry is nonzero, print the name of
  * the enclosing function.  Expands to a bare `if' statement, so it should
  * only be used as a statement of its own ("ENTRY;").
  */
 #define ENTRY  if(coda_vnop_print_entry) myprintf(("Entered %s\n",__FUNCTION__))
 
 /* Definition of the vnode operation vector */
 
 /*
  * Table pairing each vnode operation descriptor with the Coda routine that
  * implements it.  Operations that are not implemented point at
  * coda_vop_error (fails with EIO) or coda_vop_nop (succeeds and does
  * nothing); the first entry supplies the default.  The table ends with a
  * NULL/NULL pair.
  */
 struct vnodeopv_entry_desc coda_vnodeop_entries[] = {
     { &vop_default_desc, coda_vop_error },
     { &vop_lookup_desc, coda_lookup },		/* lookup */
     { &vop_create_desc, coda_create },		/* create */
     { &vop_mknod_desc, coda_vop_error },	/* mknod */
     { &vop_open_desc, coda_open },		/* open */
     { &vop_close_desc, coda_close },		/* close */
     { &vop_access_desc, coda_access },		/* access */
     { &vop_getattr_desc, coda_getattr },	/* getattr */
     { &vop_setattr_desc, coda_setattr },	/* setattr */
     { &vop_read_desc, coda_read },		/* read */
     { &vop_write_desc, coda_write },		/* write */
     { &vop_ioctl_desc, coda_ioctl },		/* ioctl */
     { &vop_fsync_desc, coda_fsync },		/* fsync */
     { &vop_remove_desc, coda_remove },		/* remove */
     { &vop_link_desc, coda_link },		/* link */
     { &vop_rename_desc, coda_rename },		/* rename */
     { &vop_mkdir_desc, coda_mkdir },		/* mkdir */
     { &vop_rmdir_desc, coda_rmdir },		/* rmdir */
     { &vop_symlink_desc, coda_symlink },	/* symlink */
     { &vop_readdir_desc, coda_readdir },	/* readdir */
     { &vop_readlink_desc, coda_readlink },	/* readlink */
     { &vop_inactive_desc, coda_inactive },	/* inactive */
     { &vop_reclaim_desc, coda_reclaim },	/* reclaim */
     { &vop_lock_desc, coda_lock },		/* lock */
     { &vop_unlock_desc, coda_unlock },		/* unlock */
     { &vop_bmap_desc, coda_bmap },		/* bmap */
     { &vop_strategy_desc, coda_strategy },	/* strategy */
     { &vop_print_desc, coda_vop_error },	/* print */
     { &vop_islocked_desc, coda_islocked },	/* islocked */
     { &vop_pathconf_desc, coda_vop_error },	/* pathconf */
     { &vop_advlock_desc, coda_vop_nop },	/* advlock */
-    { &vop_bwrite_desc, coda_vop_error },	/* bwrite */
     { &vop_lease_desc, coda_vop_nop },		/* lease */
     { &vop_poll_desc, (vop_t *) vop_stdpoll },
     { &vop_getpages_desc, coda_fbsd_getpages },	/* pager intf.*/
     { &vop_putpages_desc, coda_fbsd_putpages },	/* pager intf.*/
 
 #if	0
 
     we need to define these someday
 #define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd)
 #define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd)
 #define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc)
 #define UFS_TRUNCATE(aa, bb, cc, dd, ee) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd, ee)
 #define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb)
 
     missing
     { &vop_reallocblks_desc,	(vop_t *) ufs_missingop },
     { &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
     { &vop_whiteout_desc,	(vop_t *) ufs_whiteout },
 #endif
     { (struct vnodeop_desc*)NULL, (int(*)(void *))NULL }
 };
 
 /*
  * Descriptor tying the entry table above to coda_vnodeop_p, the operation
  * vector pointer that make_coda_node passes to getnewvnode().
  */
 static struct vnodeopv_desc coda_vnodeop_opv_desc =
 		{ &coda_vnodeop_p, coda_vnodeop_entries };
 
 /* Hand the descriptor to the kernel's vnode-op registration macro. */
 VNODEOP_SET(coda_vnodeop_opv_desc);
 
 /* A generic panic: we were called with something we didn't define yet */
 int
 coda_vop_error(void *anon) {
     /* anon is treated as pointing at the operation's descriptor pointer. */
     struct vnodeop_desc *opdesc = *(struct vnodeop_desc **)anon;
 
     myprintf(("coda_vop_error: Vnode operation %s called, but not defined.\n",
 	      opdesc->vdesc_name));
 
     /* Deliberately not fatal: the panic("coda_vop_error") call is disabled. */
     return EIO;
 }
 
 /* A generic do-nothing.  For lease_check, advlock */
 int
 coda_vop_nop(void *anon) {
     /* Quiet unless debugging is enabled; success is reported either way. */
     if (!codadebug)
 	return (0);
 
     /* anon is treated as pointing at the operation's descriptor pointer. */
     myprintf(("Vnode operation %s called, but unsupported\n",
 	      (*(struct vnodeop_desc **)anon)->vdesc_name));
     return (0);
 }
 
 int
 coda_vnodeopstats_init(void)
 {
 	register int i;
 	
 	for(i=0;i<CODA_VNODEOPS_SIZE;i++) {
 		coda_vnodeopstats[i].opcode = i;
 		coda_vnodeopstats[i].entries = 0;
 		coda_vnodeopstats[i].sat_intrn = 0;
 		coda_vnodeopstats[i].unsat_intrn = 0;
 		coda_vnodeopstats[i].gen_intrn = 0;
 	}
 	return 0;
 }
 		
 /* 
  * coda_open calls Venus to return the device, inode pair of the cache
  * file holding the data. Using iget, coda_open finds the vnode of the
  * cache file, and then opens it.
  */
 /*
  * VOP_OPEN for Coda vnodes.
  *
  * v is a struct vop_open_args.  The control vnode is handled locally
  * (only non-writing opens succeed).  For everything else Venus is asked
  * for the <device, inode> pair of the container file, the container's
  * vnode is obtained with coda_grab_vnode(), recorded in the cnode, and
  * then opened itself.
  *
  * Returns 0 on success or an errno value.
  */
 int
 coda_open(v)
     void *v;
 {
     /* 
      * NetBSD can pass the O_EXCL flag in mode, even though the check
      * has already happened.  Venus defensively assumes that if open
      * is passed the EXCL, it must be a bug.  We strip the flag here.
      */
 /* true args */
     struct vop_open_args *ap = v;
     register struct vnode **vpp = &(ap->a_vp);
     struct cnode *cp = VTOC(*vpp);
     int flag = ap->a_mode & (~O_EXCL);
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_p;
 /* locals */
     int error;
     struct vnode *vp;		/* container (cache file) vnode */
     dev_t dev;
     ino_t inode;
 
     MARK_ENTRY(CODA_OPEN_STATS);
 
     /* Check for open of control file. */
     if (IS_CTL_VP(*vpp)) {
 	/* XXX */
 	/* if (WRITEABLE(flag)) */ 
 	/* O_EXCL was already masked out of flag above, so that bit never matches. */
 	if (flag & (FWRITE | O_TRUNC | O_CREAT | O_EXCL)) {
 	    MARK_INT_FAIL(CODA_OPEN_STATS);
 	    return(EACCES);
 	}
 	MARK_INT_SAT(CODA_OPEN_STATS);
 	return(0);
     }
 
     error = venus_open(vtomi((*vpp)), &cp->c_fid, flag, cred, p, &dev, &inode);
     if (error)
 	return (error);
     /* error is always 0 here; the test below is redundant. */
     if (!error) {
 	CODADEBUG( CODA_OPEN,myprintf(("open: dev %#lx inode %lu result %d\n",
 				       (u_long)dev2udev(dev), (u_long)inode,
 				       error)); )
     }
 
     /* Translate the <device, inode> pair for the cache file into
        an inode pointer. */
     error = coda_grab_vnode(dev, inode, &vp);
     if (error)
 	return (error);
 
     /* We get the vnode back locked.  Needs unlocked */
     VOP_UNLOCK(vp, 0, p);
     /* Keep a reference until the close comes in. */
     vref(*vpp);                
 
     /* Save the vnode pointer for the cache file. */
     if (cp->c_ovp == NULL) {
 	cp->c_ovp = vp;
     } else {
 	/* A second open must resolve to the same container vnode. */
 	if (cp->c_ovp != vp)
 	    panic("coda_open:  cp->c_ovp != ITOV(ip)");
     }
     cp->c_ocount++;
 
     /* Flush the attribute cached if writing the file. */
     if (flag & FWRITE) {
 	cp->c_owrite++;
 	cp->c_flags &= ~C_VATTR;
     }
 
     /* Save the <device, inode> pair for the cache file to speed
        up subsequent page_read's. */
     cp->c_device = dev;
     cp->c_inode = inode;
 
     /* Open the cache file. */
     /*
      * NOTE(review): both error returns below leave the vref() on *vpp and
      * the c_ocount/c_owrite increments in place -- looks like a leak if
      * the caller never issues a matching close; confirm.
      */
     error = VOP_OPEN(vp, flag, cred, p); 
     if (error) {
     	printf("coda_open: VOP_OPEN on container failed %d\n", error);
 	return (error);
     }
 /* grab (above) does this when it calls newvnode unless it's in the cache*/
     if (vp->v_type == VREG) {
     	error = vfs_object_create(vp, p, cred);
 	if (error != 0) {
 	    printf("coda_open: vfs_object_create() returns %d\n", error);
 	    /*
 	     * NOTE(review): vp was unlocked by VOP_UNLOCK above; verify
 	     * that vput() (rather than vrele()) is right here.
 	     */
 	    vput(vp);
 	}
     }
 
     return(error);
 }
 
 /*
  * Close the cache file used for I/O and notify Venus.
  */
 /*
  * VOP_CLOSE for Coda vnodes.
  *
  * v is a struct vop_close_args.  Closing the control vnode is a no-op.
  * Otherwise the container vnode is closed and released, the per-cnode
  * open counters are dropped, Venus is told about the close, and the
  * reference taken on the Coda vnode in coda_open is released.
  *
  * Returns the result of venus_close(), 0 for the control vnode, or
  * ENODEV when the cnode is being unmounted.
  */
 int
 coda_close(v)
     void *v;
 {
 /* true args */
     struct vop_close_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     int flag = ap->a_fflag;
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_p;
 /* locals */
     int error;
 
     MARK_ENTRY(CODA_CLOSE_STATS);
 
     /* Check for close of control file. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_SAT(CODA_CLOSE_STATS);
 	return(0);
     }
 
     /*
      * While unmounting, only the container is closed and released; the
      * counters, the Venus upcall and the vrele of the Coda vnode below
      * are all skipped and ENODEV is returned.
      */
     if (IS_UNMOUNTING(cp)) {
 	if (cp->c_ovp) {
 #ifdef	CODA_VERBOSE
 	    printf("coda_close: destroying container ref %d, ufs vp %p of vp %p/cp %p\n",
 		    vp->v_usecount, cp->c_ovp, vp, cp);
 #endif
 #ifdef	hmm
 	    vgone(cp->c_ovp);
 #else
 	    VOP_CLOSE(cp->c_ovp, flag, cred, p); /* Do errors matter here? */
 	    vrele(cp->c_ovp);
 #endif
 	} else {
 #ifdef	CODA_VERBOSE
 	    printf("coda_close: NO container vp %p/cp %p\n", vp, cp);
 #endif
 	}
 	return ENODEV;
     } else {
 	/* NOTE(review): c_ovp is used here without a NULL check -- confirm callers guarantee it. */
 	VOP_CLOSE(cp->c_ovp, flag, cred, p); /* Do errors matter here? */
 	vrele(cp->c_ovp);
     }
 
     /* Forget the container vnode once the last open is gone. */
     if (--cp->c_ocount == 0)
 	cp->c_ovp = NULL;
 
     if (flag & FWRITE)                    /* file was opened for write */
 	--cp->c_owrite;
 
     error = venus_close(vtomi(vp), &cp->c_fid, flag, cred, p);
     /* Drop the reference coda_open took with vref(). */
     vrele(CTOV(cp));
 
     CODADEBUG(CODA_CLOSE, myprintf(("close: result %d\n",error)); )
     return(error);
 }
 
 int
 coda_read(v)
     void *v;
 {
     struct vop_read_args *ap = v;
 
     ENTRY;
     return(coda_rdwr(ap->a_vp, ap->a_uio, UIO_READ,
 		    ap->a_ioflag, ap->a_cred, ap->a_uio->uio_procp));
 }
 
 int
 coda_write(v)
     void *v;
 {
     struct vop_write_args *ap = v;
 
     ENTRY;
     return(coda_rdwr(ap->a_vp, ap->a_uio, UIO_WRITE,
 		    ap->a_ioflag, ap->a_cred, ap->a_uio->uio_procp));
 }
 
 /*
  * Common body of coda_read and coda_write: forward the I/O described by
  * uiop to the container (cache) file of vp.
  *
  *   vp      Coda vnode being read or written
  *   uiop    description of the transfer
  *   rw      UIO_READ or UIO_WRITE
  *   ioflag  passed through to VOP_READ/VOP_WRITE
  *   cred    credentials for the operation
  *   p       calling process; may be NULL (it is tested before use)
  *
  * If the file has no container vnode recorded, one is obtained either
  * from the cached <device, inode> pair or by an internal open/close
  * around the transfer.  Returns 0 or an errno value; EINVAL for the
  * control vnode.
  */
 int
 coda_rdwr(vp, uiop, rw, ioflag, cred, p)
     struct vnode *vp;
     struct uio *uiop;
     enum uio_rw rw;
     int ioflag;
     struct ucred *cred;
     struct proc *p;
 { 
 /* upcall decl */
   /* NOTE: container file operation!!! */
 /* locals */
     struct cnode *cp = VTOC(vp);
     struct vnode *cfvp = cp->c_ovp;
     /*
      * NOTE(review): igot_internally is set but never read, so the vnode
      * obtained from coda_grab_vnode() below is never released in this
      * function -- confirm whether a vrele() is missing.
      */
     int igot_internally = 0;
     int opened_internally = 0;
     int error = 0;
 
     MARK_ENTRY(CODA_RDWR_STATS);
 
     CODADEBUG(CODA_RDWR, myprintf(("coda_rdwr(%d, %p, %d, %lld, %d)\n", rw, 
 			      (void *)uiop->uio_iov->iov_base, uiop->uio_resid, 
 			      (long long)uiop->uio_offset, uiop->uio_segflg)); )
 	
     /* Check for rdwr of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_RDWR_STATS);
 	return(EINVAL);
     }
 
     /* 
      * If file is not already open this must be a page
      * {read,write} request.  Iget the cache file's inode
      * pointer if we still have its <device, inode> pair.
      * Otherwise, we must do an internal open to derive the
      * pair. 
      */
     if (cfvp == NULL) {
 	int dumping_core = 0;
 
 	/* 
 	 * If we're dumping core, do the internal open. Otherwise
 	 * venus won't have the correct size of the core when
 	 * it's completely written.
 	 *
 	 * p_acflag is sampled under the process lock, but only when
 	 * there is a process: taking the lock on a NULL p would
 	 * dereference it before the NULL test could help.
 	 */
 	if (p != NULL) {
 	    PROC_LOCK(p);
 	    dumping_core = (p->p_acflag & ACORE) != 0;
 	    PROC_UNLOCK(p);
 	}
 
 	if (cp->c_inode != 0 && !dumping_core) { 
 	    igot_internally = 1;
 	    error = coda_grab_vnode(cp->c_device, cp->c_inode, &cfvp);
 	    if (error) {
 		MARK_INT_FAIL(CODA_RDWR_STATS);
 		return(error);
 	    }
 	    /* 
 	     * We get the vnode back locked in both Mach and
 	     * NetBSD.  Needs unlocked 
 	     */
 	    VOP_UNLOCK(cfvp, 0, p);
 	}
 	else {
 	    opened_internally = 1;
 	    MARK_INT_GEN(CODA_OPEN_STATS);
 	    error = VOP_OPEN(vp, (rw == UIO_READ ? FREAD : FWRITE), 
 			     cred, p);
 printf("coda_rdwr: Internally Opening %p\n", vp);
 	    if (error) {
 		printf("coda_rdwr: VOP_OPEN on container failed %d\n", error);
 		return (error);
 	    }
 	    if (vp->v_type == VREG) {
 		error = vfs_object_create(vp, p, cred);
 		if (error != 0) {
 		    printf("coda_rdwr: vfs_object_create() returns %d\n", error);
 		    vput(vp);
 		}
 	    }
 	    if (error) {
 		MARK_INT_FAIL(CODA_RDWR_STATS);
 		return(error);
 	    }
 	    /* The internal open recorded the container vnode in the cnode. */
 	    cfvp = cp->c_ovp;
 	}
     }
 
     /* Have UFS handle the call. */
     CODADEBUG(CODA_RDWR, myprintf(("indirect rdwr: fid = (%lx.%lx.%lx), refcnt = %d\n",
 			      cp->c_fid.Volume, cp->c_fid.Vnode, 
 			      cp->c_fid.Unique, CTOV(cp)->v_usecount)); )
 
 
     if (rw == UIO_READ) {
 	error = VOP_READ(cfvp, uiop, ioflag, cred);
     } else {
 	error = VOP_WRITE(cfvp, uiop, ioflag, cred);
 	/* ufs_write updates the vnode_pager_setsize for the vnode/object */
 
 	/* Mirror the container's new size onto the Coda vnode's pager. */
 	{   struct vattr attr;
 
 	    if (VOP_GETATTR(cfvp, &attr, cred, p) == 0) {
 		vnode_pager_setsize(vp, attr.va_size);
 	    }
 	}
     }
 
     if (error)
 	MARK_INT_FAIL(CODA_RDWR_STATS);
     else
 	MARK_INT_SAT(CODA_RDWR_STATS);
 
     /* Do an internal close if necessary. */
     if (opened_internally) {
 	MARK_INT_GEN(CODA_CLOSE_STATS);
 	(void)VOP_CLOSE(vp, (rw == UIO_READ ? FREAD : FWRITE), cred, p);
     }
 
     /* Invalidate cached attributes if writing. */
     if (rw == UIO_WRITE)
 	cp->c_flags &= ~C_VATTR;
     return(error);
 }
 
 /*
  * VOP_IOCTL for Coda.  Only the control vnode accepts ioctls.
  *
  * v is a struct vop_ioctl_args whose a_data is a struct PioctlData.
  * The path named in it is looked up, checked to be a Coda vnode, and the
  * request is forwarded to Venus for that object.
  *
  * Returns 0 or an errno value: EOPNOTSUPP if vp is not the control
  * vnode, the namei() error if the lookup fails, EINVAL if the target is
  * not a Coda object or the input is larger than VC_MAXDATASIZE,
  * otherwise the result of venus_ioctl().  Every failure path is counted
  * with MARK_INT_FAIL.
  */
 int
 coda_ioctl(v)
     void *v;
 {
 /* true args */
     struct vop_ioctl_args *ap = v;
     struct vnode *vp = ap->a_vp;
     int com = ap->a_command;
     caddr_t data = ap->a_data;
     int flag = ap->a_fflag;
     struct ucred *cred = ap->a_cred;
     struct proc  *p = ap->a_p;
 /* locals */
     int error;
     struct vnode *tvp;
     struct nameidata ndp;
     struct PioctlData *iap = (struct PioctlData *)data;
 
     MARK_ENTRY(CODA_IOCTL_STATS);
 
     CODADEBUG(CODA_IOCTL, myprintf(("in coda_ioctl on %s\n", iap->path));)
 	
     /* Don't check for operation on a dying object, for ctlvp it
        shouldn't matter */
 	
     /* Must be control object to succeed. */
     if (!IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_IOCTL_STATS);
 	CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: vp != ctlvp"));)
 	    return (EOPNOTSUPP);
     }
     /* Look up the pathname. */
 
     /* Should we use the name cache here? It would get it from
        lookupname sooner or later anyway, right? */
 
     NDINIT(&ndp, LOOKUP, (iap->follow ? FOLLOW : NOFOLLOW), UIO_USERSPACE, iap->path, p);
     error = namei(&ndp);
 
     if (error) {
 	MARK_INT_FAIL(CODA_IOCTL_STATS);
 	CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: lookup returns %d\n",
 				   error));)
 	return(error);
     }
 
     /* Only look at the result vnode once the lookup has succeeded. */
     tvp = ndp.ni_vp;
 
     /* 
      * Make sure this is a coda style cnode, but it may be a
      * different vfsp 
      */
     if (tvp->v_op != coda_vnodeop_p) {
 	vrele(tvp);
 	NDFREE(&ndp, NDF_ONLY_PNBUF);
 	MARK_INT_FAIL(CODA_IOCTL_STATS);
 	CODADEBUG(CODA_IOCTL, 
 		 myprintf(("coda_ioctl error: %s not a coda object\n", 
 			iap->path));)
 	return(EINVAL);
     }
 
     /* Refuse oversized input before bothering Venus; count it as a failure. */
     if (iap->vi.in_size > VC_MAXDATASIZE) {
 	NDFREE(&ndp, 0);
 	MARK_INT_FAIL(CODA_IOCTL_STATS);
 	return(EINVAL);
     }
     error = venus_ioctl(vtomi(tvp), &((VTOC(tvp))->c_fid), com, flag, data, cred, p);
 
     if (error)
 	MARK_INT_FAIL(CODA_IOCTL_STATS);
     else
 	CODADEBUG(CODA_IOCTL, myprintf(("Ioctl returns %d \n", error)); )
 
     vrele(tvp);
     NDFREE(&ndp, NDF_ONLY_PNBUF);
     return(error);
 }
 
 /*
  * To reduce the cost of a user-level venus;we cache attributes in
  * the kernel.  Each cnode has storage allocated for an attribute. If
  * c_vattr is valid, return a reference to it. Otherwise, get the
  * attributes from venus and store them in the cnode.  There is some
  * question if this method is a security leak. But I think that in
  * order to make this call, the user must have done a lookup and
  * opened the file, and therefore should already have access.  
  */
 int
 coda_getattr(v)
     void *v;
 {
 /* true args */
     struct vop_getattr_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     struct vattr *vap = ap->a_vap;
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_p;
 /* locals */
     int error;
 
     MARK_ENTRY(CODA_GETATTR_STATS);
 
     if (IS_UNMOUNTING(cp))
 	return ENODEV;
 
     /* Check for getattr of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_GETATTR_STATS);
 	return(ENOENT);
     }
 
     /* Check to see if the attributes have already been cached */
     if (VALID_VATTR(cp)) { 
 	CODADEBUG(CODA_GETATTR, { myprintf(("attr cache hit: (%lx.%lx.%lx)\n",
 				       cp->c_fid.Volume,
 				       cp->c_fid.Vnode,
 				       cp->c_fid.Unique));});
 	CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR))
 		 print_vattr(&cp->c_vattr); );
 	
 	*vap = cp->c_vattr;
 	MARK_INT_SAT(CODA_GETATTR_STATS);
 	return(0);
     }
 
     error = venus_getattr(vtomi(vp), &cp->c_fid, cred, p, vap);
 
     if (!error) {
 	CODADEBUG(CODA_GETATTR, myprintf(("getattr miss (%lx.%lx.%lx): result %d\n",
 				     cp->c_fid.Volume,
 				     cp->c_fid.Vnode,
 				     cp->c_fid.Unique,
 				     error)); )
 	    
 	CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR))
 		 print_vattr(vap);	);
 	
     {	int size = vap->va_size;
     	struct vnode *convp = cp->c_ovp;
 	if (convp != (struct vnode *)0) {
 	    vnode_pager_setsize(convp, size);
 	}
     }
 	/* If not open for write, store attributes in cnode */   
 	if ((cp->c_owrite == 0) && (coda_attr_cache)) {  
 	    cp->c_vattr = *vap;
 	    cp->c_flags |= C_VATTR; 
 	}
 	
     }
     return(error);
 }
 
 int
 coda_setattr(v)
     void *v;
 {
 /* true args */
     struct vop_setattr_args *ap = v;
     register struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     register struct vattr *vap = ap->a_vap;
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_p;
 /* locals */
     int error;
 
     MARK_ENTRY(CODA_SETATTR_STATS);
 
     /* Check for setattr of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_SETATTR_STATS);
 	return(ENOENT);
     }
 
     if (codadebug & CODADBGMSK(CODA_SETATTR)) {
 	print_vattr(vap);
     }
     error = venus_setattr(vtomi(vp), &cp->c_fid, vap, cred, p);
 
     if (!error)
 	cp->c_flags &= ~C_VATTR;
 
     {	int size = vap->va_size;
     	struct vnode *convp = cp->c_ovp;
 	if (size != VNOVAL && convp != (struct vnode *)0) {
 	    vnode_pager_setsize(convp, size);
 	}
     }
     CODADEBUG(CODA_SETATTR,	myprintf(("setattr %d\n", error)); )
     return(error);
 }
 
 int
 coda_access(v)
     void *v;
 {
 /* true args */
     struct vop_access_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     int mode = ap->a_mode;
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_p;
 
     MARK_ENTRY(CODA_ACCESS_STATS);
 
     /*
      * The control object may only be read.  Note that the stats count
      * the call as satisfied whichever way the check goes.
      */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_SAT(CODA_ACCESS_STATS);
 	if ((mode & VREAD) && !(mode & (VWRITE | VEXEC)))
 	    return(0);
 	return(EACCES);
     }
 
     /*
      * Shortcut for exec (i.e. lookup) checks on directories: if "." of
      * this directory is in the name cache for these credentials, the
      * user must already have lookup access, so Venus need not be asked.
      */
     if (coda_access_cache &&
 	(vp->v_type == VDIR) && (mode & VEXEC) &&
 	coda_nc_lookup(cp, ".", 1, cred)) {
 	MARK_INT_SAT(CODA_ACCESS_STATS);
 	return(0);
     }
 
     /* Everything else is decided by Venus. */
     return(venus_access(vtomi(vp), &cp->c_fid, mode, cred, p));
 }
 
 int
 coda_readlink(v)
     void *v;
 {
 /* true args */
     struct vop_readlink_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     struct uio *uiop = ap->a_uio;
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_uio->uio_procp;
 /* locals */
     int error;
     char *str;
     int len;
 
     MARK_ENTRY(CODA_READLINK_STATS);
 
     /* Check for readlink of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_READLINK_STATS);
 	return(ENOENT);
     }
 
     if ((coda_symlink_cache) && (VALID_SYMLINK(cp))) { /* symlink was cached */
 	uiop->uio_rw = UIO_READ;
 	error = uiomove(cp->c_symlink, (int)cp->c_symlen, uiop);
 	if (error)
 	    MARK_INT_FAIL(CODA_READLINK_STATS);
 	else
 	    MARK_INT_SAT(CODA_READLINK_STATS);
 	return(error);
     }
 
     error = venus_readlink(vtomi(vp), &cp->c_fid, cred, p, &str, &len);
 
     if (!error) {
 	uiop->uio_rw = UIO_READ;
 	error = uiomove(str, len, uiop);
 
 	if (coda_symlink_cache) {
 	    cp->c_symlink = str;
 	    cp->c_symlen = len;
 	    cp->c_flags |= C_SYMLINK;
 	} else
 	    CODA_FREE(str, len);
     }
 
     CODADEBUG(CODA_READLINK, myprintf(("in readlink result %d\n",error));)
     return(error);
 }
 
 /*
  * VOP_FSYNC for Coda vnodes.
  *
  * v is a struct vop_fsync_args.  An open container vnode is fsynced with
  * MNT_WAIT (its result is ignored).  As the code stands Venus is never
  * contacted: the function returns 0 before the venus_fsync() call.
  *
  * Returns ENODEV while unmounting, otherwise 0.
  */
 int
 coda_fsync(v)
     void *v;
 {
 /* true args */
     struct vop_fsync_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     struct ucred *cred = ap->a_cred;
     struct proc *p = ap->a_p;
 /* locals */
     struct vnode *convp = cp->c_ovp;	/* container vnode, NULL if not open */
     int error;
    
     MARK_ENTRY(CODA_FSYNC_STATS);
 
     /* Check for fsync on an unmounting object */
     /* The NetBSD kernel, in it's infinite wisdom, can try to fsync
      * after an unmount has been initiated.  This is a Bad Thing,
      * which we have to avoid.  Not a legitimate failure for stats.
      */
     if (IS_UNMOUNTING(cp)) {
 	return(ENODEV);
     }
 
     /* Check for fsync of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_SAT(CODA_FSYNC_STATS);
 	return(0);
     }
 
     /* The return value of the container fsync is not checked. */
     if (convp)
     	VOP_FSYNC(convp, cred, MNT_WAIT, p);
 
     /*
      * We see fsyncs with usecount == 1 then usecount == 0.
      * For now we ignore them.
      */
     /*
     if (!vp->v_usecount) {
     	printf("coda_fsync on vnode %p with %d usecount.  c_flags = %x (%x)\n",
 		vp, vp->v_usecount, cp->c_flags, cp->c_flags&C_PURGING);
     }
     */
 
     /*
      * We can expect fsync on any vnode at all if venus is purging it.
      * Venus can't very well answer the fsync request, now can it?
      * Hopefully, it won't have to, because hopefully, venus preserves
      * the (possibly untrue) invariant that it never purges an open
      * vnode.  Hopefully.
      */
     if (cp->c_flags & C_PURGING) {
 	return(0);
     }
 
     /* needs research */
     /* Everything after this return is unreachable. */
     return 0;
     error = venus_fsync(vtomi(vp), &cp->c_fid, cred, p);
 
     CODADEBUG(CODA_FSYNC, myprintf(("in fsync result %d\n",error)); );
     return(error);
 }
 
 /*
  * VOP_INACTIVE for Coda vnodes.
  *
  * v is a struct vop_inactive_args.  Frees a cached symlink string,
  * removes the cnode from the lookup table with coda_unsave(), and then
  * either just releases the cnode lock (while unmounting) or unlocks the
  * vnode and disposes of it with vgone().  Venus is not contacted.
  *
  * Always returns 0.
  */
 int
 coda_inactive(v)
     void *v;
 {
     /* XXX - at the moment, inactive doesn't look at cred, and doesn't
        have a proc pointer.  Oops. */
 /* true args */
     struct vop_inactive_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     struct ucred *cred __attribute__((unused)) = NULL;
     struct proc *p __attribute__((unused)) = curproc;
 /* upcall decl */
 /* locals */
 
     /* We don't need to send inactive to venus - DCS */
     MARK_ENTRY(CODA_INACTIVE_STATS);
 
     /* Nothing to tear down for the control vnode. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_SAT(CODA_INACTIVE_STATS);
 	return 0;
     }
 
     CODADEBUG(CODA_INACTIVE, myprintf(("in inactive, %lx.%lx.%lx. vfsp %p\n",
 				  cp->c_fid.Volume, cp->c_fid.Vnode, 
 				  cp->c_fid.Unique, vp->v_mount));)
 
     /* If an array has been allocated to hold the symlink, deallocate it */
     if ((coda_symlink_cache) && (VALID_SYMLINK(cp))) {
 	if (cp->c_symlink == NULL)
 	    panic("coda_inactive: null symlink pointer in cnode");
 	
 	CODA_FREE(cp->c_symlink, cp->c_symlen);
 	cp->c_flags &= ~C_SYMLINK;
 	cp->c_symlen = 0;
     }
 
     /* Remove it from the table so it can't be found. */
     coda_unsave(cp);
     /* A live vnode whose mount has lost its private data is fatal. */
     if ((struct coda_mntinfo *)(vp->v_mount->mnt_data) == NULL) {
 	myprintf(("Help! vfsp->vfs_data was NULL, but vnode %p wasn't dying\n", vp));
 	panic("badness in coda_inactive\n");
     }
 
     if (IS_UNMOUNTING(cp)) {
 #ifdef	DEBUG
 	printf("coda_inactive: IS_UNMOUNTING use %d: vp %p, cp %p\n", vp->v_usecount, vp, cp);
 	if (cp->c_ovp != NULL)
 	    printf("coda_inactive: cp->ovp != NULL use %d: vp %p, cp %p\n",
 	    	   vp->v_usecount, vp, cp);
 #endif
 	/* While unmounting only the cnode lock is dropped; no vgone() here. */
 	lockmgr(&cp->c_lock, LK_RELEASE, &vp->v_interlock, p);
     } else {
 #ifdef OLD_DIAGNOSTIC
 	if (CTOV(cp)->v_usecount) {
 	    panic("coda_inactive: nonzero reference count");
 	}
 	if (cp->c_ovp != NULL) {
 	    panic("coda_inactive:  cp->ovp != NULL");
 	}
 #endif
 	/* Normal case: unlock the vnode, then get rid of it. */
 	VOP_UNLOCK(vp, 0, p);
 	vgone(vp);
     }
 
     MARK_INT_SAT(CODA_INACTIVE_STATS);
     return(0);
 }
 
 /*
  * Remote file system operations having to do with directory manipulation.
  */
 
 /* 
  * It appears that in NetBSD, lookup is supposed to return the vnode locked
  */
 /*
  * VOP_LOOKUP for Coda directories.
  *
  * v is a struct vop_lookup_args: a_dvp is the directory, a_cnp the
  * component to look up, and the result is stored through a_vpp.
  * The name is resolved, in order, as the control object, from the Coda
  * name cache, or by asking Venus (in which case a cnode is made for the
  * returned fid and, unless CODA_NOCACHE is set in the type, entered into
  * the name cache).
  *
  * Returns 0 on success, EJUSTRETURN when a CREATE/RENAME of the last
  * component found nothing (ENOENT), EINVAL for an over-long name, or
  * another errno value.  On failure *a_vpp is set to NULL.  The second
  * half of the function implements the parent/child locking described in
  * the comments there.
  */
 int
 coda_lookup(v)
     void *v;
 {
 /* true args */
     struct vop_lookup_args *ap = v;
     struct vnode *dvp = ap->a_dvp;
     struct cnode *dcp = VTOC(dvp);
     struct vnode **vpp = ap->a_vpp;
     /* 
      * It looks as though ap->a_cnp->ni_cnd->cn_nameptr holds the rest
      * of the string to xlate, and that we must try to get at least
      * ap->a_cnp->ni_cnd->cn_namelen of those characters to match.  I
      * could be wrong. 
      */
     struct componentname  *cnp = ap->a_cnp;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
 /* locals */
     struct cnode *cp;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
     ViceFid VFid;
     int	vtype;
     int error = 0;
 
     MARK_ENTRY(CODA_LOOKUP_STATS);
 
     CODADEBUG(CODA_LOOKUP, myprintf(("lookup: %s in %lx.%lx.%lx\n",
 				   nm, dcp->c_fid.Volume,
 				   dcp->c_fid.Vnode, dcp->c_fid.Unique)););
 
     /* Check for lookup of control object. */
     if (IS_CTL_NAME(dvp, nm, len)) {
 	*vpp = coda_ctlvp;
 	vref(*vpp);
 	MARK_INT_SAT(CODA_LOOKUP_STATS);
 	goto exit;
     }
 
     /* Names that do not fit in CODA_MAXNAMLEN (with one extra byte) are rejected. */
     if (len+1 > CODA_MAXNAMLEN) {
 	MARK_INT_FAIL(CODA_LOOKUP_STATS);
 	CODADEBUG(CODA_LOOKUP, myprintf(("name too long: lookup, %lx.%lx.%lx(%s)\n",
 				    dcp->c_fid.Volume, dcp->c_fid.Vnode,
 				    dcp->c_fid.Unique, nm)););
 	*vpp = (struct vnode *)0;
 	error = EINVAL;
 	goto exit;
     }
     /* First try to look the file up in the cfs name cache */
     /* lock the parent vnode? */
     cp = coda_nc_lookup(dcp, nm, len, cred);
     if (cp) {
 	/* Cache hit: return the cached cnode's vnode with a new reference. */
 	*vpp = CTOV(cp);
 	vref(*vpp);
 	CODADEBUG(CODA_LOOKUP, 
 		 myprintf(("lookup result %d vpp %p\n",error,*vpp));)
     } else {
 	
 	/* The name wasn't cached, so we need to contact Venus */
 	error = venus_lookup(vtomi(dvp), &dcp->c_fid, nm, len, cred, p, &VFid, &vtype);
 	
 	if (error) {
 	    MARK_INT_FAIL(CODA_LOOKUP_STATS);
 	    CODADEBUG(CODA_LOOKUP, myprintf(("lookup error on %lx.%lx.%lx(%s)%d\n",
 					dcp->c_fid.Volume, dcp->c_fid.Vnode, dcp->c_fid.Unique, nm, error));)
 	    *vpp = (struct vnode *)0;
 	} else {
 	    MARK_INT_SAT(CODA_LOOKUP_STATS);
 	    CODADEBUG(CODA_LOOKUP, 
 		     myprintf(("lookup: vol %lx vno %lx uni %lx type %o result %d\n",
 			    VFid.Volume, VFid.Vnode, VFid.Unique, vtype,
 			    error)); )
 		
 	    cp = make_coda_node(&VFid, dvp->v_mount, vtype);
 	    *vpp = CTOV(cp);
 	    
 	    /* enter the new vnode in the Name Cache only if the top bit isn't set */
 	    /* And don't enter a new vnode for an invalid one! */
 	    if (!(vtype & CODA_NOCACHE))
 		coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
 	}
     }
 
  exit:
     /* 
      * If we are creating, and this was the last name to be looked up,
      * and the error was ENOENT, then there really shouldn't be an
      * error and we can make the leaf NULL and return success.  Since
      * this is supposed to work under Mach as well as NetBSD, we're
      * leaving this fn wrapped.  We also must tell lookup/namei that
      * we need to save the last component of the name.  (Create will
      * have to free the name buffer later...lucky us...)
      */
     if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME))
 	&& (cnp->cn_flags & ISLASTCN)
 	&& (error == ENOENT))
     {
 	error = EJUSTRETURN;
 	cnp->cn_flags |= SAVENAME;
 	*ap->a_vpp = NULL;
     }
 
     /* 
      * If we are removing, and we are at the last element, and we
      * found it, then we need to keep the name around so that the
      * removal will go ahead as planned.  Unfortunately, this will
      * probably also lock the to-be-removed vnode, which may or may
      * not be a good idea.  I'll have to look at the bits of
      * coda_remove to make sure.  We'll only save the name if we did in
      * fact find the name, otherwise coda_remove won't have a chance
      * to free the pathname.  
      */
     if ((cnp->cn_nameiop == DELETE)
 	&& (cnp->cn_flags & ISLASTCN)
 	&& !error)
     {
 	cnp->cn_flags |= SAVENAME;
     }
 
     /* 
      * If the lookup went well, we need to (potentially?) unlock the
      * parent, and lock the child.  We are only responsible for
      * checking to see if the parent is supposed to be unlocked before
      * we return.  We must always lock the child (provided there is
      * one, and (the parent isn't locked or it isn't the same as the
      * parent.)  Simple, huh?  We can never leave the parent locked unless
      * we are ISLASTCN
      */
     if (!error || (error == EJUSTRETURN)) {
 	if (!(cnp->cn_flags & LOCKPARENT) || !(cnp->cn_flags & ISLASTCN)) {
 	    /* A failed unlock of the parent is returned to the caller as is. */
 	    if ((error = VOP_UNLOCK(dvp, 0, p))) {
 		return error; 
 	    }	    
 	    /* 
 	     * The parent is unlocked.  As long as there is a child,
 	     * lock it without bothering to check anything else. 
 	     */
 	    if (*ap->a_vpp) {
 		if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
 		    printf("coda_lookup: ");
 		    panic("unlocked parent but couldn't lock child");
 		}
 	    }
 	} else {
 	    /* The parent is locked, and may be the same as the child */
 	    if (*ap->a_vpp && (*ap->a_vpp != dvp)) {
 		/* Different, go ahead and lock it. */
 		/* Note: the panic text below is shared with the branch above
 		   even though the parent stays locked on this path. */
 		if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
 		    printf("coda_lookup: ");
 		    panic("unlocked parent but couldn't lock child");
 		}
 	    }
 	}
     } else {
 	/* If the lookup failed, we need to ensure that the leaf is NULL */
 	/* Don't change any locking? */
 	*ap->a_vpp = NULL;
     }
     return(error);
 }
 
 /*ARGSUSED*/
 int
 coda_create(v)
     void *v;
 {
 /* true args */
     struct vop_create_args *ap = v;
     struct vnode *dvp = ap->a_dvp;
     struct cnode *dcp = VTOC(dvp);
     struct vattr *va = ap->a_vap;
     int exclusive = 1;
     int mode = ap->a_vap->va_mode;
     struct vnode **vpp = ap->a_vpp;
     struct componentname  *cnp = ap->a_cnp;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
 /* locals */
     int error;
     struct cnode *cp;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
     ViceFid VFid;
     struct vattr attr;
 
     MARK_ENTRY(CODA_CREATE_STATS);
 
     /* All creates are exclusive XXX */
     /* I'm assuming the 'mode' argument is the file mode bits XXX */
 
     /* Check for create of control object. */
     if (IS_CTL_NAME(dvp, nm, len)) {
 	*vpp = (struct vnode *)0;
 	MARK_INT_FAIL(CODA_CREATE_STATS);
 	return(EACCES);
     }
 
     error = venus_create(vtomi(dvp), &dcp->c_fid, nm, len, exclusive, mode, va, cred, p, &VFid, &attr);
 
     if (!error) {
 	
 	/* If this is an exclusive create, panic if the file already exists. */
 	/* Venus should have detected the file and reported EEXIST. */
 
 	if ((exclusive == 1) &&
 	    (coda_find(&VFid) != NULL))
 	    panic("cnode existed for newly created file!");
 	
 	cp = make_coda_node(&VFid, dvp->v_mount, attr.va_type);
 	*vpp = CTOV(cp);
 	
 	/* Update va to reflect the new attributes. */
 	(*va) = attr;
 	
 	/* Update the attribute cache and mark it as valid */
 	if (coda_attr_cache) {
 	    VTOC(*vpp)->c_vattr = attr;
 	    VTOC(*vpp)->c_flags |= C_VATTR;       
 	}
 
 	/* Invalidate the parent's attr cache, the modification time has changed */
 	VTOC(dvp)->c_flags &= ~C_VATTR;
 	
 	/* enter the new vnode in the Name Cache */
 	coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
 	
 	CODADEBUG(CODA_CREATE, 
 		 myprintf(("create: (%lx.%lx.%lx), result %d\n",
 			VFid.Volume, VFid.Vnode, VFid.Unique, error)); )
     } else {
 	*vpp = (struct vnode *)0;
 	CODADEBUG(CODA_CREATE, myprintf(("create error %d\n", error));)
     }
 
     if (!error) {
 	if (cnp->cn_flags & LOCKLEAF) {
 	    if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
 		printf("coda_create: ");
 		panic("unlocked parent but couldn't lock child");
 	    }
 	}
 #ifdef OLD_DIAGNOSTIC
 	else {
 	    printf("coda_create: LOCKLEAF not set!\n");
 	}
 #endif
     }
     return(error);
 }
 
 int
 coda_remove(v)
     void *v;
 {
 /* true args */
     struct vop_remove_args *ap = v;
     struct vnode *dvp = ap->a_dvp;
     struct cnode *cp = VTOC(dvp);
     struct componentname  *cnp = ap->a_cnp;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
 /* locals */
     int error;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
     struct cnode *tp;
 
     MARK_ENTRY(CODA_REMOVE_STATS);
 
     CODADEBUG(CODA_REMOVE, myprintf(("remove: %s in %lx.%lx.%lx\n",
 				   nm, cp->c_fid.Volume, cp->c_fid.Vnode,
 				   cp->c_fid.Unique)););
 
     /* Remove the file's entry from the CODA Name Cache */
     /* We're being conservative here, it might be that this person
      * doesn't really have sufficient access to delete the file
      * but we feel zapping the entry won't really hurt anyone -- dcs
      */
     /* I'm gonna go out on a limb here. If a file and a hardlink to it
      * exist, and one is removed, the link count on the other will be
      * off by 1. We could either invalidate the attrs if cached, or
      * fix them. I'll try to fix them. DCS 11/8/94
      */
     tp = coda_nc_lookup(VTOC(dvp), nm, len, cred);
     if (tp) {
 	if (VALID_VATTR(tp)) {	/* If attrs are cached */
 	    if (tp->c_vattr.va_nlink > 1) {	/* If it's a hard link */
 		tp->c_vattr.va_nlink--;
 	    }
 	}
 	
 	coda_nc_zapfile(VTOC(dvp), nm, len); 
 	/* No need to flush it if it doesn't exist! */
     }
     /* Invalidate the parent's attr cache, the modification time has changed */
     VTOC(dvp)->c_flags &= ~C_VATTR;
 
     /* Check for remove of control object. */
     if (IS_CTL_NAME(dvp, nm, len)) {
 	MARK_INT_FAIL(CODA_REMOVE_STATS);
 	return(ENOENT);
     }
 
     error = venus_remove(vtomi(dvp), &cp->c_fid, nm, len, cred, p);
 
     CODADEBUG(CODA_REMOVE, myprintf(("in remove result %d\n",error)); )
 
     return(error);
 }
 
 /*
  * Create a hard link named by ap->a_cnp in directory ap->a_tdvp that
  * refers to the existing vnode ap->a_vp.
  *
  * Returns EACCES when either side is the control object, otherwise the
  * result of venus_link().  The cached attributes of both the target
  * directory and the linked file are invalidated whatever Venus answers.
  */
 int
 coda_link(v)
     void *v;
 {
 /* true args */
     struct vop_link_args *ap = v;
     struct componentname *cnp = ap->a_cnp;
     struct vnode *vp = ap->a_vp;
     struct vnode *tdvp = ap->a_tdvp;
     struct cnode *cp = VTOC(vp);
     struct cnode *tdcp = VTOC(tdvp);
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
 /* locals */
     int error;
 
     MARK_ENTRY(CODA_LINK_STATS);
 
     /* Both fids are traced twice, under the "nb_link" and "link" tags. */
     if (codadebug & CODADBGMSK(CODA_LINK)) {
 	myprintf(("nb_link:   vp fid: (%lx.%lx.%lx)\n",
 		  cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
 	myprintf(("nb_link: tdvp fid: (%lx.%lx.%lx)\n",
 		  tdcp->c_fid.Volume, tdcp->c_fid.Vnode, tdcp->c_fid.Unique));
 	myprintf(("link:   vp fid: (%lx.%lx.%lx)\n",
 		  cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
 	myprintf(("link: tdvp fid: (%lx.%lx.%lx)\n",
 		  tdcp->c_fid.Volume, tdcp->c_fid.Vnode, tdcp->c_fid.Unique));
     }
 
     /* Links to or from the control object are refused. */
     if (IS_CTL_NAME(tdvp, nm, len) || IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_LINK_STATS);
 	return(EACCES);
     }
 
     error = venus_link(vtomi(vp), &cp->c_fid, &tdcp->c_fid, nm, len, cred, p);
 
     /* Directory mtime and file link count changed; drop cached attrs. */
     tdcp->c_flags &= ~C_VATTR;
     cp->c_flags &= ~C_VATTR;
 
     CODADEBUG(CODA_LINK,	myprintf(("in link result %d\n",error)); )
 
     return(error);
 }
 
 int
 coda_rename(v)
     void *v;
 {
 /* true args */
     struct vop_rename_args *ap = v;
     struct vnode *odvp = ap->a_fdvp;
     struct cnode *odcp = VTOC(odvp);
     struct componentname  *fcnp = ap->a_fcnp;
     struct vnode *ndvp = ap->a_tdvp;
     struct cnode *ndcp = VTOC(ndvp);
     struct componentname  *tcnp = ap->a_tcnp;
     struct ucred *cred = fcnp->cn_cred;
     struct proc *p = fcnp->cn_proc;
 /* true args */
     int error;
     const char *fnm = fcnp->cn_nameptr;
     int flen = fcnp->cn_namelen;
     const char *tnm = tcnp->cn_nameptr;
     int tlen = tcnp->cn_namelen;
 
     MARK_ENTRY(CODA_RENAME_STATS);
 
     /* Hmmm.  The vnodes are already looked up.  Perhaps they are locked?
        This could be Bad. XXX */
 #ifdef OLD_DIAGNOSTIC
     if ((fcnp->cn_cred != tcnp->cn_cred)
 	|| (fcnp->cn_proc != tcnp->cn_proc))
     {
 	panic("coda_rename: component names don't agree");
     }
 #endif
 
     /* Check for rename involving control object. */ 
     if (IS_CTL_NAME(odvp, fnm, flen) || IS_CTL_NAME(ndvp, tnm, tlen)) {
 	MARK_INT_FAIL(CODA_RENAME_STATS);
 	return(EACCES);
     }
 
     /* Problem with moving directories -- need to flush entry for .. */
     if (odvp != ndvp) {
 	struct cnode *ovcp = coda_nc_lookup(VTOC(odvp), fnm, flen, cred);
 	if (ovcp) {
 	    struct vnode *ovp = CTOV(ovcp);
 	    if ((ovp) &&
 		(ovp->v_type == VDIR)) /* If it's a directory */
 		coda_nc_zapfile(VTOC(ovp),"..", 2);
 	}
     }
 
     /* Remove the entries for both source and target files */
     coda_nc_zapfile(VTOC(odvp), fnm, flen);
     coda_nc_zapfile(VTOC(ndvp), tnm, tlen);
 
     /* Invalidate the parent's attr cache, the modification time has changed */
     VTOC(odvp)->c_flags &= ~C_VATTR;
     VTOC(ndvp)->c_flags &= ~C_VATTR;
 
     if (flen+1 > CODA_MAXNAMLEN) {
 	MARK_INT_FAIL(CODA_RENAME_STATS);
 	error = EINVAL;
 	goto exit;
     }
 
     if (tlen+1 > CODA_MAXNAMLEN) {
 	MARK_INT_FAIL(CODA_RENAME_STATS);
 	error = EINVAL;
 	goto exit;
     }
 
     error = venus_rename(vtomi(odvp), &odcp->c_fid, &ndcp->c_fid, fnm, flen, tnm, tlen, cred, p);
 
  exit:
     CODADEBUG(CODA_RENAME, myprintf(("in rename result %d\n",error));)
     /* XXX - do we need to call cache pureg on the moved vnode? */
     cache_purge(ap->a_fvp);
 
     /* It seems to be incumbent on us to drop locks on all four vnodes */
     /* From-vnodes are not locked, only ref'd.  To-vnodes are locked. */
 
     vrele(ap->a_fvp);
     vrele(odvp);
 
     if (ap->a_tvp) {
 	if (ap->a_tvp == ndvp) {
 	    vrele(ap->a_tvp);
 	} else {
 	    vput(ap->a_tvp);
 	}
     }
 
     vput(ndvp);
     return(error);
 }
 
 int
 coda_mkdir(v)
     void *v;
 {
 /* true args */
     struct vop_mkdir_args *ap = v;
     struct vnode *dvp = ap->a_dvp;
     struct cnode *dcp = VTOC(dvp);	
     struct componentname  *cnp = ap->a_cnp;
     register struct vattr *va = ap->a_vap;
     struct vnode **vpp = ap->a_vpp;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
 /* locals */
     int error;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
     struct cnode *cp;
     ViceFid VFid;
     struct vattr ova;
 
     MARK_ENTRY(CODA_MKDIR_STATS);
 
     /* Check for mkdir of target object. */
     if (IS_CTL_NAME(dvp, nm, len)) {
 	*vpp = (struct vnode *)0;
 	MARK_INT_FAIL(CODA_MKDIR_STATS);
 	return(EACCES);
     }
 
     if (len+1 > CODA_MAXNAMLEN) {
 	*vpp = (struct vnode *)0;
 	MARK_INT_FAIL(CODA_MKDIR_STATS);
 	return(EACCES);
     }
 
     error = venus_mkdir(vtomi(dvp), &dcp->c_fid, nm, len, va, cred, p, &VFid, &ova);
 
     if (!error) {
 	if (coda_find(&VFid) != NULL)
 	    panic("cnode existed for newly created directory!");
 	
 	
 	cp =  make_coda_node(&VFid, dvp->v_mount, va->va_type);
 	*vpp = CTOV(cp);
 	
 	/* enter the new vnode in the Name Cache */
 	coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
 
 	/* as a side effect, enter "." and ".." for the directory */
 	coda_nc_enter(VTOC(*vpp), ".", 1, cred, VTOC(*vpp));
 	coda_nc_enter(VTOC(*vpp), "..", 2, cred, VTOC(dvp));
 
 	if (coda_attr_cache) {
 	    VTOC(*vpp)->c_vattr = ova;		/* update the attr cache */
 	    VTOC(*vpp)->c_flags |= C_VATTR;	/* Valid attributes in cnode */
 	}
 
 	/* Invalidate the parent's attr cache, the modification time has changed */
 	VTOC(dvp)->c_flags &= ~C_VATTR;
 	
 	CODADEBUG( CODA_MKDIR, myprintf(("mkdir: (%lx.%lx.%lx) result %d\n",
 				    VFid.Volume, VFid.Vnode, VFid.Unique, error)); )
     } else {
 	*vpp = (struct vnode *)0;
 	CODADEBUG(CODA_MKDIR, myprintf(("mkdir error %d\n",error));)
     }
 
     return(error);
 }
 
 int
 coda_rmdir(v)
     void *v;
 {
 /* true args */
     struct vop_rmdir_args *ap = v;
     struct vnode *dvp = ap->a_dvp;
     struct cnode *dcp = VTOC(dvp);
     struct componentname  *cnp = ap->a_cnp;
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
 /* true args */
     int error;
     const char *nm = cnp->cn_nameptr;
     int len = cnp->cn_namelen;
     struct cnode *cp;
    
     MARK_ENTRY(CODA_RMDIR_STATS);
 
     /* Check for rmdir of control object. */
     if (IS_CTL_NAME(dvp, nm, len)) {
 	MARK_INT_FAIL(CODA_RMDIR_STATS);
 	return(ENOENT);
     }
 
     /* We're being conservative here, it might be that this person
      * doesn't really have sufficient access to delete the file
      * but we feel zapping the entry won't really hurt anyone -- dcs
      */
     /*
      * As a side effect of the rmdir, remove any entries for children of
      * the directory, especially "." and "..".
      */
     cp = coda_nc_lookup(dcp, nm, len, cred);
     if (cp) coda_nc_zapParentfid(&(cp->c_fid), NOT_DOWNCALL);
 
     /* Remove the file's entry from the CODA Name Cache */
     coda_nc_zapfile(dcp, nm, len);
 
     /* Invalidate the parent's attr cache, the modification time has changed */
     dcp->c_flags &= ~C_VATTR;
 
     error = venus_rmdir(vtomi(dvp), &dcp->c_fid, nm, len, cred, p);
 
     CODADEBUG(CODA_RMDIR, myprintf(("in rmdir result %d\n", error)); )
 
     return(error);
 }
 
 /*
  * Create a symbolic link named by ap->a_cnp in directory ap->a_dvp whose
  * contents are the string ap->a_target.
  *
  * Strategy: ask Venus to create the link, then run VOP_LOOKUP on the new
  * name to obtain its vnode in *ap->a_vpp.  This costs two exchanges with
  * Venus for every new symbolic link.
  *
  * Returns EACCES for the control object, EINVAL when the target path or
  * the link name is too long, otherwise the result of venus_symlink() or,
  * when that succeeds, of the follow-up lookup.
  */
 int
 coda_symlink(v)
     void *v;
 {
 /* true args */
     struct vop_symlink_args *ap = v;
     struct componentname *cnp = ap->a_cnp;
     struct vnode *tdvp = ap->a_dvp;
     struct vnode **vpp = ap->a_vpp;
     struct vattr *tva = ap->a_vap;
     struct cnode *tdcp = VTOC(tdvp);
     struct ucred *cred = cnp->cn_cred;
     struct proc *p = cnp->cn_proc;
     char *path = ap->a_target;		/* contents of the new symlink */
     char *nm = cnp->cn_nameptr;		/* name of the new symlink */
     int len = cnp->cn_namelen;
     int plen = strlen(path);
 /* locals */
     int error;
 
     MARK_ENTRY(CODA_SYMLINK_STATS);
 
     /* The control object cannot be a symlink. */
     if (IS_CTL_NAME(tdvp, nm, len)) {
 	MARK_INT_FAIL(CODA_SYMLINK_STATS);
 	return(EACCES);
     }
 
     /* Target path must fit in a Venus message. */
     if (plen+1 > CODA_MAXPATHLEN) {
 	MARK_INT_FAIL(CODA_SYMLINK_STATS);
 	return(EINVAL);
     }
 
     if (len+1 > CODA_MAXNAMLEN) {
 	MARK_INT_FAIL(CODA_SYMLINK_STATS);
 	error = EINVAL;
     } else {
 	error = venus_symlink(vtomi(tdvp), &tdcp->c_fid, path, plen, nm, len, tva, cred, p);
 
 	/* The parent's modification time changed; drop its cached attrs. */
 	tdcp->c_flags &= ~C_VATTR;
 
 	/* Fetch the vnode of the link that was just created. */
 	if (error == 0)
 	    error = VOP_LOOKUP(tdvp, vpp, cnp);
     }
 
     CODADEBUG(CODA_SYMLINK, myprintf(("in symlink result %d\n",error)); )
     return(error);
 }
 
 /*
  * Read directory entries.
  *
  * The work is delegated to VOP_READDIR on the cnode's container vnode
  * (cp->c_ovp).  If no container vnode is attached yet, the directory is
  * opened internally with VOP_OPEN(FREAD) first and closed again with
  * VOP_CLOSE once the read is done.
  *
  * Returns ENOENT for the control object, an error from the internal open
  * or from vfs_object_create(), otherwise the result of the underlying
  * VOP_READDIR.
  */
 int
 coda_readdir(v)
     void *v;
 {
 /* true args */
     struct vop_readdir_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
     register struct uio *uiop = ap->a_uio;
     struct ucred *cred = ap->a_cred;
     int *eofflag = ap->a_eofflag;
     u_long **cookies = ap->a_cookies;
     int *ncookies = ap->a_ncookies;
     struct proc *p = ap->a_uio->uio_procp;
 /* upcall decl */
 /* locals */
     int error = 0;
 
     MARK_ENTRY(CODA_READDIR_STATS);
 
     CODADEBUG(CODA_READDIR, myprintf(("coda_readdir(%p, %d, %lld, %d)\n",
 				      (void *)uiop->uio_iov->iov_base,
 				      uiop->uio_resid,
 				      (long long)uiop->uio_offset,
 				      uiop->uio_segflg)); )
 	
     /* Check for readdir of control object. */
     if (IS_CTL_VP(vp)) {
 	MARK_INT_FAIL(CODA_READDIR_STATS);
 	return(ENOENT);
     }
 
     {
 	/* If directory is not already open do an "internal open" on it. */
 	int opened_internally = 0;
 	if (cp->c_ovp == NULL) {
 	    opened_internally = 1;
 	    MARK_INT_GEN(CODA_OPEN_STATS);
 	    error = VOP_OPEN(vp, FREAD, cred, p);
 /* Unconditional console trace; printed whether or not the open worked. */
 printf("coda_readdir: Internally Opening %p\n", vp);
 	    if (error) {
 		printf("coda_readdir: VOP_OPEN on container failed %d\n", error);
 		return (error);
 	    }
 	    if (vp->v_type == VREG) {
 		error = vfs_object_create(vp, p, cred);
 		if (error != 0) {
 		    printf("coda_readdir: vfs_object_create() returns %d\n", error);
 		    /*
 		     * NOTE(review): this drops a reference/lock on vp and
 		     * returns without the matching VOP_CLOSE -- confirm
 		     * against the caller's expectations.
 		     */
 		    vput(vp);
 		}
 	    }
 	    if (error) return(error);
 	}
 	
 	/* Have UFS handle the call. */
 	CODADEBUG(CODA_READDIR, myprintf(("indirect readdir: fid = (%lx.%lx.%lx), refcnt = %d\n",cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique, vp->v_usecount)); )
 	error = VOP_READDIR(cp->c_ovp, uiop, cred, eofflag, ncookies,
 			       cookies);
 	
 	/* Account the outcome in the readdir statistics. */
 	if (error)
 	    MARK_INT_FAIL(CODA_READDIR_STATS);
 	else
 	    MARK_INT_SAT(CODA_READDIR_STATS);
 	
 	/* Do an "internal close" if necessary; its result is ignored. */ 
 	if (opened_internally) {
 	    MARK_INT_GEN(CODA_CLOSE_STATS);
 	    (void)VOP_CLOSE(vp, FREAD, cred, p);
 	}
     }
 
     return(error);
 }
 
 /*
  * Convert from file system blocks to device blocks
  */
 int
 coda_bmap(v)
     void *v;
 {
     /* XXX on the global proc */
 /* true args */
     struct vop_bmap_args *ap = v;
     struct vnode *vp __attribute__((unused)) = ap->a_vp;	/* file's vnode */
     daddr_t bn __attribute__((unused)) = ap->a_bn;	/* fs block number */
     struct vnode **vpp = ap->a_vpp;			/* RETURN vp of device */
     daddr_t *bnp __attribute__((unused)) = ap->a_bnp;	/* RETURN device block number */
     struct proc *p __attribute__((unused)) = curproc;
 /* upcall decl */
 /* locals */
 
 	int ret = 0;
 	struct cnode *cp;
 
 	cp = VTOC(vp);
 	if (cp->c_ovp) {
 		return EINVAL;
 		ret =  VOP_BMAP(cp->c_ovp, bn, vpp, bnp, ap->a_runp, ap->a_runb);
 #if	0
 		printf("VOP_BMAP(cp->c_ovp %p, bn %p, vpp %p, bnp %p, ap->a_runp %p, ap->a_runb %p) = %d\n",
 			cp->c_ovp, bn, vpp, bnp, ap->a_runp, ap->a_runb, ret);
 #endif
 		return ret;
 	} else {
 #if	0
 		printf("coda_bmap: no container\n");
 #endif
 		return(EOPNOTSUPP);
 	}
 }
 
 /*
  * I don't think the following two things are used anywhere, so I've
  * commented them out 
  * 
  * struct buf *async_bufhead; 
  * int async_daemon_count;
  */
 /*
  * Strategy routine.  Buffer I/O is not supported on Coda vnodes: the call
  * is logged on the console and EOPNOTSUPP is returned.
  */
 int
 coda_strategy(v)
     void *v;
 {
     struct vop_strategy_args *ap __attribute__((unused)) = v;
 
     printf("coda_strategy: called ???\n");
     return (EOPNOTSUPP);
 }
 
 /*
  * Reclaim a Coda vnode: purge it from the system name cache, destroy the
  * cnode lock, free the cnode and detach it from the vnode.
  *
  * A forced unmount/flush lets vnodes with a non-zero use count be
  * destroyed, so the diagnostics are only meaningful when the file system
  * is not being unmounted.
  *
  * Always returns 0.
  */
 int
 coda_reclaim(v) 
     void *v;
 {
 /* true args */
     struct vop_reclaim_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
 /* upcall decl */
 /* locals */
 
     ENTRY;
 
     if (IS_UNMOUNTING(cp)) {
 #ifdef	DEBUG
 	/* A container vnode still attached at unmount time is noteworthy. */
 	if (VTOC(vp)->c_ovp)
 	    printf("coda_reclaim: c_ovp not void: vp %p, cp %p\n", vp, cp);
 #endif
     } else {
 #ifdef OLD_DIAGNOSTIC
 	if (vp->v_usecount != 0) 
 	    printf("coda_reclaim: pushing active %p\n", vp);
 	if (VTOC(vp)->c_ovp) {
 	    panic("coda_reclaim: c_ovp not void");
 	}
 #endif
     }	
     cache_purge(vp);
     lockdestroy(&(VTOC(vp)->c_lock));
     coda_free(VTOC(vp));
     /* The cnode is gone; make sure the vnode no longer points at it. */
     VTOC(vp) = NULL;
     return (0);
 }
 
 /*
  * Lock a Coda vnode.  The request (ap->a_flags) is passed to the lock
  * manager for the cnode's own lock, using the vnode interlock; the lock
  * manager's result is returned unchanged.
  */
 int
 coda_lock(v)
     void *v;
 {
 /* true args */
     struct vop_lock_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
 /* locals */
     int rv;
 
     ENTRY;
 
     if (coda_lockdebug) {
 	myprintf(("Attempting lock on %lx.%lx.%lx\n",
 		  cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
     }
 
 #ifndef	DEBUG_LOCKS
     rv = lockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, ap->a_p);
 #else
     rv = debuglockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, ap->a_p,
 		      "coda_lock", vp->filename, vp->line);
 #endif
     return (rv);
 }
 
 /*
  * Unlock a Coda vnode.  LK_RELEASE is added to the caller's flags and the
  * request is passed to the lock manager for the cnode's lock; the lock
  * manager's result is returned unchanged.
  */
 int
 coda_unlock(v)
     void *v;
 {
 /* true args */
     struct vop_unlock_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
 /* locals */
     int flags = ap->a_flags | LK_RELEASE;
 
     ENTRY;
     if (coda_lockdebug) {
 	myprintf(("Attempting unlock on %lx.%lx.%lx\n",
 		  cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
     }
 
     return (lockmgr(&cp->c_lock, flags, &vp->v_interlock, ap->a_p));
 }
 
 /*
  * Report the state of a Coda vnode's lock: returns whatever lockstatus()
  * says about the cnode's lock for process ap->a_p.
  */
 int
 coda_islocked(v)
     void *v;
 {
 /* true args */
     struct vop_islocked_args *ap = v;
     struct vnode *vp = ap->a_vp;
     struct cnode *cp = VTOC(vp);
 
     ENTRY;
 
     return (lockstatus(&cp->c_lock, ap->a_p));
 }
 
 /*
  * How one looks up a vnode given a device/inode pair (much like
  * VFS_VGET() or igetinode()).
  *
  * The device is mapped to its mount point and VFS_VGET() is asked for the
  * vnode of inode `ino', which is stored through `vpp'.
  *
  * Returns 0 on success, ENXIO when no mount point matches the device, and
  * ENOENT when VFS_VGET() fails (its own error code is only logged).
  */
 int
 coda_grab_vnode(dev_t dev, ino_t ino, struct vnode **vpp)
 {
     struct mount *mp;
     int           error;
 
     mp = devtomp(dev);
     if (mp == NULL) {
 	myprintf(("coda_grab_vnode: devtomp(%#lx) returns NULL\n",
 		  (u_long)dev2udev(dev)));
 	return(ENXIO);
     }
 
     /* XXX - ensure that nonzero-return means failure */
     error = VFS_VGET(mp,ino,vpp);
     if (error == 0)
 	return(0);
 
     myprintf(("coda_grab_vnode: iget/vget(%lx, %lu) returns %p, err %d\n", 
 	      (u_long)dev2udev(dev), (u_long)ino, (void *)*vpp, error));
     return(ENOENT);
 }
 
 /* Map a vnode type to the four-letter tag printed by print_vattr(). */
 static char *
 coda_vtype_tag(int type)
 {
     switch (type) {
     case VNON:	return "VNON";
     case VREG:	return "VREG";
     case VDIR:	return "VDIR";
     case VBLK:	return "VBLK";
     case VCHR:	return "VCHR";
     case VLNK:	return "VLNK";
     case VSOCK:	return "VSCK";
     case VFIFO:	return "VFFO";
     case VBAD:	return "VBAD";
     default:	break;
     }
     return "????";
 }
 
 /*
  * Debugging aid: dump the fields of a vattr through myprintf().  Most
  * numeric fields are cast to int for printing.
  */
 void
 print_vattr( attr )
 	struct vattr *attr;
 {
     char *typestr = coda_vtype_tag(attr->va_type);
 
     myprintf(("attr: type %s mode %d uid %d gid %d fsid %d rdev %d\n",
 	      typestr, (int)attr->va_mode, (int)attr->va_uid,
 	      (int)attr->va_gid, (int)attr->va_fsid, (int)attr->va_rdev));
 
     myprintf(("      fileid %d nlink %d size %d blocksize %d bytes %d\n",
 	      (int)attr->va_fileid, (int)attr->va_nlink, 
 	      (int)attr->va_size,
 	      (int)attr->va_blocksize,(int)attr->va_bytes));
 
     myprintf(("      gen %ld flags %ld vaflags %d\n",
 	      attr->va_gen, attr->va_flags, attr->va_vaflags));
 
     /* The three timestamps: access, modification, change. */
     myprintf(("      atime sec %d nsec %d\n",
 	      (int)attr->va_atime.tv_sec, (int)attr->va_atime.tv_nsec));
     myprintf(("      mtime sec %d nsec %d\n",
 	      (int)attr->va_mtime.tv_sec, (int)attr->va_mtime.tv_nsec));
     myprintf(("      ctime sec %d nsec %d\n",
 	      (int)attr->va_ctime.tv_sec, (int)attr->va_ctime.tv_nsec));
 }
 
 /*
  * How to print a ucred: its reference count and uid on one line, then one
  * line for each of its cr_ngroups groups, then an empty line.
  */
 void
 print_cred(cred)
 	struct ucred *cred;
 {
 	int idx = 0;
 
 	myprintf(("ref %d\tuid %d\n",cred->cr_ref,cred->cr_uid));
 
 	while (idx < cred->cr_ngroups) {
 		myprintf(("\tgroup %d: (%d)\n",idx,cred->cr_groups[idx]));
 		idx++;
 	}
 	myprintf(("\n"));
 }
 
 /*
  * Return the cnode for the given fid.
  *
  * If coda_find() already knows the fid, an extra reference is taken on the
  * cnode's vnode and that cnode is returned.  Otherwise a new cnode is
  * allocated, its lock initialised, a fresh vnode of the requested type is
  * obtained for the mount `vfsp' and tied to it, and the cnode is recorded
  * with coda_save().  Failure to get a vnode is fatal (panic).
  */
 struct cnode *
 make_coda_node(fid, vfsp, type)
      ViceFid *fid; struct mount *vfsp; short type;
 {
     struct cnode *cp;
     struct vnode *vp;
     int          err;
 
     /* Fast path: the cnode is already known. */
     cp = coda_find(fid);
     if (cp != NULL) {
 	vref(CTOV(cp));
 	return cp;
     }
 
     cp = coda_alloc();
     lockinit(&cp->c_lock, PINOD, "cnode", 0, 0);
     cp->c_fid = *fid;
 
     err = getnewvnode(VT_CODA, vfsp, coda_vnodeop_p, &vp);
     if (err) {
 	panic("coda: getnewvnode returned error %d\n", err);
     }
 
     /* Tie vnode and cnode together, then publish the cnode. */
     vp->v_data = cp;
     vp->v_type = type;
     cp->c_vnode = vp;
     coda_save(cp);
 
     return cp;
 }
Index: head/sys/fs/hpfs/hpfs_vnops.c
===================================================================
--- head/sys/fs/hpfs/hpfs_vnops.c	(revision 75579)
+++ head/sys/fs/hpfs/hpfs_vnops.c	(revision 75580)
@@ -1,1426 +1,1425 @@
 /*-
  * Copyright (c) 1998, 1999 Semen Ustimenko (semenu@FreeBSD.org)
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/malloc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/dirent.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #if !defined(__FreeBSD__)
 #include <vm/vm_prot.h>
 #endif
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_zone.h>
 #if defined(__FreeBSD__)
 #include <vm/vnode_pager.h>
 #endif
 #include <vm/vm_extern.h>
 
 #if !defined(__FreeBSD__)
 #include <miscfs/specfs/specdev.h>
 #include <miscfs/genfs/genfs.h>
 #endif
 
 #include <sys/unistd.h> /* for pathconf(2) constants */
 
 #include <fs/hpfs/hpfs.h>
 #include <fs/hpfs/hpfsmount.h>
 #include <fs/hpfs/hpfs_subr.h>
 #include <fs/hpfs/hpfs_ioctl.h>
 
 /*
  * Forward declarations of this file's static functions.  Which of the
  * paging/fsync or abortop entries are declared depends on whether
  * __FreeBSD__ is defined.
  */
 static int	hpfs_de_uiomove __P((struct hpfsmount *, struct hpfsdirent *,
 				     struct uio *));
 static int	hpfs_ioctl __P((struct vop_ioctl_args *ap));
 static int	hpfs_bypass __P((struct vop_generic_args *ap));
 static int	hpfs_read __P((struct vop_read_args *));
 static int	hpfs_write __P((struct vop_write_args *ap));
 static int	hpfs_getattr __P((struct vop_getattr_args *ap));
 static int	hpfs_setattr __P((struct vop_setattr_args *ap));
 static int	hpfs_inactive __P((struct vop_inactive_args *ap));
 static int	hpfs_print __P((struct vop_print_args *ap));
 static int	hpfs_reclaim __P((struct vop_reclaim_args *ap));
 static int	hpfs_strategy __P((struct vop_strategy_args *ap));
 static int	hpfs_access __P((struct vop_access_args *ap));
 static int	hpfs_open __P((struct vop_open_args *ap));
 static int	hpfs_close __P((struct vop_close_args *ap));
 static int	hpfs_readdir __P((struct vop_readdir_args *ap));
 static int	hpfs_lookup __P((struct vop_lookup_args *ap));
 static int	hpfs_create __P((struct vop_create_args *));
 static int	hpfs_remove __P((struct vop_remove_args *));
 static int	hpfs_bmap __P((struct vop_bmap_args *ap));
 #if defined(__FreeBSD__)
 static int	hpfs_getpages __P((struct vop_getpages_args *ap));
 static int	hpfs_putpages __P((struct vop_putpages_args *));
 static int	hpfs_fsync __P((struct vop_fsync_args *ap));
 #else
 static int	hpfs_abortop __P((struct vop_abortop_args *));
 #endif
 static int	hpfs_pathconf __P((struct vop_pathconf_args *ap));
 
 #if defined(__FreeBSD__)
 /*
  * Page-in entry point: hands the request unchanged to the generic vnode
  * pager and returns its result.
  */
 int
 hpfs_getpages(ap)
 	struct vop_getpages_args *ap;
 {
 	struct vnode *vp = ap->a_vp;
 
 	return (vnode_pager_generic_getpages(vp, ap->a_m, ap->a_count,
 	    ap->a_reqpage));
 }
 
 /*
  * Page-out entry point: hands the request unchanged to the generic vnode
  * pager and returns its result.
  */
 int
 hpfs_putpages(ap)
 	struct vop_putpages_args *ap;
 {
 	struct vnode *vp = ap->a_vp;
 
 	return (vnode_pager_generic_putpages(vp, ap->a_m, ap->a_count,
 	    ap->a_sync, ap->a_rtvals));
 }
 
 /*
  * Synchronise a vnode with the disk: write every buffer on the vnode's
  * dirty list, wait until no writes are outstanding, then write the node
  * itself with hpfs_update(), whose result is returned.
  */
 static int
 hpfs_fsync(ap)
 	struct vop_fsync_args /* {
 		struct vnode *a_vp;
 		struct ucred *a_cred;
 		int a_waitfor;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	int s;
 	struct buf *bp, *nbp;
 
 	/*
 	 * Flush all dirty buffers associated with a vnode.
 	 */
 loop:
 	s = splbio();
 	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 		nbp = TAILQ_NEXT(bp, b_vnbufs);
 		/* Skip buffers that cannot be locked without waiting. */
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
 			continue;
 		if ((bp->b_flags & B_DELWRI) == 0)
 			panic("hpfs_fsync: not dirty");
 		bremfree(bp);
 		splx(s);
 		(void) bwrite(bp);
 		/* The dirty list is rescanned from its head after every write. */
 		goto loop;
 	}
 	/* Sleep until the count of writes in progress drops to zero. */
 	while (vp->v_numoutput) {
 		vp->v_flag |= VBWAIT;
 		(void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "hpfsn", 0);
 	}
 #ifdef DIAGNOSTIC
 	/* Nothing should be dirty by now; report it and try again if so. */
 	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
 		vprint("hpfs_fsync: dirty", vp);
 		goto loop;
 	}
 #endif
 	splx(s);
 
 	/*
 	 * Write out the on-disc version of the vnode.
 	 */
 	return hpfs_update(VTOHP(vp));
 }
 
 #endif
 
 /*
  * HPFS ioctl handler; all supported commands inspect the extended
  * attributes (EAs) stored in the node's fnode.
  *
  *   HPFSIOCGEANUM  store the number of EAs in *(u_long *)a_data;
  *                  ENOENT when the node has none.
  *   HPFSIOCGEASZ   a_data holds an EA index on entry and is replaced by
  *                  the size (name + NUL + value) of that EA;
  *                  ENOENT when there is no such EA.
  *   HPFSIOCRDEA    a_data is a struct hpfs_rdea: the EA numbered ea_no is
  *                  copied out to ea_data and its size stored in ea_sz;
  *                  ENOENT when there is no such EA, or the copyout()
  *                  error when the destination cannot be written.
  *
  * Any other command yields EOPNOTSUPP.  The routine traces its work on
  * the console with printf().
  */
 static int
 hpfs_ioctl (
 	struct vop_ioctl_args /* {
 		struct vnode *a_vp;
 		u_long a_command;
 		caddr_t a_data;
 		int a_fflag;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap)
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct hpfsnode *hp = VTOHP(vp);
 	int error;
 
 	printf("hpfs_ioctl(0x%x, 0x%lx, 0x%p, 0x%x): ",
 		hp->h_no, ap->a_command, ap->a_data, ap->a_fflag);
 
 	switch (ap->a_command) {
 	case HPFSIOCGEANUM: {
 		u_long eanum;
 		u_long passed;
 		struct ea *eap;
 
 		eanum = 0;
 
 		if (hp->h_fn.fn_ealen > 0) {
 			eap = (struct ea *)&(hp->h_fn.fn_int);
 			passed = 0;
 
 			/* Walk the packed EA records, counting them. */
 			while (passed < hp->h_fn.fn_ealen) {
 
 				printf("EAname: %s\n", EA_NAME(eap));
 
 				eanum++;
 				passed += sizeof(struct ea) +
 					  eap->ea_namelen + 1 + eap->ea_vallen;
 				eap = (struct ea *)((caddr_t)hp->h_fn.fn_int +
 						passed);
 			}
 			error = 0;
 		} else {
 			error = ENOENT;
 		}
 
 		printf("%lu eas\n", eanum);
 
 		/* The count (0 when there are no EAs) is always stored. */
 		*(u_long *)ap->a_data = eanum;
 
 		break;
 	}
 	case HPFSIOCGEASZ: {
 		u_long eanum;
 		u_long passed;
 		struct ea *eap;
 
 		printf("EA%ld\n", *(u_long *)ap->a_data);
 
 		eanum = 0;
 		if (hp->h_fn.fn_ealen > 0) {
 			eap = (struct ea *)&(hp->h_fn.fn_int);
 			passed = 0;
 
 			/* ENOENT unless the requested index is reached. */
 			error = ENOENT;
 			while (passed < hp->h_fn.fn_ealen) {
 				printf("EAname: %s\n", EA_NAME(eap));
 
 				if (eanum == *(u_long *)ap->a_data) {
 					*(u_long *)ap->a_data =
 					  	eap->ea_namelen + 1 +
 						eap->ea_vallen;
 
 					error = 0;
 					break;
 				}
 
 				eanum++;
 				passed += sizeof(struct ea) +
 					  eap->ea_namelen + 1 + eap->ea_vallen;
 				eap = (struct ea *)((caddr_t)hp->h_fn.fn_int +
 						passed);
 			}
 		} else {
 			error = ENOENT;
 		}
 
 		break;
 	}
 	case HPFSIOCRDEA: {
 		u_long eanum;
 		u_long passed;
 		struct hpfs_rdea *rdeap;
 		struct ea *eap;
 
 		rdeap = (struct hpfs_rdea *)ap->a_data;
 		printf("EA%ld\n", rdeap->ea_no);
 
 		eanum = 0;
 		if (hp->h_fn.fn_ealen > 0) {
 			eap = (struct ea *)&(hp->h_fn.fn_int);
 			passed = 0;
 
 			/* ENOENT unless the requested index is reached. */
 			error = ENOENT;
 			while (passed < hp->h_fn.fn_ealen) {
 				printf("EAname: %s\n", EA_NAME(eap));
 
 				if (eanum == rdeap->ea_no) {
 					rdeap->ea_sz = eap->ea_namelen + 1 +
 							eap->ea_vallen;
 					/*
 					 * Report a failed copy (e.g. a bad
 					 * user address) instead of claiming
 					 * success.
 					 */
 					error = copyout(EA_NAME(eap),
 						rdeap->ea_data, rdeap->ea_sz);
 					break;
 				}
 
 				eanum++;
 				passed += sizeof(struct ea) +
 					  eap->ea_namelen + 1 + eap->ea_vallen;
 				eap = (struct ea *)((caddr_t)hp->h_fn.fn_int +
 						passed);
 			}
 		} else {
 			error = ENOENT;
 		}
 
 		break;
 	}
 	default:
 		error = EOPNOTSUPP;
 		break;
 	}
 	return (error);
 }
 
 /*
  * Map file offset to disk offset.
  *
  * When requested, *a_vpp receives the device vnode of the mount and (on
  * FreeBSD) *a_runb is set to 0.  If the caller also wants a block number
  * (a_bnp != NULL) the mapping is delegated to hpfs_hpbmap() and its result
  * returned; otherwise the call succeeds with 0.
  */
 int
 hpfs_bmap(ap)
 	struct vop_bmap_args /* {
 		struct vnode *a_vp;
 		daddr_t  a_bn;
 		struct vnode **a_vpp;
 		daddr_t *a_bnp;
 		int *a_runp;
 		int *a_runb;
 	} */ *ap;
 {
 	register struct hpfsnode *hp = VTOHP(ap->a_vp);
 
 	if (ap->a_vpp != NULL)
 		*ap->a_vpp = hp->h_devvp;
 #if defined(__FreeBSD__)
 	if (ap->a_runb != NULL)
 		*ap->a_runb = 0;
 #endif
 	if (ap->a_bnp != NULL) {
 		dprintf(("hpfs_bmap(0x%x, 0x%x): ",hp->h_no, ap->a_bn));
 
 		return (hpfs_hpbmap (hp, ap->a_bn, ap->a_bnp, ap->a_runp));
 	}
 
 	return (0);
 }
 
 /*
  * VOP_READ: copy file data to the caller through the buffer cache of the
  * underlying device vnode.
  *
  * The request is clamped to the bytes between uio_offset and the file size
  * stored in the fnode (h_fn.fn_size).  Each pass maps the current logical
  * block with hpfs_hpbmap(), reads at most one contiguous run (capped at
  * DFLTPHYS) with bread() and hands the wanted part of the buffer to
  * uiomove().
  *
  * Returns 0 on success, including a read that starts at or past end of
  * file and therefore transfers nothing, or the error reported by
  * hpfs_hpbmap(), bread() or uiomove().
  */
 static int
 hpfs_read(ap)
 	struct vop_read_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct hpfsnode *hp = VTOHP(vp);
 	struct uio *uio = ap->a_uio;
 	struct buf *bp;
 	u_int xfersz, toread;
 	u_int off;
 	daddr_t lbn, bn;
 	int resid;
 	int runl;
 	int error = 0;
 
 	/*
 	 * A read that starts at or beyond end of file transfers nothing.
 	 * Without this check the size difference computed below would be
 	 * negative and would no longer limit the request to the file.
 	 */
 	if (uio->uio_offset >= hp->h_fn.fn_size)
 		return (0);
 
 	resid = min (uio->uio_resid, hp->h_fn.fn_size - uio->uio_offset);
 
 	dprintf(("hpfs_read(0x%x, off: %d resid: %d, segflg: %d): [resid: 0x%x]\n",hp->h_no,(u_int32_t)uio->uio_offset,uio->uio_resid,uio->uio_segflg, resid));
 
 	while (resid) {
 		/* Split the file offset into a sector number and a byte offset. */
 		lbn = uio->uio_offset >> DEV_BSHIFT;
 		off = uio->uio_offset & (DEV_BSIZE - 1);
 		dprintf(("hpfs_read: resid: 0x%x lbn: 0x%x off: 0x%x\n",
 			uio->uio_resid, lbn, off));
 		error = hpfs_hpbmap(hp, lbn, &bn, &runl);
 		if (error)
 			return (error);
 
 		/*
 		 * toread is the end position inside the buffer (it includes
 		 * the leading "off" bytes that are skipped); xfersz is that
 		 * length rounded up to whole sectors.
 		 */
 		toread = min(off + resid, min(DFLTPHYS, (runl+1)*DEV_BSIZE));
 		xfersz = (toread + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
 		dprintf(("hpfs_read: bn: 0x%x (0x%x) toread: 0x%x (0x%x)\n",
 			bn, runl, toread, xfersz));
 
 		if (toread == 0) 
 			break;
 
 		error = bread(hp->h_devvp, bn, xfersz, NOCRED, &bp);
 		if (error) {
 			brelse(bp);
 			break;
 		}
 
 		error = uiomove(bp->b_data + off, toread - off, uio);
 		if(error) {
 			brelse(bp);
 			break;
 		}
 		brelse(bp);
 
 		/*
 		 * Only toread - off bytes were handed to the caller; account
 		 * for exactly that many so reads that start in the middle of
 		 * a sector are not cut short.
 		 */
 		resid -= toread - off;
 	}
 	dprintf(("hpfs_read: successful\n"));
 	return (error);
 }
 
 /*
  * VOP_WRITE: copy caller data into the file through the buffer cache of
  * the underlying device vnode.
  *
  * With IO_APPEND the write starts at the current file size.  If the write
  * would end beyond the current size the file is first grown with
  * hpfs_extend().  Each pass maps the current logical block with
  * hpfs_hpbmap() and fills at most one contiguous run (capped at DFLTPHYS):
  * a transfer that covers whole sectors gets a fresh zeroed buffer, anything
  * else is read first so the untouched bytes are preserved.
  *
  * Returns 0 on success or the error from hpfs_extend(), hpfs_hpbmap(),
  * bread(), uiomove() or, for IO_SYNC writes, bwrite().
  */
 static int
 hpfs_write(ap)
 	struct vop_write_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct hpfsnode *hp = VTOHP(vp);
 	struct uio *uio = ap->a_uio;
 	struct buf *bp;
 	u_int xfersz, towrite;
 	u_int off;
 	daddr_t lbn, bn;
 	int runl;
 	int error = 0;
 
 	dprintf(("hpfs_write(0x%x, off: %d resid: %d, segflg: %d):\n",hp->h_no,(u_int32_t)uio->uio_offset,uio->uio_resid,uio->uio_segflg));
 
 	if (ap->a_ioflag & IO_APPEND) {
 		dprintf(("hpfs_write: APPEND mode\n"));
 		uio->uio_offset = hp->h_fn.fn_size;
 	}
 	if (uio->uio_offset + uio->uio_resid > hp->h_fn.fn_size) {
 		error = hpfs_extend (hp, uio->uio_offset + uio->uio_resid);
 		if (error) {
 			printf("hpfs_write: hpfs_extend FAILED %d\n", error);
 			return (error);
 		}
 	}
 
 	while (uio->uio_resid) {
 		/* Split the file offset into a sector number and a byte offset. */
 		lbn = uio->uio_offset >> DEV_BSHIFT;
 		off = uio->uio_offset & (DEV_BSIZE - 1);
 		dprintf(("hpfs_write: resid: 0x%x lbn: 0x%x off: 0x%x\n",
 			uio->uio_resid, lbn, off));
 		error = hpfs_hpbmap(hp, lbn, &bn, &runl);
 		if (error)
 			return (error);
 
 		/*
 		 * towrite is the end position inside the buffer (it includes
 		 * the leading "off" bytes); xfersz is that length rounded up
 		 * to whole sectors.
 		 */
 		towrite = min(off + uio->uio_resid, min(DFLTPHYS, (runl+1)*DEV_BSIZE));
 		xfersz = (towrite + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
 		dprintf(("hpfs_write: bn: 0x%x (0x%x) towrite: 0x%x (0x%x)\n",
 			bn, runl, towrite, xfersz));
 
 		if ((off == 0) && (towrite == xfersz)) {
 			/* Every byte is overwritten: no need to read first. */
 			bp = getblk(hp->h_devvp, bn, xfersz, 0, 0);
 			clrbuf(bp);
 		} else {
 			error = bread(hp->h_devvp, bn, xfersz, NOCRED, &bp);
 			if (error) {
 				brelse(bp);
 				return (error);
 			}
 		}
 
 		error = uiomove(bp->b_data + off, towrite - off, uio);
 		if(error) {
 			brelse(bp);
 			return (error);
 		}
 
 		if (ap->a_ioflag & IO_SYNC) {
 			/* A failed synchronous write must reach the caller. */
 			error = bwrite(bp);
 			if (error)
 				return (error);
 		} else
 			bawrite(bp);
 	}
 
 	dprintf(("hpfs_write: successful\n"));
 	return (0);
 }
 
 /*
  * Catch-all vnode operation (installed for vop_default_desc): report the
  * name of the requested operation through dprintf() and return success
  * without doing anything else.
  */
 static int
 hpfs_bypass(struct vop_generic_args *ap)
 {
 	dprintf(("hpfs_bypass: %s\n", ap->a_desc->vdesc_name));
 
 	return (0);
 }
 
 /*
  * VOP_GETATTR: fill *a_vap from the in-core hpfsnode.
  *
  * Identity, ownership, mode and size come straight from the hpfsnode.
  * va_bytes is the size rounded up to DEV_BSIZE plus one more DEV_BSIZE.
  * The three timestamps are converted with hpfstimetounix(); when
  * H_PARVALID is not set hpfs_validateparent() is called first and its
  * error, if any, is returned.
  *
  * XXXXX do we need hpfsnode locking inside?
  */
 static int
 hpfs_getattr(ap)
 	struct vop_getattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct hpfsnode *hp = VTOHP(vp);
 	register struct vattr *vap = ap->a_vap;
 	int error;
 
 	dprintf(("hpfs_getattr(0x%x):\n", hp->h_no));
 
 #if defined(__FreeBSD__)
 	vap->va_fsid = dev2udev(hp->h_dev);
 #else /* defined(__NetBSD__) */
 	/* There is no "ip" in this function; the device lives in hp. */
 	vap->va_fsid = hp->h_dev;
 #endif
 	vap->va_fileid = hp->h_no;
 	vap->va_mode = hp->h_mode;
 	vap->va_nlink = 1;
 	vap->va_uid = hp->h_uid;
 	vap->va_gid = hp->h_gid;
 	vap->va_rdev = 0;				/* XXX UNODEV ? */
 	vap->va_size = hp->h_fn.fn_size;
 	vap->va_bytes = ((hp->h_fn.fn_size + DEV_BSIZE-1) & ~(DEV_BSIZE-1)) +
 			DEV_BSIZE;
 
 	/* Refresh the parent-derived fields before reporting the times. */
 	if (!(hp->h_flag & H_PARVALID)) {
 		error = hpfs_validateparent(hp);
 		if (error) 
 			return (error);
 	}
 	vap->va_atime = hpfstimetounix(hp->h_atime);
 	vap->va_mtime = hpfstimetounix(hp->h_mtime);
 	vap->va_ctime = hpfstimetounix(hp->h_ctime);
 
 	vap->va_flags = 0;
 	vap->va_gen = 0;
 	vap->va_blocksize = DEV_BSIZE;
 	vap->va_type = vp->v_type;
 	vap->va_filerev = 0;
 
 	return (0);
 }
 
 /*
  * VOP_SETATTR: change attributes of a file.
  *
  * Only the access/modification times and the size can be changed.  A
  * request that sets type, nlink, fsid, fileid, blocksize, rdev, bytes,
  * gen, flags, uid/gid or mode is rejected with EINVAL.
  *
  * Returns 0 on success, EINVAL/EROFS/EISDIR as described below, or the
  * error from the permission checks, vtruncbuf(), hpfs_truncate() or
  * hpfs_extend().
  *
  * XXXXX do we need hpfsnode locking inside?
  */
 static int
 hpfs_setattr(ap)
 	struct vop_setattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct hpfsnode *hp = VTOHP(vp);
 	struct vattr *vap = ap->a_vap;
 	struct ucred *cred = ap->a_cred;
 	struct proc *p = ap->a_p;
 	int error;
 
 	dprintf(("hpfs_setattr(0x%x):\n", hp->h_no));
 
 	/*
 	 * Check for unsettable attributes.
 	 */
 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
 	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
 		dprintf(("hpfs_setattr: changing nonsettable attr\n"));
 		return (EINVAL);
 	}
 
 	/* Can't change flags XXX Could be implemented */
 	if (vap->va_flags != VNOVAL) {
 		printf("hpfs_setattr: FLAGS CANNOT BE SET\n");
 		return (EINVAL);
 	}
 
 	/* Can't change uid/gid XXX Could be implemented */
 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
 		printf("hpfs_setattr: UID/GID CANNOT BE SET\n");
 		return (EINVAL);
 	}
 
 	/* Can't change mode XXX Could be implemented */
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		printf("hpfs_setattr: MODE CANNOT BE SET\n");
 		return (EINVAL);
 	}
 
 	/* Update times */
 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		/*
 		 * Allowed when the caller's uid matches h_uid, or
 		 * suser_xxx() succeeds, or VA_UTIMES_NULL is set and the
 		 * caller has write access; otherwise the last error from
 		 * those checks is returned.
 		 */
 		if (cred->cr_uid != hp->h_uid &&
 		    (error = suser_xxx(cred, p, PRISON_ROOT)) &&
 		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
 		    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
 			return (error);
 		if (vap->va_atime.tv_sec != VNOVAL)
 			hp->h_atime = vap->va_atime.tv_sec;
 		if (vap->va_mtime.tv_sec != VNOVAL)
 			hp->h_mtime = vap->va_mtime.tv_sec;
 
 		/* Flag checked by hpfs_inactive() to call hpfs_updateparent(). */
 		hp->h_flag |= H_PARCHANGE;
 	}
 
 	/* Change the file size: only regular files on writable mounts. */
 	if (vap->va_size != VNOVAL) {
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VREG:
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			break;
 		default:
 			printf("hpfs_setattr: WRONG v_type\n");
 			return (EINVAL);
 		}
 
 		if (vap->va_size < hp->h_fn.fn_size) {
 			/* Shrink: vtruncbuf() first, then hpfs_truncate(). */
 #if defined(__FreeBSD__)
 			error = vtruncbuf(vp, cred, p, vap->va_size, DEV_BSIZE);
 			if (error)
 				return (error);
 #else /* defined(__NetBSD__) */
 #error Need alternation for vtruncbuf()
 #endif
 			error = hpfs_truncate(hp, vap->va_size);
 			if (error)
 				return (error);
 
 		} else if (vap->va_size > hp->h_fn.fn_size) {
 			/* Grow: vnode_pager_setsize() first, then hpfs_extend(). */
 #if defined(__FreeBSD__)
 			vnode_pager_setsize(vp, vap->va_size);
 #endif
 			error = hpfs_extend(hp, vap->va_size);
 			if (error)
 				return (error);
 		}
 	}
 
 	return (0);
 }
 
 /*
  * Last reference to a node.  If necessary, write or delete it.
  *
  * Pending changes flagged by H_CHANGE and H_PARCHANGE are written with
  * hpfs_update() and hpfs_updateparent().  A node flagged H_INVAL is
  * recycled (FreeBSD) or destroyed (NetBSD) after being unlocked.
  *
  * The vnode is unlocked on every return path, including the error
  * returns, so a failed update does not leave it locked.
  *
  * Returns 0 on success or the error from hpfs_update() or
  * hpfs_updateparent().
  */
 int
 hpfs_inactive(ap)
 	struct vop_inactive_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct hpfsnode *hp = VTOHP(vp);
 	int error = 0;
 
 	dprintf(("hpfs_inactive(0x%x): \n", hp->h_no));
 
 	if (hp->h_flag & H_CHANGE) {
 		dprintf(("hpfs_inactive: node changed, update\n"));
 		error = hpfs_update (hp);
 		if (error)
 			goto unlock;
 	}
 
 	if (hp->h_flag & H_PARCHANGE) {
 		dprintf(("hpfs_inactive: parent node changed, update\n"));
 		error = hpfs_updateparent (hp);
 		if (error)
 			goto unlock;
 	}
 
 	if (prtactive && vp->v_usecount != 0)
 		vprint("hpfs_inactive: pushing active", vp);
 
 	if (hp->h_flag & H_INVAL) {
 		/* The node is no longer valid: drop the lock and get rid of it. */
 		VOP__UNLOCK(vp,0,ap->a_p);
 #if defined(__FreeBSD__)
 		vrecycle(vp, NULL, ap->a_p);
 #else /* defined(__NetBSD__) */
 		vgone(vp);
 #endif
 		return (0);
 	}
 
 unlock:
 	VOP__UNLOCK(vp,0,ap->a_p);
 	return (error);
 }
 
 /*
  * VOP_RECLAIM: detach the hpfsnode from its vnode and free it.
  *
  * The node is taken out of the hash, the name cache entries for the vnode
  * are purged, the reference on the device vnode (if any) is dropped, the
  * node's lock and interlock are destroyed and the memory is released.
  * Always returns 0.
  */
 int
 hpfs_reclaim(struct vop_reclaim_args *ap)
 {
 	struct vnode *vp;
 	struct hpfsnode *node;
 
 	vp = ap->a_vp;
 	node = VTOHP(vp);
 
 	dprintf(("hpfs_reclaim(0x%x0): \n", node->h_no));
 
 	hpfs_hphashrem(node);
 
 	/* Drop everything that still points at this node. */
 	cache_purge(vp);
 	if (node->h_devvp != NULL) {
 		vrele(node->h_devvp);
 		node->h_devvp = NULL;
 	}
 
 	lockdestroy(&node->h_lock);
 	mtx_destroy(&node->h_interlock);
 
 	vp->v_data = NULL;
 	FREE(node, M_HPFSNO);
 
 	return (0);
 }
 
 /*
  * VOP_PRINT: print the node number and the state of the node lock on one
  * line.  Always returns 0.
  */
 static int
 hpfs_print(struct vop_print_args *ap)
 {
 	struct hpfsnode *node = VTOHP(ap->a_vp);
 
 	printf("tag VT_HPFS, ino 0x%x",node->h_no);
 	lockmgr_printinfo(&node->h_lock);
 	printf("\n");
 
 	return (0);
 }
 
 /*
  * Calculate the logical to physical mapping if not done already,
  * then call the device strategy routine.
  *
  * In order to be able to swap to a file, the VOP_BMAP operation may not
  * deadlock on memory.  See hpfs_bmap() for details. XXXXXXX (not impl)
  */
 int
 hpfs_strategy(ap)
 	struct vop_strategy_args /* {
 		struct buf *a_bp;
 	} */ *ap;
 {
 	register struct buf *bp = ap->a_bp;
 	register struct vnode *vp = ap->a_vp;
 	struct vnode *nvp;
 	int error;
 
 	dprintf(("hpfs_strategy(): \n"));
 
 	if (vp->v_type == VBLK || vp->v_type == VCHR)
 		panic("hpfs_strategy: spec");
 	if (bp->b_blkno == bp->b_lblkno) {
 		error = VOP_BMAP(vp, bp->b_lblkno, &nvp, &bp->b_blkno, NULL, NULL);
 		if (error) {
 			printf("hpfs_strategy: VOP_BMAP FAILED %d\n", error);
 			bp->b_error = error;
 			bp->b_ioflags |= BIO_ERROR;
 			biodone(bp);
 			return (error);
 		}
 		if ((long)bp->b_blkno == -1)
 			vfs_bio_clrbuf(bp);
 	}
 	if ((long)bp->b_blkno == -1) {
 		biodone(bp);
 		return (0);
 	}
 	bp->b_dev = nvp->v_rdev;
 	VOP_STRATEGY(nvp, bp);
 	return (0);
 }
 
 /*
  * VOP_ACCESS: check whether the credential may access the file in the
  * requested mode.
  *
  * Write access to a directory, symbolic link or regular file on a
  * read-only mount fails with EROFS; every other request is decided by
  * vaccess() using the mode, uid and gid stored in the hpfsnode.
  *
  * XXXXX do we need hpfsnode locking inside?
  */
 int
 hpfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct hpfsnode *node = VTOHP(vp);
 	mode_t wanted = ap->a_mode;
 
 	dprintf(("hpfs_access(0x%x):\n", node->h_no));
 
 	/*
 	 * Other vnode types are not refused here even when the mount is
 	 * read-only.
 	 */
 	if ((wanted & VWRITE) &&
 	    (vp->v_type == VDIR || vp->v_type == VLNK ||
 	     vp->v_type == VREG) &&
 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
 		return (EROFS);
 
 	return (vaccess(vp->v_type, node->h_mode, node->h_uid, node->h_gid,
 	    ap->a_mode, ap->a_cred, NULL));
 }
 
 /*
  * VOP_OPEN: nothing needs to be done when a file is opened.
  *
  * With HPFS_DEBUG the node number is printed.  Always returns 0.
  */
 /* ARGSUSED */
 static int
 hpfs_open(struct vop_open_args *ap)
 {
 #if HPFS_DEBUG
 	printf("hpfs_open(0x%x):\n",VTOHP(ap->a_vp)->h_no);
 #endif
 
 	return (0);
 }
 
 /*
  * VOP_CLOSE: nothing needs to be done when a file is closed.
  *
  * With HPFS_DEBUG the node number is printed.  Always returns 0.
  */
 /* ARGSUSED */
 static int
 hpfs_close(struct vop_close_args *ap)
 {
 #if HPFS_DEBUG
 	printf("hpfs_close: %d\n",VTOHP(ap->a_vp)->h_no);
 #endif
 
 	return (0);
 }
 
 /*
  * Convert one HPFS directory entry into a struct dirent and copy it to
  * the caller.
  *
  * hpmp supplies the character translation used by hpfs_d2u(), dep is the
  * on-disk entry and uio the destination.  Every record is copied out with
  * the fixed length sizeof(struct dirent).
  *
  * Returns 0 on success or the error from uiomove().
  */
 static int
 hpfs_de_uiomove (
 	struct hpfsmount *hpmp,
 	struct hpfsdirent *dep,
 	struct uio *uio)
 {
 	struct dirent cde;
 	int i, error;
 
 	dprintf(("[no: 0x%x, size: %d, name: %2d:%.*s, flag: 0x%x] ",
 		dep->de_fnode, dep->de_size, dep->de_namelen,
 		dep->de_namelen, dep->de_name, dep->de_flag));
 
 	/*
 	 * The whole structure is copied out, so clear it first: the bytes
 	 * of d_name behind the terminator and any padding must not carry
 	 * stale kernel stack contents to the caller.
 	 */
 	bzero(&cde, sizeof(cde));
 
 	/* Translate the name one character at a time. */
 	for (i=0; i<dep->de_namelen; i++) 
 		cde.d_name[i] = hpfs_d2u(hpmp, dep->de_name[i]);
 
 	cde.d_name[dep->de_namelen] = '\0';
 	cde.d_namlen = dep->de_namelen;
 	cde.d_fileno = dep->de_fnode;
 	cde.d_type = (dep->de_flag & DE_DIR) ? DT_DIR : DT_REG;
 	cde.d_reclen = sizeof(struct dirent);
 
 	error = uiomove((char *)&cde, sizeof(struct dirent), uio);
 	if (error)
 		return (error);
 
 	dprintf(("[0x%x] ", uio->uio_resid));
 	return (error);
 }
 
 
 /*
  * Templates for the synthetic "." and ".." records.  hpfs_readdir() fills
  * in d_fileno before copying them out.
  */
 static struct dirent hpfs_de_dot =
 	{ 0, sizeof(struct dirent), DT_DIR, 1, "." };
 static struct dirent hpfs_de_dotdot =
 	{ 0, sizeof(struct dirent), DT_DIR, 2, ".." };
 /*
  * VOP_READDIR: copy directory entries to the caller.
  *
  * Every record has the fixed length sizeof(struct dirent), so the entry
  * to resume at is derived from uio_offset.  Offsets inside the first two
  * records produce the faked "." and ".." entries.  The remaining entries
  * come from a walk over the tree of directory blocks that starts at the
  * block named by the fnode: an entry with DE_DOWN leads into a child
  * block ("dive"), and at the end of a block the walk returns to d_parent
  * and continues after the entry that points to the block just left
  * ("restore").  Entries flagged DE_SPECIAL are not reported.
  *
  * *a_eofflag is set to 1 when the whole tree has been walked.  When
  * a_ncookies is not NULL an array with one offset cookie per copied entry
  * is allocated and returned through a_cookies.
  *
  * Returns 0 on success, EINVAL for a bad block magic or a child block
  * that cannot be found again in its parent, or the error from bread() or
  * uiomove().
  */
 int
 hpfs_readdir(ap)
 	struct vop_readdir_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 		int *a_ncookies;
 		u_int **cookies;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct hpfsnode *hp = VTOHP(vp);
 	struct hpfsmount *hpmp = hp->h_hpmp;
 	struct uio *uio = ap->a_uio;
 	int ncookies = 0, i, num, cnum;
 	int error = 0;
 	off_t off;
 	struct buf *bp;
 	struct dirblk *dp;
 	struct hpfsdirent *dep;
 	lsn_t olsn;
 	lsn_t lsn;
 	int level;
 
 	dprintf(("hpfs_readdir(0x%x, 0x%x, 0x%x): ",hp->h_no,(u_int32_t)uio->uio_offset,uio->uio_resid));
 
 	/* Remember the starting offset; the cookie code below needs it. */
 	off = uio->uio_offset;
 
 	if( uio->uio_offset < sizeof(struct dirent) ) {
 		dprintf((". faked, "));
 		hpfs_de_dot.d_fileno = hp->h_no;
 		error = uiomove((char *)&hpfs_de_dot,sizeof(struct dirent),uio);
 		if(error) {
 			return (error);
 		}
 
 		ncookies ++;
 	}
 
 	if( uio->uio_offset < 2 * sizeof(struct dirent) ) {
 		dprintf((".. faked, "));
 		hpfs_de_dotdot.d_fileno = hp->h_fn.fn_parent;
 
 		error = uiomove((char *)&hpfs_de_dotdot, sizeof(struct dirent),
 				uio);
 		if(error) {
 			return (error);
 		}
 
 		ncookies ++;
 	}
 
 	/*
 	 * num is the index of the first real entry to report (the two faked
 	 * records are not counted); cnum counts the reportable entries seen
 	 * so far during the walk.
 	 */
 	num = uio->uio_offset / sizeof(struct dirent) - 2;
 	cnum = 0;
 
 	lsn = ((alleaf_t *)hp->h_fn.fn_abd)->al_lsn;
 
 	/* olsn != 0 means "coming back up from block olsn". */
 	olsn = 0;
 	level = 1;
 
 dive:
 	dprintf(("[dive 0x%x] ", lsn));
 	error = bread(hp->h_devvp, lsn, D_BSIZE, NOCRED, &bp);
 	if (error) {
 		brelse(bp);
 		return (error);
 	}
 
 	dp = (struct dirblk *) bp->b_data;
 	if (dp->d_magic != D_MAGIC) {
 		printf("hpfs_readdir: MAGIC DOESN'T MATCH\n");
 		brelse(bp);
 		return (EINVAL);
 	}
 
 	dep = D_DIRENT(dp);
 
 	if (olsn) {
 		dprintf(("[restore 0x%x] ", olsn));
 
 		/* Find the entry whose down pointer is the block just left. */
 		while(!(dep->de_flag & DE_END) ) {
 			if((dep->de_flag & DE_DOWN) &&
 			   (olsn == DE_DOWNLSN(dep)))
 					 break;
 			dep = (hpfsdirent_t *)((caddr_t)dep + dep->de_reclen);
 		}
 
 		if((dep->de_flag & DE_DOWN) && (olsn == DE_DOWNLSN(dep))) {
 			/* It was the last entry: this block is finished too. */
 			if (dep->de_flag & DE_END)
 				goto blockdone;
 
 			/* Report the entry itself, its subtree is already done. */
 			if (!(dep->de_flag & DE_SPECIAL)) {
 				if (num <= cnum) {
 					if (uio->uio_resid < sizeof(struct dirent)) {
 						brelse(bp);
 						dprintf(("[resid] "));
 						goto readdone;
 					}
 
 					error = hpfs_de_uiomove(hpmp, dep, uio);
 					if (error) {
 						brelse (bp);
 						return (error);
 					}
 					ncookies++;
 
 					if (uio->uio_resid < sizeof(struct dirent)) {
 						brelse(bp);
 						dprintf(("[resid] "));
 						goto readdone;
 					}
 				}
 				cnum++;
 			}
 
 			dep = (hpfsdirent_t *)((caddr_t)dep + dep->de_reclen);
 		} else {
 			printf("hpfs_readdir: ERROR! oLSN not found\n");
 			brelse(bp);
 			return (EINVAL);
 		}
 	}
 
 	olsn = 0;
 
 	/* Walk the rest of this block, diving at the first down pointer. */
 	while(!(dep->de_flag & DE_END)) {
 		if(dep->de_flag & DE_DOWN) {
 			lsn = DE_DOWNLSN(dep);
 			brelse(bp);
 			level++;
 			goto dive;
 		}
 
 		if (!(dep->de_flag & DE_SPECIAL)) {
 			if (num <= cnum) {
 				if (uio->uio_resid < sizeof(struct dirent)) {
 					brelse(bp);
 					dprintf(("[resid] "));
 					goto readdone;
 				}
 
 				error = hpfs_de_uiomove(hpmp, dep, uio);
 				if (error) {
 					brelse (bp);
 					return (error);
 				}
 				ncookies++;
 				
 				if (uio->uio_resid < sizeof(struct dirent)) {
 					brelse(bp);
 					dprintf(("[resid] "));
 					goto readdone;
 				}
 			}
 			cnum++;
 		}
 
 		dep = (hpfsdirent_t *)((caddr_t)dep + dep->de_reclen);
 	}
 
 	/* The terminating entry may carry a down pointer as well. */
 	if(dep->de_flag & DE_DOWN) {
 		dprintf(("[enddive] "));
 		lsn = DE_DOWNLSN(dep);
 		brelse(bp);
 		level++;
 		goto dive;
 	}
 
 blockdone:
 	/* Finished with this block: go back up to its parent. */
 	dprintf(("[EOB] "));
 	olsn = lsn;
 	lsn = dp->d_parent;
 	brelse(bp);
 	level--;
 
 	dprintf(("[level %d] ", level));
 
 	if (level > 0)
 		goto dive;	/* undive really */
 
 	if (ap->a_eofflag) {
 	    dprintf(("[EOF] "));
 	    *ap->a_eofflag = 1;
 	}
 
 readdone:
 	dprintf(("[readdone]\n"));
 	if (!error && ap->a_ncookies != NULL) {
 		struct dirent* dpStart;
 		struct dirent* dp;
 #if defined(__FreeBSD__)
 		u_long *cookies;
 		u_long *cookiep;
 #else /* defined(__NetBSD__) */
 		off_t *cookies;
 		off_t *cookiep;
 #endif
 
 		dprintf(("%d cookies, ",ncookies));
 		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
 			panic("hpfs_readdir: unexpected uio from NFS server");
 		/* Step back over the bytes copied by this call. */
 		dpStart = (struct dirent *)
 		     ((caddr_t)uio->uio_iov->iov_base -
 			 (uio->uio_offset - off));
 #if defined(__FreeBSD__)
 		MALLOC(cookies, u_long *, ncookies * sizeof(u_long),
 		       M_TEMP, M_WAITOK);
 #else /* defined(__NetBSD__) */
 		MALLOC(cookies, off_t *, ncookies * sizeof(off_t),
 		       M_TEMP, M_WAITOK);
 #endif
 		/* Each cookie is the offset just past the matching record. */
 		for (dp = dpStart, cookiep = cookies, i=0;
 		     i < ncookies;
 		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen), i++) {
 			off += dp->d_reclen;
 			*cookiep++ = (u_int) off;
 		}
 		*ap->a_ncookies = ncookies;
 		*ap->a_cookies = cookies;
 	}
 
 	return (0);
 }
 
 /*
  * Look up one path component in directory a_dvp and return its vnode
  * through a_vpp.
  *
  * Only the LOOKUP, CREATE and DELETE operations are accepted; anything
  * else returns EOPNOTSUPP.  The caller needs search (VEXEC) permission on
  * the directory.  "." returns the directory itself with an extra
  * reference, ".." (with ISDOTDOT) fetches the parent recorded in the
  * fnode, and every other name is searched with hpfs_genlookupbyname().
  *
  * Returns 0 on success, EJUSTRETURN when the last component does not
  * exist and is about to be created, or an error from VOP_ACCESS(),
  * hpfs_genlookupbyname(), VFS_VGET() or the lock operations.
  */
 int
 hpfs_lookup(ap)
 	struct vop_lookup_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	register struct vnode *dvp = ap->a_dvp;
 	register struct hpfsnode *dhp = VTOHP(dvp);
 	struct hpfsmount *hpmp = dhp->h_hpmp;
 	struct componentname *cnp = ap->a_cnp;
 	struct ucred *cred = cnp->cn_cred;
 	int error;
 	int nameiop = cnp->cn_nameiop;
 	int flags = cnp->cn_flags;
 	int lockparent = flags & LOCKPARENT;
 #if HPFS_DEBUG
 	int wantparent = flags & (LOCKPARENT|WANTPARENT);
 #endif
 	dprintf(("hpfs_lookup(0x%x, %s, %ld, %d, %d): \n",
 		dhp->h_no, cnp->cn_nameptr, cnp->cn_namelen,
 		lockparent, wantparent));
 
 	if (nameiop != CREATE && nameiop != DELETE && nameiop != LOOKUP) {
 		printf("hpfs_lookup: LOOKUP, DELETE and CREATE are only supported\n");
 		return (EOPNOTSUPP);
 	}
 
 	error = VOP_ACCESS(dvp, VEXEC, cred, cnp->cn_proc);
 	if(error)
 		return (error);
 
 	if( (cnp->cn_namelen == 1) &&
 	    !strncmp(cnp->cn_nameptr,".",1) ) {
 		dprintf(("hpfs_lookup(0x%x,...): . faked\n",dhp->h_no));
 
 		VREF(dvp);
 		*ap->a_vpp = dvp;
 
 		return (0);
 	} else if( (cnp->cn_namelen == 2) &&
 	    !strncmp(cnp->cn_nameptr,"..",2) && (flags & ISDOTDOT) ) {
 		dprintf(("hpfs_lookup(0x%x,...): .. faked (0x%x)\n",
 			dhp->h_no, dhp->h_fn.fn_parent));
 
 		/* The directory is unlocked while its parent is fetched. */
 		VOP__UNLOCK(dvp,0,cnp->cn_proc);
 
 		error = VFS_VGET(hpmp->hpm_mp,
 				 dhp->h_fn.fn_parent, ap->a_vpp); 
 		if(error) {
 			VOP__LOCK(dvp, 0, cnp->cn_proc);
 			return(error);
 		}
 
 		/* Re-lock the directory only if the caller asked for it. */
 		if( lockparent && (flags & ISLASTCN) && 
 		    (error = VOP__LOCK(dvp, 0, cnp->cn_proc)) ) {
 			vput( *(ap->a_vpp) );
 			return (error);
 		}
 		return (error);
 	} else {
 		struct buf *bp;
 		struct hpfsdirent *dep;
 		struct hpfsnode *hp;
 
 		error = hpfs_genlookupbyname(dhp,
 				cnp->cn_nameptr, cnp->cn_namelen, &bp, &dep);
 		if (error) {
 			/*
 			 * A missing last component is not an error when it is
 			 * about to be created.
 			 * NOTE(review): RENAME was already rejected above, so
 			 * that half of the test cannot be true here.
 			 */
 			if ((error == ENOENT) && (flags & ISLASTCN) &&
 			    (nameiop == CREATE || nameiop == RENAME)) {
 				if(!lockparent)
 					VOP__UNLOCK(dvp, 0, cnp->cn_proc);
 				cnp->cn_flags |= SAVENAME;
 				return (EJUSTRETURN);
 			}
 
 			return (error);
 		}
 
 		dprintf(("hpfs_lookup: fnode: 0x%x, CPID: 0x%x\n",
 			 dep->de_fnode, dep->de_cpid));
 
 		/* Deleting the last component needs write access to the directory. */
 		if (nameiop == DELETE && (flags & ISLASTCN)) {
 			error = VOP_ACCESS(dvp, VWRITE, cred, cnp->cn_proc);
 			if (error) {
 				brelse(bp);
 				return (error);
 			}
 		}
 
 		/* The entry refers to the directory itself. */
 		if (dhp->h_no == dep->de_fnode) {
 			brelse(bp);
 			VREF(dvp);
 			*ap->a_vpp = dvp;
 			return (0);
 		}
 
 		error = VFS_VGET(hpmp->hpm_mp, dep->de_fnode, ap->a_vpp);
 		if (error) {
 			printf("hpfs_lookup: VFS_VGET FAILED %d\n", error);
 			brelse(bp);
 			return(error);
 		}
 
 		hp = VTOHP(*ap->a_vpp);
 
 		/*
 		 * Cache the times and the name from the directory entry in
 		 * the node and mark them valid.
 		 */
 		hp->h_mtime = dep->de_mtime;
 		hp->h_ctime = dep->de_ctime;
 		hp->h_atime = dep->de_atime;
 		bcopy(dep->de_name, hp->h_name, dep->de_namelen);
 		hp->h_name[dep->de_namelen] = '\0';
 		hp->h_namelen = dep->de_namelen;
 		hp->h_flag |= H_PARVALID;
 
 		brelse(bp);
 
 		if(!lockparent || !(flags & ISLASTCN))
 			VOP__UNLOCK(dvp, 0, cnp->cn_proc);
 		/* No name cache entry for a last component being deleted or created. */
 		if ((flags & MAKEENTRY) &&
 		    (!(flags & ISLASTCN) || 
 		     (nameiop != DELETE && nameiop != CREATE)))
 			cache_enter(dvp, *ap->a_vpp, cnp);
 	}
 	return (error);
 }
 
 int
 hpfs_remove(ap)
 	struct vop_remove_args /* {
 		struct vnode *a_dvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	int error;
 
 	dprintf(("hpfs_remove(0x%x, %s, %ld): \n", VTOHP(ap->a_vp)->h_no,
 		ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen));
 
 	if (ap->a_vp->v_type == VDIR)
 		return (EPERM);
 
 	error = hpfs_removefnode (ap->a_dvp, ap->a_vp, ap->a_cnp);
 	return (error);
 }
 
 int
 hpfs_create(ap)
 	struct vop_create_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	int error;
 
 	dprintf(("hpfs_create(0x%x, %s, %ld): \n", VTOHP(ap->a_dvp)->h_no,
 		ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen));
 
 	if (!(ap->a_cnp->cn_flags & HASBUF)) 
 		panic ("hpfs_create: no name\n");
 
 	error = hpfs_makefnode (ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap);
 
 	return (error);
 }
 
 /*
  * Return POSIX pathconf information applicable to the HPFS filesystem.
  *
  * The value for the variable a_name is stored in *a_retval and 0 is
  * returned; an unknown name returns EINVAL and leaves *a_retval alone.
  */
 int
 hpfs_pathconf(struct vop_pathconf_args *ap)
 {
 	switch (ap->a_name) {
 	case _PC_LINK_MAX:
 	case _PC_CHOWN_RESTRICTED:
 #if defined(__NetBSD__)
 	case _PC_SYNC_IO:
 #endif
 		*ap->a_retval = 1;
 		break;
 	case _PC_NAME_MAX:
 		*ap->a_retval = HPFS_MAXFILENAME;
 		break;
 	case _PC_PATH_MAX:
 		*ap->a_retval = PATH_MAX;
 		break;
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 0;
 		break;
 #if defined(__NetBSD__)
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 32;
 		break;
 #endif
 	default:
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 
 /*
  * Global vfs data structures
  *
  * The tables below bind vnode operation descriptors to the hpfs_*
  * implementations in this file.  In the FreeBSD table any operation that
  * is not listed goes to hpfs_bypass() through vop_default_desc.
  */
 vop_t **hpfs_vnodeop_p;
 #if defined(__FreeBSD__)
 struct vnodeopv_entry_desc hpfs_vnodeop_entries[] = {
 	{ &vop_default_desc, (vop_t *)hpfs_bypass },
 
 	{ &vop_getattr_desc, (vop_t *)hpfs_getattr },
 	{ &vop_setattr_desc, (vop_t *)hpfs_setattr },
 	{ &vop_inactive_desc, (vop_t *)hpfs_inactive },
 	{ &vop_reclaim_desc, (vop_t *)hpfs_reclaim },
 	{ &vop_print_desc, (vop_t *)hpfs_print },
 	{ &vop_create_desc, (vop_t *)hpfs_create },
 	{ &vop_remove_desc, (vop_t *)hpfs_remove },
 	{ &vop_islocked_desc, (vop_t *)vop_stdislocked },
 	{ &vop_unlock_desc, (vop_t *)vop_stdunlock },
 	{ &vop_lock_desc, (vop_t *)vop_stdlock },
 	{ &vop_cachedlookup_desc, (vop_t *)hpfs_lookup },
 	{ &vop_lookup_desc, (vop_t *)vfs_cache_lookup },
 	{ &vop_access_desc, (vop_t *)hpfs_access },
 	{ &vop_close_desc, (vop_t *)hpfs_close },
 	{ &vop_open_desc, (vop_t *)hpfs_open },
 	{ &vop_readdir_desc, (vop_t *)hpfs_readdir },
 	{ &vop_fsync_desc, (vop_t *)hpfs_fsync },
 	{ &vop_bmap_desc, (vop_t *)hpfs_bmap },
 	{ &vop_getpages_desc, (vop_t *) hpfs_getpages },
 	{ &vop_putpages_desc, (vop_t *) hpfs_putpages },
 	{ &vop_strategy_desc, (vop_t *)hpfs_strategy },
-	{ &vop_bwrite_desc, (vop_t *)vop_stdbwrite },
 	{ &vop_read_desc, (vop_t *)hpfs_read },
 	{ &vop_write_desc, (vop_t *)hpfs_write },
 	{ &vop_ioctl_desc, (vop_t *)hpfs_ioctl },
 	{ &vop_pathconf_desc, (vop_t *)hpfs_pathconf },
 	{ NULL, NULL }
 };
 
 static
 struct vnodeopv_desc hpfs_vnodeop_opv_desc =
 	{ &hpfs_vnodeop_p, hpfs_vnodeop_entries };
 
 VNODEOP_SET(hpfs_vnodeop_opv_desc);
 #else /* defined(__NetBSD__) */
 /*
  * NOTE(review): the identifiers in this branch are named ntfs_* although
  * the entries refer to hpfs_* functions, and ntfs_vnodeop_p is not
  * declared in the visible part of this file -- confirm before building
  * on NetBSD.
  */
 struct vnodeopv_entry_desc ntfs_vnodeop_entries[] = {
 	{ &vop_default_desc, (vop_t *) hpfs_bypass },
 	{ &vop_lookup_desc, (vop_t *) hpfs_lookup },	/* lookup */
 	{ &vop_create_desc, genfs_eopnotsupp },		/* create */
 	{ &vop_mknod_desc, genfs_eopnotsupp },		/* mknod */
 	{ &vop_open_desc, (vop_t *) hpfs_open },	/* open */
 	{ &vop_close_desc,(vop_t *) hpfs_close },	/* close */
 	{ &vop_access_desc, (vop_t *) hpfs_access },	/* access */
 	{ &vop_getattr_desc, (vop_t *) hpfs_getattr },	/* getattr */
 	{ &vop_setattr_desc, genfs_eopnotsupp },	/* setattr */
 	{ &vop_read_desc, (vop_t *) hpfs_read },	/* read */
 	{ &vop_write_desc, (vop_t *) hpfs_write },	/* write */
 	{ &vop_lease_desc, genfs_lease_check },		/* lease */
 	{ &vop_fcntl_desc, genfs_fcntl },		/* fcntl */
 	{ &vop_ioctl_desc, genfs_enoioctl },		/* ioctl */
 	{ &vop_poll_desc, genfs_poll },			/* poll */
 	{ &vop_revoke_desc, genfs_revoke },		/* revoke */
 	{ &vop_fsync_desc, genfs_fsync },		/* fsync */
 	{ &vop_seek_desc, genfs_seek },			/* seek */
 	{ &vop_remove_desc, genfs_eopnotsupp },		/* remove */
 	{ &vop_link_desc, genfs_eopnotsupp },		/* link */
 	{ &vop_rename_desc, genfs_eopnotsupp },		/* rename */
 	{ &vop_mkdir_desc, genfs_eopnotsupp },		/* mkdir */
 	{ &vop_rmdir_desc, genfs_eopnotsupp },		/* rmdir */
 	{ &vop_symlink_desc, genfs_eopnotsupp },	/* symlink */
 	{ &vop_readdir_desc, (vop_t *) hpfs_readdir },	/* readdir */
 	{ &vop_readlink_desc, genfs_eopnotsupp },	/* readlink */
 	{ &vop_abortop_desc, genfs_abortop },		/* abortop */
 	{ &vop_inactive_desc, (vop_t *) hpfs_inactive },	/* inactive */
 	{ &vop_reclaim_desc, (vop_t *) hpfs_reclaim },	/* reclaim */
 	{ &vop_lock_desc, genfs_lock },			/* lock */
 	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
 	{ &vop_bmap_desc, (vop_t *) hpfs_bmap },	/* bmap */
 	{ &vop_strategy_desc, (vop_t *) hpfs_strategy },	/* strategy */
 	{ &vop_print_desc, (vop_t *) hpfs_print },	/* print */
 	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
 	{ &vop_pathconf_desc, hpfs_pathconf },		/* pathconf */
 	{ &vop_advlock_desc, genfs_nullop },		/* advlock */
 	{ &vop_blkatoff_desc, genfs_eopnotsupp },	/* blkatoff */
 	{ &vop_valloc_desc, genfs_eopnotsupp },		/* valloc */
 	{ &vop_reallocblks_desc, genfs_eopnotsupp },	/* reallocblks */
 	{ &vop_vfree_desc, genfs_eopnotsupp },		/* vfree */
 	{ &vop_truncate_desc, genfs_eopnotsupp },	/* truncate */
 	{ &vop_update_desc, genfs_eopnotsupp },		/* update */
 	{ &vop_bwrite_desc, vn_bwrite },		/* bwrite */
 	{ (struct vnodeop_desc *)NULL, (int (*) __P((void *)))NULL }
 };
 struct vnodeopv_desc ntfs_vnodeop_opv_desc =
 	{ &ntfs_vnodeop_p, ntfs_vnodeop_entries };
 
 #endif
Index: head/sys/fs/ntfs/ntfs_vnops.c
===================================================================
--- head/sys/fs/ntfs/ntfs_vnops.c	(revision 75579)
+++ head/sys/fs/ntfs/ntfs_vnops.c	(revision 75580)
@@ -1,943 +1,942 @@
 /*	$NetBSD: ntfs_vnops.c,v 1.23 1999/10/31 19:45:27 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * John Heidemann of the UCLA Ficus project.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  *
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/malloc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/dirent.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #if defined(__NetBSD__)
 #include <vm/vm_prot.h>
 #endif
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #if defined(__FreeBSD__)
 #include <vm/vnode_pager.h>
 #endif
 #include <vm/vm_extern.h>
 
 #include <sys/sysctl.h>
 
 /*#define NTFS_DEBUG 1*/
 #include <ntfs/ntfs.h>
 #include <ntfs/ntfs_inode.h>
 #include <ntfs/ntfs_subr.h>
 #if defined(__NetBSD__)
 #include <miscfs/specfs/specdev.h>
 #include <miscfs/genfs/genfs.h>
 #endif
 
 #include <sys/unistd.h> /* for pathconf(2) constants */
 
 static int	ntfs_read __P((struct vop_read_args *));
 static int	ntfs_write __P((struct vop_write_args *ap));
 static int	ntfs_getattr __P((struct vop_getattr_args *ap));
 static int	ntfs_inactive __P((struct vop_inactive_args *ap));
 static int	ntfs_print __P((struct vop_print_args *ap));
 static int	ntfs_reclaim __P((struct vop_reclaim_args *ap));
 static int	ntfs_strategy __P((struct vop_strategy_args *ap));
 static int	ntfs_access __P((struct vop_access_args *ap));
 static int	ntfs_open __P((struct vop_open_args *ap));
 static int	ntfs_close __P((struct vop_close_args *ap));
 static int	ntfs_readdir __P((struct vop_readdir_args *ap));
 static int	ntfs_lookup __P((struct vop_lookup_args *ap));
 static int	ntfs_bmap __P((struct vop_bmap_args *ap));
 #if defined(__FreeBSD__)
 static int	ntfs_getpages __P((struct vop_getpages_args *ap));
 static int	ntfs_putpages __P((struct vop_putpages_args *));
 static int	ntfs_fsync __P((struct vop_fsync_args *ap));
 #else
 static int	ntfs_bypass __P((struct vop_generic_args *ap));
 #endif
 static int	ntfs_pathconf __P((void *));
 
 int	ntfs_prtactive = 1;	/* 1 => print out reclaim of active vnodes */
 
 #if defined(__FreeBSD__)
 /*
  * Page-in entry point: the request is forwarded unchanged to the generic
  * vnode pager and its result is returned.
  */
 int
 ntfs_getpages(ap)
 	struct vop_getpages_args *ap;
 {
 	int rv;
 
 	rv = vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
 	    ap->a_reqpage);
 	return (rv);
 }
 
 /*
  * Page-out entry point: the request is forwarded unchanged to the generic
  * vnode pager and its result is returned.
  */
 int
 ntfs_putpages(ap)
 	struct vop_putpages_args *ap;
 {
 	int rv;
 
 	rv = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
 	    ap->a_sync, ap->a_rtvals);
 	return (rv);
 }
 #endif
 
 /*
  * Identity block mapping: the caller gets back the vnode and the block
  * number it passed in, and the run-length hints are reported as zero.
  * Always returns 0.
  */
 int
 ntfs_bmap(ap)
 	struct vop_bmap_args /* {
 		struct vnode *a_vp;
 		daddr_t  a_bn;
 		struct vnode **a_vpp;
 		daddr_t *a_bnp;
 		int *a_runp;
 		int *a_runb;
 	} */ *ap;
 {
 	dprintf(("ntfs_bmap: vn: %p, blk: %d\n", ap->a_vp,(u_int32_t)ap->a_bn));
 
 	/* Every output pointer is optional; fill in only those supplied. */
 	if (ap->a_vpp)
 		*ap->a_vpp = ap->a_vp;
 
 	if (ap->a_bnp)
 		*ap->a_bnp = ap->a_bn;
 
 	if (ap->a_runp)
 		*ap->a_runp = 0;
 
 #if !defined(__NetBSD__)
 	if (ap->a_runb)
 		*ap->a_runb = 0;
 #endif
 
 	return (0);
 }
 
 /*
  * Read from the attribute backing this vnode.
  *
  * The request is clipped to the end of the attribute.  A request that
  * starts beyond the end, or that clips to nothing, returns 0 without
  * moving any data.  Otherwise the (possibly shortened) range is passed to
  * ntfs_readattr() together with the caller's uio, and its error code is
  * returned.
  */
 static int
 ntfs_read(ap)
 	struct vop_read_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	struct uio *uio = ap->a_uio;
 	struct ntfsmount *ntmp = ip->i_mp;
 	u_int64_t toread;
 	int error;
 
 	dprintf(("ntfs_read: ino: %d, off: %d resid: %d, segflg: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid,uio->uio_segflg));
 
 	dprintf(("ntfs_read: filesize: %d",(u_int32_t)fp->f_size));
 
 	/* don't allow reading after end of file */
 	if (uio->uio_offset > fp->f_size) {
 		toread = 0;
 	} else {
 		/*
 		 * Clip with a full-width comparison.  The kernel min() takes
 		 * u_int operands, so it would truncate a large distance to
 		 * EOF and could shorten (or zero) a read on a big file.
 		 */
 		u_int64_t left = fp->f_size - uio->uio_offset;
 
 		toread = uio->uio_resid;
 		if (toread > left)
 			toread = left;
 	}
 
 	dprintf((", toread: %d\n",(u_int32_t)toread));
 
 	if (toread == 0)
 		return (0);
 
 	error = ntfs_readattr(ntmp, ip, fp->f_attrtype,
 		fp->f_attrname, uio->uio_offset, toread, NULL, uio);
 	if (error) {
 		printf("ntfs_read: ntfs_readattr failed: %d\n",error);
 		return (error);
 	}
 
 	return (0);
 }
 
 #if !defined(__FreeBSD__)
 
 /*
  * Catch-all for operations without a handler of their own: logs the
  * operation name (debug builds) and fails with ENOTTY.
  */
 static int
 ntfs_bypass(ap)
 	struct vop_generic_args /* {
 		struct vnodeop_desc *a_desc;
 		<other random data follows, presumably>
 	} */ *ap;
 {
 	dprintf(("ntfs_bypass: %s\n", ap->a_desc->vdesc_name));
 
 	return (ENOTTY);
 }
 
 #endif
 
 static int
 ntfs_getattr(ap)
 	struct vop_getattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	register struct vattr *vap = ap->a_vap;
 
 	dprintf(("ntfs_getattr: %d, flags: %d\n",ip->i_number,ip->i_flag));
 
 #if defined(__FreeBSD__)
 	vap->va_fsid = dev2udev(ip->i_dev);
 #else /* NetBSD */
 	vap->va_fsid = ip->i_dev;
 #endif
 	vap->va_fileid = ip->i_number;
 	vap->va_mode = ip->i_mp->ntm_mode;
 	vap->va_nlink = ip->i_nlink;
 	vap->va_uid = ip->i_mp->ntm_uid;
 	vap->va_gid = ip->i_mp->ntm_gid;
 	vap->va_rdev = 0;				/* XXX UNODEV ? */
 	vap->va_size = fp->f_size;
 	vap->va_bytes = fp->f_allocated;
 	vap->va_atime = ntfs_nttimetounix(fp->f_times.t_access);
 	vap->va_mtime = ntfs_nttimetounix(fp->f_times.t_write);
 	vap->va_ctime = ntfs_nttimetounix(fp->f_times.t_create);
 	vap->va_flags = ip->i_flag;
 	vap->va_gen = 0;
 	vap->va_blocksize = ip->i_mp->ntm_spc * ip->i_mp->ntm_bps;
 	vap->va_type = vp->v_type;
 	vap->va_filerev = 0;
 	return (0);
 }
 
 
 /*
  * Last reference to an ntnode.  If necessary, write or delete it.
  *
  * As written, this only unlocks the vnode (after an optional diagnostic)
  * and returns 0; nothing is written or deleted.
  */
 int
 ntfs_inactive(ap)
 	struct vop_inactive_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 #ifdef NTFS_DEBUG
 	/* Only referenced by the dprintf() below. */
 	register struct ntnode *ip = VTONT(vp);
 #endif
 
 	dprintf(("ntfs_inactive: vnode: %p, ntnode: %d\n", vp, ip->i_number));
 
 	/* Report a vnode that still has users when ntfs_prtactive is set. */
 	if (ntfs_prtactive && vp->v_usecount != 0)
 		vprint("ntfs_inactive: pushing active", vp);
 
 	VOP__UNLOCK(vp, 0, ap->a_p);
 
 	/* XXX since we don't support any filesystem changes
 	 * right now, nothing more needs to be done
 	 */
 	return (0);
 }
 
 /*
  * Reclaim an fnode/ntnode so that it can be used for other purposes.
  *
  * Returns 0, or the error from ntfs_ntget() if the ntnode cannot be
  * obtained; in that case nothing has been torn down yet.
  */
 int
 ntfs_reclaim(ap)
 	struct vop_reclaim_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	int error;
 
 	dprintf(("ntfs_reclaim: vnode: %p, ntnode: %d\n", vp, ip->i_number));
 
 	/* Report a vnode that still has users when ntfs_prtactive is set. */
 	if (ntfs_prtactive && vp->v_usecount != 0)
 		vprint("ntfs_reclaim: pushing active", vp);
 
 	if ((error = ntfs_ntget(ip)) != 0)
 		return (error);
 	
 	/* Purge old data structures associated with the inode. */
 	cache_purge(vp);
 	/* Drop the device vnode held through the ntnode, if there is one. */
 	if (ip->i_devvp) {
 		vrele(ip->i_devvp);
 		ip->i_devvp = NULL;
 	}
 
 	/* Release the fnode, then the ntnode obtained by ntfs_ntget() above. */
 	ntfs_frele(fp);
 	ntfs_ntput(ip);
 	/* The vnode no longer refers to filesystem-private data. */
 	vp->v_data = NULL;
 
 	return (0);
 }
 
 /*
  * Print hook for NTFS vnodes.  Nothing is printed; always returns 0.
  */
 static int
 ntfs_print(ap)
 	struct vop_print_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	return (0);
 }
 
 /*
  * Calculate the logical to physical mapping if not done already,
  * then call the device strategy routine.
  */
 int
 ntfs_strategy(ap)
 	struct vop_strategy_args /* {
 		struct buf *a_bp;
 	} */ *ap;
 {
 	register struct buf *bp = ap->a_bp;
 	register struct vnode *vp = bp->b_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	struct ntfsmount *ntmp = ip->i_mp;
 	int error;
 
 #ifdef __FreeBSD__
 	dprintf(("ntfs_strategy: offset: %d, blkno: %d, lblkno: %d\n",
 		(u_int32_t)bp->b_offset,(u_int32_t)bp->b_blkno,
 		(u_int32_t)bp->b_lblkno));
 #else
 	dprintf(("ntfs_strategy: blkno: %d, lblkno: %d\n",
 		(u_int32_t)bp->b_blkno,
 		(u_int32_t)bp->b_lblkno));
 #endif
 
 	dprintf(("strategy: bcount: %d flags: 0x%lx\n", 
 		(u_int32_t)bp->b_bcount,bp->b_flags));
 
 	if (bp->b_iocmd == BIO_READ) {
 		u_int32_t toread;
 
 		if (ntfs_cntob(bp->b_blkno) >= fp->f_size) {
 			clrbuf(bp);
 			error = 0;
 		} else {
 			toread = min(bp->b_bcount,
 				 fp->f_size-ntfs_cntob(bp->b_blkno));
 			dprintf(("ntfs_strategy: toread: %d, fsize: %d\n",
 				toread,(u_int32_t)fp->f_size));
 
 			error = ntfs_readattr(ntmp, ip, fp->f_attrtype,
 				fp->f_attrname, ntfs_cntob(bp->b_blkno),
 				toread, bp->b_data, NULL);
 
 			if (error) {
 				printf("ntfs_strategy: ntfs_readattr failed\n");
 				bp->b_error = error;
 				bp->b_ioflags |= BIO_ERROR;
 			}
 
 			bzero(bp->b_data + toread, bp->b_bcount - toread);
 		}
 	} else {
 		size_t tmp;
 		u_int32_t towrite;
 
 		if (ntfs_cntob(bp->b_blkno) + bp->b_bcount >= fp->f_size) {
 			printf("ntfs_strategy: CAN'T EXTEND FILE\n");
 			bp->b_error = error = EFBIG;
 			bp->b_ioflags |= BIO_ERROR;
 		} else {
 			towrite = min(bp->b_bcount,
 				fp->f_size-ntfs_cntob(bp->b_blkno));
 			dprintf(("ntfs_strategy: towrite: %d, fsize: %d\n",
 				towrite,(u_int32_t)fp->f_size));
 
 			error = ntfs_writeattr_plain(ntmp, ip, fp->f_attrtype,	
 				fp->f_attrname, ntfs_cntob(bp->b_blkno),towrite,
 				bp->b_data, &tmp, NULL);
 
 			if (error) {
 				printf("ntfs_strategy: ntfs_writeattr fail\n");
 				bp->b_error = error;
 				bp->b_ioflags |= BIO_ERROR;
 			}
 		}
 	}
 	bufdone(bp);
 	return (error);
 }
 
 /*
  * Write into the attribute backing this vnode.
  *
  * The attribute cannot grow here: a request whose end lies beyond the
  * current size is refused with EFBIG.  Otherwise the range is handed to
  * ntfs_writeattr_plain() together with the caller's uio, and its error
  * code is returned.
  */
 static int
 ntfs_write(ap)
 	struct vop_write_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	struct uio *uio = ap->a_uio;
 	struct ntfsmount *ntmp = ip->i_mp;
 	u_int64_t towrite;
 	u_int64_t left;
 	size_t written;
 	int error;
 
 	dprintf(("ntfs_write: ino: %d, off: %d resid: %d, segflg: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid,uio->uio_segflg));
 	dprintf(("ntfs_write: filesize: %d",(u_int32_t)fp->f_size));
 
 	if (uio->uio_resid + uio->uio_offset > fp->f_size) {
 		printf("ntfs_write: CAN'T WRITE BEYOND END OF FILE\n");
 		return (EFBIG);
 	}
 
 	/*
 	 * Clip with a full-width comparison.  The kernel min() takes u_int
 	 * operands, so a large distance to EOF would be truncated and could
 	 * silently shorten the write.
 	 */
 	left = fp->f_size - uio->uio_offset;
 	towrite = uio->uio_resid;
 	if (towrite > left)
 		towrite = left;
 
 	dprintf((", towrite: %d\n",(u_int32_t)towrite));
 
 	error = ntfs_writeattr_plain(ntmp, ip, fp->f_attrtype,
 		fp->f_attrname, uio->uio_offset, towrite, NULL, &written, uio);
 #ifdef NTFS_DEBUG
 	if (error)
 		printf("ntfs_write: ntfs_writeattr failed: %d\n", error);
 #endif
 
 	return (error);
 }
 
 /*
  * Access check for an NTFS vnode.
  *
  * Write access to a directory, symlink or regular file is refused with
  * EROFS when the mount is read-only.  Everything else is decided by
  * vaccess() using the mode, uid and gid stored in the mount structure.
  */
 int
 ntfs_access(ap)
 	struct vop_access_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct ntnode *ip = VTONT(vp);
 	mode_t mode = ap->a_mode;
 #ifdef QUOTA
 	int error;
 #endif
 
 	dprintf(("ntfs_access: %d\n",ip->i_number));
 
 	/*
 	 * Only objects that live on the file system itself are subject to
 	 * the read-only check; other vnode types fall straight through.
 	 */
 	if ((mode & VWRITE) &&
 	    (vp->v_type == VDIR || vp->v_type == VLNK ||
 	     vp->v_type == VREG)) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 #ifdef QUOTA
 		if (error = getinoquota(ip))
 			return (error);
 #endif
 	}
 
 	return (vaccess(vp->v_type, ip->i_mp->ntm_mode, ip->i_mp->ntm_uid,
 	    ip->i_mp->ntm_gid, ap->a_mode, ap->a_cred, NULL));
 }
 
 /*
  * Open called.
  *
  * No per-open work is done, so this always succeeds.  With NTFS_DEBUG
  * enabled the ntnode number is logged.
  */
 /* ARGSUSED */
 static int
 ntfs_open(ap)
 	struct vop_open_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 #if NTFS_DEBUG
 	register struct ntnode *ip = VTONT(ap->a_vp);
 
 	printf("ntfs_open: %d\n",ip->i_number);
 #endif
 
 	return (0);
 }
 
 /*
  * Close called.
  *
  * No per-close work is done (in particular, no times are updated), so
  * this always succeeds.  With NTFS_DEBUG enabled the ntnode number is
  * logged.
  */
 /* ARGSUSED */
 static int
 ntfs_close(ap)
 	struct vop_close_args /* {
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 #if NTFS_DEBUG
 	register struct ntnode *ip = VTONT(ap->a_vp);
 
 	printf("ntfs_close: %d\n",ip->i_number);
 #endif
 
 	return (0);
 }
 
 /*
  * Read directory entries into the caller's uio as fixed-size
  * struct dirent records.
  *
  * "." (except in the root directory) and ".." are synthesized at the
  * start of the stream; the remaining records come from ntfs_ntreaddir().
  * The uio offset is treated as a multiple of sizeof(struct dirent), and
  * the index of the next on-disk entry is recovered from it by subtracting
  * the number of synthesized records.  When a_ncookies is non-NULL, an
  * array holding the end offset of every record returned is allocated and
  * handed back through a_ncookies / a_cookies.
  *
  * Returns 0 or the first error from uiomove() / ntfs_ntreaddir().
  */
 int
 ntfs_readdir(ap)
 	struct vop_readdir_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 		int *a_ncookies;
 		u_int **cookies;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	struct uio *uio = ap->a_uio;
 	struct ntfsmount *ntmp = ip->i_mp;
 	int i, error = 0;
 	u_int32_t faked = 0, num;
 	int ncookies = 0;
 	struct dirent cde;
 	off_t off;
 
 	dprintf(("ntfs_readdir %d off: %d resid: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid));
 
 	off = uio->uio_offset;
 
 	/* Number of synthesized records at the head of this directory. */
 	faked = (ip->i_number == NTFS_ROOTINO) ? 1 : 2;
 
 	/* Simulate . in every dir except ROOT */
 	if( ip->i_number != NTFS_ROOTINO ) {
 		struct dirent dot = { NTFS_ROOTINO,
 				sizeof(struct dirent), DT_DIR, 1, "." };
 
 		if( uio->uio_offset < sizeof(struct dirent) ) {
 			dot.d_fileno = ip->i_number;
 			error = uiomove((char *)&dot,sizeof(struct dirent),uio);
 			if(error)
 				return (error);
 
 			ncookies ++;
 		}
 	}
 
 	/*
 	 * Simulate .. in every dir including ROOT.
 	 *
 	 * The bound is the number of faked records, not a fixed 2: in the
 	 * root directory ".." is the only faked record, so a call resuming
 	 * at offset sizeof(struct dirent) must not emit it a second time
 	 * (which would also make the entry index below skip entry 0).
 	 */
 	if( uio->uio_offset < faked * sizeof(struct dirent) ) {
 		struct dirent dotdot = { NTFS_ROOTINO,
 				sizeof(struct dirent), DT_DIR, 2, ".." };
 
 		error = uiomove((char *)&dotdot,sizeof(struct dirent),uio);
 		if(error)
 			return (error);
 
 		ncookies ++;
 	}
 
 	num = uio->uio_offset / sizeof(struct dirent) - faked;
 
 	while( uio->uio_resid >= sizeof(struct dirent) ) {
 		struct attr_indexentry *iep;
 
 		error = ntfs_ntreaddir(ntmp, fp, num, &iep);
 
 		if(error)
 			return (error);
 
 		if( NULL == iep )
 			break;
 
 		for(; !(iep->ie_flag & NTFS_IEFLAG_LAST) && (uio->uio_resid >= sizeof(struct dirent));
 			iep = NTFS_NEXTREC(iep, struct attr_indexentry *))
 		{
 			if(!ntfs_isnamepermitted(ntmp,iep))
 				continue;
 
 			/*
 			 * The whole record is copied out below, so clear it
 			 * first: the bytes of d_name past the terminator
 			 * would otherwise carry stale stack contents to the
 			 * caller.
 			 */
 			bzero(&cde, sizeof(cde));
 
 			for(i=0; i<iep->ie_fnamelen; i++) {
 				cde.d_name[i] = ntfs_u28(iep->ie_fname[i]);
 			}
 			cde.d_name[i] = '\0';
 			dprintf(("ntfs_readdir: elem: %d, fname:[%s] type: %d, flag: %d, ",
 				num, cde.d_name, iep->ie_fnametype,
 				iep->ie_flag));
 			cde.d_namlen = iep->ie_fnamelen;
 			cde.d_fileno = iep->ie_number;
 			cde.d_type = (iep->ie_fflag & NTFS_FFLAG_DIR) ? DT_DIR : DT_REG;
 			cde.d_reclen = sizeof(struct dirent);
 			dprintf(("%s\n", (cde.d_type == DT_DIR) ? "dir":"reg"));
 
 			error = uiomove((char *)&cde, sizeof(struct dirent), uio);
 			if(error)
 				return (error);
 
 			ncookies++;
 			num++;
 		}
 	}
 
 	dprintf(("ntfs_readdir: %d entries (%d bytes) read\n",
 		ncookies,(u_int)(uio->uio_offset - off)));
 	dprintf(("ntfs_readdir: off: %d resid: %d\n",
 		(u_int32_t)uio->uio_offset,uio->uio_resid));
 
 	if (!error && ap->a_ncookies != NULL) {
 		struct dirent* dpStart;
 		struct dirent* dp;
 #if defined(__FreeBSD__)
 		u_long *cookies;
 		u_long *cookiep;
 #else /* defined(__NetBSD__) */
 		off_t *cookies;
 		off_t *cookiep;
 #endif
 
 		ddprintf(("ntfs_readdir: %d cookies\n",ncookies));
 		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
 			panic("ntfs_readdir: unexpected uio from NFS server");
 		/*
 		 * Step back over what this call produced to find the first
 		 * record in the caller's (kernel-space, single-iovec) buffer.
 		 */
 		dpStart = (struct dirent *)
 		     ((caddr_t)uio->uio_iov->iov_base -
 			 (uio->uio_offset - off));
 #if defined(__FreeBSD__)
 		MALLOC(cookies, u_long *, ncookies * sizeof(u_long),
 		       M_TEMP, M_WAITOK);
 #else /* defined(__NetBSD__) */
 		MALLOC(cookies, off_t *, ncookies * sizeof(off_t),
 		       M_TEMP, M_WAITOK);
 #endif
 		/* One cookie per record: the offset just past that record. */
 		for (dp = dpStart, cookiep = cookies, i=0;
 		     i < ncookies;
 		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen), i++) {
 			off += dp->d_reclen;
 			*cookiep++ = (u_int) off;
 		}
 		*ap->a_ncookies = ncookies;
 		*ap->a_cookies = cookies;
 	}
 /*
 	if (ap->a_eofflag)
 	    *ap->a_eofflag = VTONT(ap->a_vp)->i_size <= uio->uio_offset;
 */
 	return (error);
 }
 
 /*
  * Look up one pathname component (a_cnp) in directory a_dvp and return
  * the resulting vnode through a_vpp.
  *
  * The caller needs VEXEC on the directory.  DELETE and RENAME of the last
  * component are refused with EROFS on a read-only mount.  "." and ".."
  * are handled here; any other name goes to ntfs_ntlookupfile().  On
  * success the result is entered in the name cache when MAKEENTRY is set.
  */
 int
 ntfs_lookup(ap)
 	struct vop_lookup_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	register struct vnode *dvp = ap->a_dvp;
 	register struct ntnode *dip = VTONT(dvp);
 	struct ntfsmount *ntmp = dip->i_mp;
 	struct componentname *cnp = ap->a_cnp;
 	struct ucred *cred = cnp->cn_cred;
 	int error;
 	int lockparent = cnp->cn_flags & LOCKPARENT;
 #if NTFS_DEBUG
 	/* Only referenced by the dprintf() below. */
 	int wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
 #endif
 	dprintf(("ntfs_lookup: \"%.*s\" (%ld bytes) in %d, lp: %d, wp: %d \n",
 		(int)cnp->cn_namelen, cnp->cn_nameptr, cnp->cn_namelen,
 		dip->i_number, lockparent, wantparent));
 
 	error = VOP_ACCESS(dvp, VEXEC, cred, cnp->cn_proc);
 	if(error)
 		return (error);
 
 	if ((cnp->cn_flags & ISLASTCN) &&
 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 
 #ifdef __NetBSD__
 	/*
 	 * We now have a segment name to search for, and a directory
 	 * to search.
 	 *
 	 * Before tediously performing a linear scan of the directory,
 	 * check the name cache to see if the directory/name pair
 	 * we are looking for is known already.
 	 */
 	if ((error = cache_lookup(ap->a_dvp, ap->a_vpp, cnp)) >= 0)
 		return (error);
 #endif
 
 	if(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
 		dprintf(("ntfs_lookup: faking . directory in %d\n",
 			dip->i_number));
 
 		/* "." is the directory itself; hand back an extra reference. */
 		VREF(dvp);
 		*ap->a_vpp = dvp;
 		error = 0;
 	} else if (cnp->cn_flags & ISDOTDOT) {
 		struct ntvattr *vap;
 
 		dprintf(("ntfs_lookup: faking .. directory in %d\n",
 			 dip->i_number));
 
 		/* The name attribute carries the parent's number (n_pnumber). */
 		error = ntfs_ntvattrget(ntmp, dip, NTFS_A_NAME, NULL, 0, &vap);
 		if(error)
 			return (error);
 
 		/* The directory is unlocked before the parent is fetched. */
 		VOP__UNLOCK(dvp,0,cnp->cn_proc);
 		cnp->cn_flags |= PDIRUNLOCK;
 
 		dprintf(("ntfs_lookup: parentdir: %d\n",
 			 vap->va_a_name->n_pnumber));
 		error = VFS_VGET(ntmp->ntm_mountp,
 				 vap->va_a_name->n_pnumber,ap->a_vpp); 
 		ntfs_ntvattrrele(vap);
 		if (error) {
 			/* Try to give the directory back locked, as it came in. */
 			if (VN_LOCK(dvp,LK_EXCLUSIVE|LK_RETRY,cnp->cn_proc)==0)
 				cnp->cn_flags &= ~PDIRUNLOCK;
 			return (error);
 		}
 
 		/* Relock the directory only if the caller asked to keep it locked. */
 		if (lockparent && (cnp->cn_flags & ISLASTCN)) {
 			error = VN_LOCK(dvp, LK_EXCLUSIVE, cnp->cn_proc);
 			if (error) {
 				vput( *(ap->a_vpp) );
 				return (error);
 			}
 			cnp->cn_flags &= ~PDIRUNLOCK;
 		}
 	} else {
 		error = ntfs_ntlookupfile(ntmp, dvp, cnp, ap->a_vpp);
 		if (error) {
 			dprintf(("ntfs_ntlookupfile: returned %d\n", error));
 			return (error);
 		}
 
 		dprintf(("ntfs_lookup: found ino: %d\n", 
 			VTONT(*ap->a_vpp)->i_number));
 
 		/* Keep the directory locked only for LOCKPARENT on the last component. */
 		if(!lockparent || !(cnp->cn_flags & ISLASTCN))
 			VOP__UNLOCK(dvp, 0, cnp->cn_proc);
 	}
 
 	if (cnp->cn_flags & MAKEENTRY)
 		cache_enter(dvp, *ap->a_vpp, cnp);
 
 	return (error);
 }
 
 #if defined(__FreeBSD__)
 /*
  * Flush the blocks of a file to disk.
  *
  * This function is worthless for vnodes that represent directories. Maybe we
  * could just do a sync if they try an fsync on a directory file.
  *
  * As written, nothing is flushed: the function returns 0 unconditionally.
  */
 static int
 ntfs_fsync(ap)
 	struct vop_fsync_args /* {
 		struct vnode *a_vp;
 		struct ucred *a_cred;
 		int a_waitfor;
 		struct proc *a_p;
 	} */ *ap;
 {
 	return (0);
 }
 #endif
 
 /*
  * Return POSIX pathconf information applicable to NTFS filesystem.
  *
  * The answer for the variable named by a_name is stored through a_retval
  * and 0 is returned; an unrecognised name yields EINVAL and leaves
  * *a_retval untouched.
  */
 int
 ntfs_pathconf(v)
 	void *v;
 {
 	struct vop_pathconf_args /* {
 		struct vnode *a_vp;
 		int a_name;
 		register_t *a_retval;
 	} */ *ap = v;
 
 	switch (ap->a_name) {
 	case _PC_LINK_MAX:
 	case _PC_CHOWN_RESTRICTED:
 #if defined(__NetBSD__)
 	case _PC_SYNC_IO:
 #endif
 		*ap->a_retval = 1;
 		break;
 	case _PC_NAME_MAX:
 		*ap->a_retval = NTFS_MAXFILENAME;
 		break;
 	case _PC_PATH_MAX:
 		*ap->a_retval = PATH_MAX;
 		break;
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 0;
 		break;
 #if defined(__NetBSD__)
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		break;
 #endif
 	default:
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 /*
  * Global vfs data structures
  */
 vop_t **ntfs_vnodeop_p;
 #if defined(__FreeBSD__)
 /*
  * FreeBSD operations table for NTFS vnodes.  Each entry pairs an operation
  * descriptor with its handler; the vop_default_desc entry names
  * vop_defaultop as the handler for operations that are not listed.
  */
 static
 struct vnodeopv_entry_desc ntfs_vnodeop_entries[] = {
 	{ &vop_default_desc, (vop_t *)vop_defaultop },
 
 	{ &vop_getattr_desc, (vop_t *)ntfs_getattr },
 	{ &vop_inactive_desc, (vop_t *)ntfs_inactive },
 	{ &vop_reclaim_desc, (vop_t *)ntfs_reclaim },
 	{ &vop_print_desc, (vop_t *)ntfs_print },
 	{ &vop_pathconf_desc, ntfs_pathconf },
 
 	{ &vop_islocked_desc, (vop_t *)vop_stdislocked },
 	{ &vop_unlock_desc, (vop_t *)vop_stdunlock },
 	{ &vop_lock_desc, (vop_t *)vop_stdlock },
 	/* Lookups enter through vfs_cache_lookup; ntfs_lookup is the cachedlookup handler. */
 	{ &vop_cachedlookup_desc, (vop_t *)ntfs_lookup },
 	{ &vop_lookup_desc, (vop_t *)vfs_cache_lookup },
 
 	{ &vop_access_desc, (vop_t *)ntfs_access },
 	{ &vop_close_desc, (vop_t *)ntfs_close },
 	{ &vop_open_desc, (vop_t *)ntfs_open },
 	{ &vop_readdir_desc, (vop_t *)ntfs_readdir },
 	{ &vop_fsync_desc, (vop_t *)ntfs_fsync },
 
 	{ &vop_bmap_desc, (vop_t *)ntfs_bmap },
 	{ &vop_getpages_desc, (vop_t *) ntfs_getpages },
 	{ &vop_putpages_desc, (vop_t *) ntfs_putpages },
 	{ &vop_strategy_desc, (vop_t *)ntfs_strategy },
-	{ &vop_bwrite_desc, (vop_t *)vop_stdbwrite },
 	{ &vop_read_desc, (vop_t *)ntfs_read },
 	{ &vop_write_desc, (vop_t *)ntfs_write },
 
 	/* End-of-table marker. */
 	{ NULL, NULL }
 };
 
 /* Ties the table above to ntfs_vnodeop_p and registers it via VNODEOP_SET. */
 static
 struct vnodeopv_desc ntfs_vnodeop_opv_desc =
 	{ &ntfs_vnodeop_p, ntfs_vnodeop_entries };
 
 VNODEOP_SET(ntfs_vnodeop_opv_desc);
 
 #else /* !FreeBSD */
 
 /*
  * Non-FreeBSD operations table for NTFS vnodes.  Operations without an
  * NTFS handler are mapped to genfs_* routines, and anything not listed at
  * all goes to ntfs_bypass through the vop_default_desc entry.
  */
 struct vnodeopv_entry_desc ntfs_vnodeop_entries[] = {
 	{ &vop_default_desc, (vop_t *) ntfs_bypass },
 	{ &vop_lookup_desc, (vop_t *) ntfs_lookup },	/* lookup */
 	{ &vop_create_desc, genfs_eopnotsupp },		/* create */
 	{ &vop_mknod_desc, genfs_eopnotsupp },		/* mknod */
 	{ &vop_open_desc, (vop_t *) ntfs_open },	/* open */
 	{ &vop_close_desc,(vop_t *)  ntfs_close },	/* close */
 	{ &vop_access_desc, (vop_t *) ntfs_access },	/* access */
 	{ &vop_getattr_desc, (vop_t *) ntfs_getattr },	/* getattr */
 	{ &vop_setattr_desc, genfs_eopnotsupp },	/* setattr */
 	{ &vop_read_desc, (vop_t *) ntfs_read },	/* read */
 	{ &vop_write_desc, (vop_t *) ntfs_write },	/* write */
 	{ &vop_lease_desc, genfs_lease_check },		/* lease */
 	{ &vop_fcntl_desc, genfs_fcntl },		/* fcntl */
 	{ &vop_ioctl_desc, genfs_enoioctl },		/* ioctl */
 	{ &vop_poll_desc, genfs_poll },			/* poll */
 	{ &vop_revoke_desc, genfs_revoke },		/* revoke */
 	{ &vop_fsync_desc, genfs_fsync },		/* fsync */
 	{ &vop_seek_desc, genfs_seek },			/* seek */
 	{ &vop_remove_desc, genfs_eopnotsupp },		/* remove */
 	{ &vop_link_desc, genfs_eopnotsupp },		/* link */
 	{ &vop_rename_desc, genfs_eopnotsupp },		/* rename */
 	{ &vop_mkdir_desc, genfs_eopnotsupp },		/* mkdir */
 	{ &vop_rmdir_desc, genfs_eopnotsupp },		/* rmdir */
 	{ &vop_symlink_desc, genfs_eopnotsupp },	/* symlink */
 	{ &vop_readdir_desc, (vop_t *) ntfs_readdir },	/* readdir */
 	{ &vop_readlink_desc, genfs_eopnotsupp },	/* readlink */
 	{ &vop_abortop_desc, genfs_abortop },		/* abortop */
 	{ &vop_inactive_desc, (vop_t *) ntfs_inactive },	/* inactive */
 	{ &vop_reclaim_desc, (vop_t *) ntfs_reclaim },	/* reclaim */
 	{ &vop_lock_desc, genfs_lock },			/* lock */
 	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
 	{ &vop_bmap_desc, (vop_t *) ntfs_bmap },	/* bmap */
 	{ &vop_strategy_desc, (vop_t *) ntfs_strategy },	/* strategy */
 	{ &vop_print_desc, (vop_t *) ntfs_print },	/* print */
 	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
 	{ &vop_pathconf_desc, ntfs_pathconf },		/* pathconf */
 	{ &vop_advlock_desc, genfs_nullop },		/* advlock */
 	{ &vop_blkatoff_desc, genfs_eopnotsupp },	/* blkatoff */
 	{ &vop_valloc_desc, genfs_eopnotsupp },		/* valloc */
 	{ &vop_reallocblks_desc, genfs_eopnotsupp },	/* reallocblks */
 	{ &vop_vfree_desc, genfs_eopnotsupp },		/* vfree */
 	{ &vop_truncate_desc, genfs_eopnotsupp },	/* truncate */
 	{ &vop_update_desc, genfs_eopnotsupp },		/* update */
 	{ &vop_bwrite_desc, vn_bwrite },		/* bwrite */
 	/* End-of-table marker. */
 	{ (struct vnodeop_desc *)NULL, (int (*) __P((void *)))NULL }
 };
 /* Ties the table above to ntfs_vnodeop_p. */
 struct vnodeopv_desc ntfs_vnodeop_opv_desc =
 	{ &ntfs_vnodeop_p, ntfs_vnodeop_entries };
 
 #endif
Index: head/sys/kern/vfs_bio.c
===================================================================
--- head/sys/kern/vfs_bio.c	(revision 75579)
+++ head/sys/kern/vfs_bio.c	(revision 75580)
@@ -1,3245 +1,3252 @@
 /*
  * Copyright (c) 1994,1997 John S. Dyson
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice immediately at the beginning of the file, without modification,
  *    this list of conditions, and the following disclaimer.
  * 2. Absolutely no warranty of function or purpose is made by the author
  *		John S. Dyson.
  *
  * $FreeBSD$
  */
 
 /*
  * this file contains a new buffer I/O scheme implementing a coherent
  * VM object and buffer cache scheme.  Pains have been taken to make
  * sure that the performance degradation associated with schemes such
  * as this is not realized.
  *
  * Author:  John S. Dyson
  * Significant help during the development and debugging phases
  * had been provided by David Greenman, also of the FreeBSD core team.
  *
  * see man buf(9) for more info.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/eventhandler.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/ktr.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/resourcevar.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_kern.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_map.h>
 
 static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer");
 
 struct	bio_ops bioops;		/* I/O operation notification */
 
+struct	buf_ops buf_ops_bio = {	/* default buffer operations for this file */
+	"buf_ops_bio",		/* name string -- NOTE(review): struct buf_ops layout is not visible here */
+	bwrite			/* presumably the routine BUF_WRITE() dispatches to -- confirm */
+};
+
 struct buf *buf;		/* buffer header pool */
 struct swqueue bswlist;
 struct mtx buftimelock;		/* Interlock on setting prio and timo */
 
 static void vm_hold_free_pages(struct buf * bp, vm_offset_t from,
 		vm_offset_t to);
 static void vm_hold_load_pages(struct buf * bp, vm_offset_t from,
 		vm_offset_t to);
 static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off,
 			       int pageno, vm_page_t m);
 static void vfs_clean_pages(struct buf * bp);
 static void vfs_setdirty(struct buf *bp);
 static void vfs_vmio_release(struct buf *bp);
 static void vfs_backgroundwritedone(struct buf *bp);
 static int flushbufqueues(void);
 
 static int bd_request;
 
 static void buf_daemon __P((void));
 /*
  * bogus page -- for I/O to/from partially complete buffers
  * this is a temporary solution to the problem, but it is not
  * really that bad.  it would be better to split the buffer
  * for input in the case of buffers partially already in memory,
  * but the code is intricate enough already.
  */
 /* stand-in page described above; allocated in bufinit() */
 vm_page_t bogus_page;
 /* exported read/write as vfs.vmiodirenable; starts out FALSE */
 int vmiodirenable = FALSE;
 /* sum of b_runningbufspace over buffers whose I/O has been started */
 int runningbufspace;
 /* address from kmem_alloc_pageable() used to derive bogus_page's index */
 static vm_offset_t bogus_offset;
 
 /* buffer space accounting; the max/hi/lo limits are computed in bufinit() */
 static int bufspace, maxbufspace,
 	bufmallocspace, maxbufmallocspace, lobufspace, hibufspace;
 /* event counters exported via sysctl (buffreekvacnt is bumped in bfreekva()) */
 static int bufreusecnt, bufdefragcnt, buffreekvacnt;
 /* bitmask of VFS_BIO_NEED_* conditions that sleepers are waiting on */
 static int needsbuffer;
 /* hysteresis limits for runningbufspace; runningbufreq is nonzero while
  * someone sleeps in waitrunningbufspace() */
 static int lorunningspace, hirunningspace, runningbufreq;
 /* current number of delayed-write buffers and its low/high thresholds */
 static int numdirtybuffers, lodirtybuffers, hidirtybuffers;
 /* current number of free buffers and its low/high thresholds */
 static int numfreebuffers, lofreebuffers, hifreebuffers;
 /* statistics exported via sysctl -- NOTE(review): updated outside this
  * part of the file, presumably by getnewbuf(); confirm */
 static int getnewbufcalls;
 static int getnewbufrestarts;
 
 /*
  * Export the buffer cache counters and limits under the "vfs" sysctl
  * node.  Entries flagged CTLFLAG_RD are read-only; entries flagged
  * CTLFLAG_RW may also be written.  No entry supplies a description
  * string (the final argument is "" throughout).
  */
 SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD,
 	&numdirtybuffers, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW,
 	&lodirtybuffers, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW,
 	&hidirtybuffers, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD,
 	&numfreebuffers, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW,
 	&lofreebuffers, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW,
 	&hifreebuffers, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, runningbufspace, CTLFLAG_RD,
 	&runningbufspace, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, lorunningspace, CTLFLAG_RW,
 	&lorunningspace, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, hirunningspace, CTLFLAG_RW,
 	&hirunningspace, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RD,
 	&maxbufspace, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, hibufspace, CTLFLAG_RD,
 	&hibufspace, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, lobufspace, CTLFLAG_RD,
 	&lobufspace, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD,
 	&bufspace, 0, "");
 /* note: the sysctl is named maxmallocbufspace, the variable maxbufmallocspace */
 SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW,
 	&maxbufmallocspace, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD,
 	&bufmallocspace, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, getnewbufcalls, CTLFLAG_RW,
 	&getnewbufcalls, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, getnewbufrestarts, CTLFLAG_RW,
 	&getnewbufrestarts, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, vmiodirenable, CTLFLAG_RW,
 	&vmiodirenable, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, bufdefragcnt, CTLFLAG_RW,
 	&bufdefragcnt, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, buffreekvacnt, CTLFLAG_RW,
 	&buffreekvacnt, 0, "");
 SYSCTL_INT(_vfs, OID_AUTO, bufreusecnt, CTLFLAG_RW,
 	&bufreusecnt, 0, "");
 
 /* bucket count minus one once bufhashinit() has run; used as an index mask */
 static int bufhashmask;
 /* bufhashtbl: hash buckets; invalhash: chain every header starts on in bufinit() */
 static LIST_HEAD(bufhashhdr, buf) *bufhashtbl, invalhash;
 /* buffer queues, indexed by a buffer's b_qindex */
 struct bqueues bufqueues[BUFFER_QUEUES] = { { 0 } };
 char *buf_wmesg = BUF_WMESG;
 
 /* defined outside this file */
 extern int vm_swap_size;
 
 #define VFS_BIO_NEED_ANY	0x01	/* any freeable buffer */
 #define VFS_BIO_NEED_DIRTYFLUSH	0x02	/* waiting for dirty buffer flush */
 #define VFS_BIO_NEED_FREE	0x04	/* wait for free bufs, hi hysteresis */
 #define VFS_BIO_NEED_BUFSPACE	0x08	/* wait for buf space, lo hysteresis */
 
 /*
  * Buffer hash table code.  Note that the logical block scans linearly, which
  * gives us some L1 cache locality.
  */
 
 static __inline struct bufhashhdr *
 bufhash(struct vnode *vnp, daddr_t bn)
 {
 	uintptr_t slot;
 
 	/*
 	 * Discard the low seven bits of the vnode address, add the block
 	 * number, and fold the sum into the table with bufhashmask.
 	 */
 	slot = ((uintptr_t)vnp >> 7) + (int)bn;
 	return (&bufhashtbl[slot & bufhashmask]);
 }
 
 /*
  *	numdirtywakeup:
  *
  *	If someone is blocked due to there being too many dirty buffers,
  *	and numdirtybuffers is now reasonable, wake them up.
  */
 
 static __inline void
 numdirtywakeup(int level)
 {
 	/* Dirty count is still above the requested level: keep them asleep. */
 	if (numdirtybuffers > level)
 		return;
 
 	/* Nobody is waiting on the dirty-flush condition. */
 	if ((needsbuffer & VFS_BIO_NEED_DIRTYFLUSH) == 0)
 		return;
 
 	needsbuffer &= ~VFS_BIO_NEED_DIRTYFLUSH;
 	wakeup(&needsbuffer);
 }
 
 /*
  *	bufspacewakeup:
  *
  *	Called when buffer space is potentially available for recovery.
  *	getnewbuf() will block on this flag when it is unable to free 
  *	sufficient buffer space.  Buffer space becomes recoverable when 
  *	bp's get placed back in the queues.
  */
 
 static __inline void
 bufspacewakeup(void)
 {
 	/* No sleeper is waiting for buffer space: nothing to do. */
 	if ((needsbuffer & VFS_BIO_NEED_BUFSPACE) == 0)
 		return;
 
 	/*
 	 * The kva space may not have been freed yet, but a waiter that
 	 * retries now will be able to free it, so clear the request
 	 * and wake everyone sleeping on needsbuffer.
 	 */
 	needsbuffer &= ~VFS_BIO_NEED_BUFSPACE;
 	wakeup(&needsbuffer);
 }
 
 /*
  * runningbufwakeup() - in-progress I/O accounting.
  *
  */
 static __inline void
 runningbufwakeup(struct buf *bp)
 {
 	/* This buffer was never charged against runningbufspace. */
 	if (bp->b_runningbufspace == 0)
 		return;
 
 	/* Return the buffer's share and mark it as no longer charged. */
 	runningbufspace -= bp->b_runningbufspace;
 	bp->b_runningbufspace = 0;
 
 	/* Wake waiters only once we have drained to the low-water mark. */
 	if (runningbufreq == 0 || runningbufspace > lorunningspace)
 		return;
 	runningbufreq = 0;
 	wakeup(&runningbufreq);
 }
 
 /*
  *	bufcountwakeup:
  *
  *	Called when a buffer has been added to one of the free queues to
  *	account for the buffer and to wakeup anyone waiting for free buffers.
  *	This typically occurs when large amounts of metadata are being handled
  *	by the buffer cache ( else buffer space runs out first, usually ).
  */
 
 static __inline void
 bufcountwakeup(void)
 {
 	int satisfied;
 
 	numfreebuffers++;
 	if (needsbuffer == 0)
 		return;
 
 	/*
 	 * Any newly freed buffer satisfies NEED_ANY; NEED_FREE is only
 	 * satisfied once the free count is back at the high-water mark.
 	 */
 	satisfied = VFS_BIO_NEED_ANY;
 	if (numfreebuffers >= hifreebuffers)
 		satisfied |= VFS_BIO_NEED_FREE;
 	needsbuffer &= ~satisfied;
 	wakeup(&needsbuffer);
 }
 
 /*
  *	waitrunningbufspace()
  *
  *	runningbufspace is a measure of the amount of I/O currently
  *	running.  This routine is used in async-write situations to
  *	prevent creating huge backups of pending writes to a device.
  *	Only asynchronous writes are governed by this function.
  *
  *	Reads will adjust runningbufspace, but will not block based on it.
  *	The read load has a side effect of reducing the allowed write load.
  *
  *	This does NOT turn an async write into a sync write.  It waits  
  *	for earlier writes to complete and generally returns before the
  *	caller's write has reached the device.
  */
 static __inline void
 waitrunningbufspace(void)
 {
 	int s;
 
 	/*
 	 * Block while the amount of in-progress buffer I/O exceeds
 	 * hirunningspace.  Takes no arguments and returns nothing.
 	 *
 	 * The test, the increment of runningbufreq and the tsleep() are
 	 * done at splbio(), the same way bwillwrite() protects its sleep
 	 * loop.  runningbufwakeup() zeroes runningbufreq and issues the
 	 * wakeup; if it could run between our increment and the tsleep()
 	 * (presumably from I/O completion at interrupt level -- confirm)
 	 * that wakeup would be missed.
 	 */
 	s = splbio();
 	while (runningbufspace > hirunningspace) {
 		++runningbufreq;
 		tsleep(&runningbufreq, PVM, "wdrain", 0);
 	}
 	splx(s);
 }
 
 
 /*
  *	vfs_buf_test_cache:
  *
  *	Called when a buffer is extended.  This function clears the B_CACHE
  *	bit if the newly extended portion of the buffer does not contain
  *	valid data.
  */
 static __inline__ void
 vfs_buf_test_cache(struct buf *bp, vm_ooffset_t foff, vm_offset_t off,
     vm_offset_t size, vm_page_t m)
 {
 	int pgoff;
 
 	/* B_CACHE is already clear, so there is nothing left to revoke. */
 	if ((bp->b_flags & B_CACHE) == 0)
 		return;
 
 	/* Offset of the range within its page. */
 	pgoff = (foff + off) & PAGE_MASK;
 	if (vm_page_is_valid(m, pgoff, size) == 0)
 		bp->b_flags &= ~B_CACHE;
 }
 
 static __inline__ void
 bd_wakeup(int dirtybuflevel)
 {
 	/*
 	 * Skip the wakeup if a request is already posted or the dirty
 	 * buffer count has not reached the caller's threshold.
 	 */
 	if (bd_request != 0 || numdirtybuffers < dirtybuflevel)
 		return;
 
 	bd_request = 1;
 	wakeup(&bd_request);
 }
 
 /*
  * bd_speedup - speedup the buffer cache flushing code
  */
 
 static __inline__ void
 bd_speedup(void)
 {
 	/* A threshold of 1 posts a flush request if any buffer is dirty. */
 	bd_wakeup(1);
 }
 
 /*
  * Initialize buffer headers and related structures. 
  */
 
 caddr_t
 bufhashinit(caddr_t vaddr)
 {
 	int nbuckets;
 
 	/*
 	 * Size the table: start at 8 buckets and keep doubling until
 	 * there are at least nbuf / 4 of them.
 	 */
 	nbuckets = 8;
 	while (nbuckets < nbuf / 4)
 		nbuckets <<= 1;
 
 	/*
 	 * The table lives at the address handed in; report back the first
 	 * address past it.  The mask is the bucket count minus one.
 	 */
 	bufhashtbl = (void *)vaddr;
 	bufhashmask = nbuckets - 1;
 	return (vaddr + sizeof(*bufhashtbl) * nbuckets);
 }
 
 /*
  * bufinit:
  *
  *	One-time setup of the buffer cache.  Initializes the swap-buffer
  *	list, the hash chains and the buffer queues, places every buffer
  *	header on QUEUE_EMPTY and on the invalhash chain, derives the
  *	space, dirty and free thresholds from nbuf, and allocates
  *	bogus_page.  Takes no arguments and returns nothing.
  */
 void
 bufinit(void)
 {
 	struct buf *bp;
 	int i;
 
 	TAILQ_INIT(&bswlist);
 	LIST_INIT(&invalhash);
 	mtx_init(&buftimelock, "buftime lock", MTX_DEF);
 
 	/* bufhashmask is the bucket count minus one, hence the inclusive bound */
 	for (i = 0; i <= bufhashmask; i++)
 		LIST_INIT(&bufhashtbl[i]);
 
 	/* next, make a null set of free lists */
 	for (i = 0; i < BUFFER_QUEUES; i++)
 		TAILQ_INIT(&bufqueues[i]);
 
 	/* finally, initialize each buffer header and stick on empty q */
 	for (i = 0; i < nbuf; i++) {
 		bp = &buf[i];
 		bzero(bp, sizeof *bp);
 		bp->b_flags = B_INVAL;	/* we're just an empty header */
 		bp->b_dev = NODEV;
 		bp->b_rcred = NOCRED;
 		bp->b_wcred = NOCRED;
 		bp->b_qindex = QUEUE_EMPTY;
 		bp->b_xflags = 0;
 		LIST_INIT(&bp->b_dep);
 		BUF_LOCKINIT(bp);
 		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist);
 		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
 	}
 
 	/*
 	 * maxbufspace is the absolute maximum amount of buffer space we are 
 	 * allowed to reserve in KVM and in real terms.  The absolute maximum
 	 * is nominally used by buf_daemon.  hibufspace is the nominal maximum
 	 * used by most other processes.  The differential is required to 
 	 * ensure that buf_daemon is able to run when other processes might 
 	 * be blocked waiting for buffer space.
 	 *
 	 * maxbufspace is based on BKVASIZE.  Allocating buffers larger than
 	 * this may result in KVM fragmentation which is not handled optimally
 	 * by the system.
 	 */
 	maxbufspace = nbuf * BKVASIZE;
 	hibufspace = imax(3 * maxbufspace / 4, maxbufspace - MAXBSIZE * 10);
 	lobufspace = hibufspace - MAXBSIZE;
 
 	/* fixed low/high water marks for in-progress I/O: 512KB and 1MB */
 	lorunningspace = 512 * 1024;
 	hirunningspace = 1024 * 1024;
 
 /*
  * Limit the amount of malloc memory since it is wired permanently into
  * the kernel space.  Even though this is accounted for in the buffer
  * allocation, we don't want the malloced region to grow uncontrolled.
  * The malloc scheme improves memory utilization significantly on average
  * (small) directories.
  */
 	maxbufmallocspace = hibufspace / 20;
 
 /*
  * Reduce the chance of a deadlock occurring by limiting the number
  * of delayed-write dirty buffers we allow to stack up.
  */
 	hidirtybuffers = nbuf / 4 + 20;
 	numdirtybuffers = 0;
 /*
  * To support extreme low-memory systems, make sure hidirtybuffers cannot
  * eat up all available buffer space.  This occurs when our minimum cannot
  * be met.  We try to size hidirtybuffers to 3/4 our buffer space assuming
  * BKVASIZE'd (8K) buffers.
  */
 	while (hidirtybuffers * BKVASIZE > 3 * hibufspace / 4) {
 		hidirtybuffers >>= 1;
 	}
 	lodirtybuffers = hidirtybuffers / 2;
 
 /*
  * Try to keep the number of free buffers in the specified range,
  * and give special processes (e.g. like buf_daemon) access to an 
  * emergency reserve.
  */
 	lofreebuffers = nbuf / 18 + 5;
 	hifreebuffers = 2 * lofreebuffers;
 	numfreebuffers = nbuf;
 
 /*
  * Maximum number of async ops initiated per buf_daemon loop.  This is
  * somewhat of a hack at the moment, we really need to limit ourselves
  * based on the number of bytes of I/O in-transit that were initiated
  * from buf_daemon.
  *
  * NOTE(review): no statement in this function sets such a limit; the
  * paragraph above appears to be left over from removed code.
  */
 
 	/*
 	 * Allocate the bogus page: reserve a page of pageable kernel
 	 * address space and allocate a page in kernel_object at the
 	 * matching index.  NOTE(review): the vm_page_alloc() result is
 	 * not checked for NULL.
 	 */
 	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 	bogus_page = vm_page_alloc(kernel_object,
 			((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
 			VM_ALLOC_NORMAL);
 	cnt.v_wire_count++;
 
 }
 
 /*
  * bfreekva() - free the kva allocation for a buffer.
  *
  *	Must be called at splbio() or higher as this is the only locking for
  *	buffer_map.
  *
  *	Since this call frees up buffer space, we call bufspacewakeup().
  */
 static void
 bfreekva(struct buf * bp)
 {
 	if (bp->b_kvasize) {
 		++buffreekvacnt;
 		bufspace -= bp->b_kvasize;
 		vm_map_delete(buffer_map,
 		    (vm_offset_t) bp->b_kvabase,
 		    (vm_offset_t) bp->b_kvabase + bp->b_kvasize
 		);
 		bp->b_kvasize = 0;
 		bufspacewakeup();
 	}
 }
 
 /*
  *	bremfree:
  *
  *	Remove the buffer from the appropriate free list.
  */
 void
 bremfree(struct buf * bp)
 {
 	int s, prev_queue;
 
 	s = splbio();
 	prev_queue = bp->b_qindex;
 
 	if (prev_queue == QUEUE_NONE) {
 		/* Not queued: only tolerated when the lock is held recursively. */
 		if (BUF_REFCNT(bp) <= 1)
 			panic("bremfree: removing a buffer not on a queue");
 	} else {
 		KASSERT(BUF_REFCNT(bp) == 1, ("bremfree: bp %p not locked",bp));
 		TAILQ_REMOVE(&bufqueues[prev_queue], bp, b_freelist);
 		bp->b_qindex = QUEUE_NONE;
 	}
 
 	/*
 	 * Fix up numfreebuffers.  A buffer that is invalid, or that is not
 	 * a delayed write, was counted as free while it sat on the DIRTY,
 	 * CLEAN, EMPTY or EMPTYKVA queue, so taking it off one of those
 	 * queues must drop the count.
 	 */
 	if ((bp->b_flags & B_INVAL) != 0 || (bp->b_flags & B_DELWRI) == 0) {
 		if (prev_queue == QUEUE_DIRTY || prev_queue == QUEUE_CLEAN ||
 		    prev_queue == QUEUE_EMPTY || prev_queue == QUEUE_EMPTYKVA)
 			--numfreebuffers;
 	}
 	splx(s);
 }
 
 
 /*
  * Get a buffer with the specified data.  Look in the cache first.  We
  * must clear BIO_ERROR and B_INVAL prior to initiating I/O.  If B_CACHE
  * is set, the buffer is valid and we do not have to do anything ( see
  * getblk() ).
  */
 int
 bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred,
     struct buf ** bpp)
 {
 	struct buf *bp;
 
 	/* The caller receives the buffer whether or not I/O is needed. */
 	*bpp = bp = getblk(vp, blkno, size, 0, 0);
 
 	/* Cache hit: the contents are valid, so no I/O is started. */
 	if (bp->b_flags & B_CACHE)
 		return (0);
 
 	/* Cache miss: charge the read to the process and start it. */
 	if (curproc != PCPU_GET(idleproc))
 		curproc->p_stats->p_ru.ru_inblock++;
 	KASSERT(!(bp->b_flags & B_ASYNC), ("bread: illegal async bp %p", bp));
 	bp->b_iocmd = BIO_READ;
 	bp->b_flags &= ~B_INVAL;
 	bp->b_ioflags &= ~BIO_ERROR;
 
 	/* Record the read credential only if the buffer has none yet. */
 	if (bp->b_rcred == NOCRED) {
 		if (cred != NOCRED)
 			crhold(cred);
 		bp->b_rcred = cred;
 	}
 
 	vfs_busy_pages(bp, 0);
 	VOP_STRATEGY(vp, bp);
 
 	/* Synchronous read: wait for completion and report its status. */
 	return (bufwait(bp));
 }
 
 /*
  * Operates like bread, but also starts asynchronous I/O on
  * read-ahead blocks.  We must clear BIO_ERROR and B_INVAL prior
  * to initiating I/O.  If B_CACHE is set, the buffer is valid
  * and we do not have to do anything.
  *
  *	vp, blkno, size	block to read; its buffer is returned in *bpp
  *	rablkno, rabsize	parallel arrays of cnt read-ahead block
  *			numbers and sizes
  *	cred		credential recorded in b_rcred of any buffer
  *			that has none
  *
  * Returns the bufwait() status of the primary block if a read was
  * started for it, otherwise 0.  Read-ahead results are not reported.
  */
 int
 breadn(struct vnode * vp, daddr_t blkno, int size,
     daddr_t * rablkno, int *rabsize,
     int cnt, struct ucred * cred, struct buf ** bpp)
 {
 	struct buf *bp, *rabp;
 	int i;
 	int rv = 0, readwait = 0;
 
 	*bpp = bp = getblk(vp, blkno, size, 0, 0);
 
 	/* if not found in cache, do some I/O */
 	if ((bp->b_flags & B_CACHE) == 0) {
 		if (curproc != PCPU_GET(idleproc))
 			curproc->p_stats->p_ru.ru_inblock++;
 		bp->b_iocmd = BIO_READ;
 		bp->b_flags &= ~B_INVAL;
 		bp->b_ioflags &= ~BIO_ERROR;
 		if (bp->b_rcred == NOCRED) {
 			if (cred != NOCRED)
 				crhold(cred);
 			bp->b_rcred = cred;
 		}
 		vfs_busy_pages(bp, 0);
 		VOP_STRATEGY(vp, bp);
 		/* remember to wait for this read after queueing the read-aheads */
 		++readwait;
 	}
 
 	for (i = 0; i < cnt; i++, rablkno++, rabsize++) {
 		/* skip read-ahead blocks that inmem() reports as present */
 		if (inmem(vp, *rablkno))
 			continue;
 		rabp = getblk(vp, *rablkno, *rabsize, 0, 0);
 
 		if ((rabp->b_flags & B_CACHE) == 0) {
 			if (curproc != PCPU_GET(idleproc))
 				curproc->p_stats->p_ru.ru_inblock++;
 			rabp->b_flags |= B_ASYNC;
 			rabp->b_flags &= ~B_INVAL;
 			rabp->b_ioflags &= ~BIO_ERROR;
 			rabp->b_iocmd = BIO_READ;
 			if (rabp->b_rcred == NOCRED) {
 				if (cred != NOCRED)
 					crhold(cred);
 				rabp->b_rcred = cred;
 			}
 			vfs_busy_pages(rabp, 0);
 			/*
 			 * NOTE(review): presumably hands the buffer lock to
 			 * the kernel, since this process will not wait for
 			 * the asynchronous read -- confirm.
 			 */
 			BUF_KERNPROC(rabp);
 			VOP_STRATEGY(vp, rabp);
 		} else {
 			/* already valid: we only wanted it cached, so let it go */
 			brelse(rabp);
 		}
 	}
 
 	if (readwait) {
 		rv = bufwait(bp);
 	}
 	return (rv);
 }
 
 /*
  * Write, release buffer on completion.  (Done by iodone
  * if async).  Do not bother writing anything if the buffer
  * is invalid.
  *
  * Note that we set B_CACHE here, indicating that buffer is
  * fully valid and thus cacheable.  This is true even of NFS
  * now so we set it generally.  This could be set either here 
  * or in biodone() since the I/O is synchronous.  We put it
  * here.
  */
 
 /*
  * Nonzero allows bwrite() to write buffers marked BX_BKGRDWRITE from a
  * private copy (a "background write").  Adjustable at run time through
  * the debug.dobkgrdwrite sysctl.
  */
 int dobkgrdwrite = 1;
 SYSCTL_INT(_debug, OID_AUTO, dobkgrdwrite, CTLFLAG_RW, &dobkgrdwrite, 0, "");
 
 /*
  * bwrite:
  *
  *	bp	locked (busy) buffer to write; panics if BUF_REFCNT(bp)
  *		is zero.
  *
  *	Returns 0 when the buffer is invalid (it is simply released),
  *	when the write is deferred through bdwrite(), or when the write
  *	was started asynchronously.  For a synchronous write the status
  *	from bufwait() is returned and the buffer is released with
  *	brelse().
  */
 int
 bwrite(struct buf * bp)
 {
 	int oldflags, s;
 	struct buf *newbp;
 
 	if (bp->b_flags & B_INVAL) {
 		brelse(bp);
 		return (0);
 	}
 
 	/*
 	 * Snapshot the caller's flags: the B_ASYNC tests at the end use
 	 * this copy, even if bp is replaced by a background copy below.
 	 */
 	oldflags = bp->b_flags;
 
 	if (BUF_REFCNT(bp) == 0)
 		panic("bwrite: buffer is not busy???");
 	s = splbio();
 	/*
 	 * If a background write is already in progress, delay
 	 * writing this block if it is asynchronous. Otherwise
 	 * wait for the background write to complete.
 	 */
 	if (bp->b_xflags & BX_BKGRDINPROG) {
 		if (bp->b_flags & B_ASYNC) {
 			splx(s);
 			bdwrite(bp);
 			return (0);
 		}
 		/* vfs_backgroundwritedone() does the matching wakeup */
 		bp->b_xflags |= BX_BKGRDWAIT;
 		tsleep(&bp->b_xflags, PRIBIO, "biord", 0);
 		if (bp->b_xflags & BX_BKGRDINPROG)
 			panic("bwrite: still writing");
 	}
 
 	/* Mark the buffer clean */
 	bundirty(bp);
 
 	/*
 	 * If this buffer is marked for background writing and we
 	 * do not have to wait for it, make a copy and write the
 	 * copy so as to leave this buffer ready for further use.
 	 *
 	 * This optimization eats a lot of memory.  If we have a page
 	 * or buffer shortfall we can't do it.
 	 */
 	if (dobkgrdwrite && (bp->b_xflags & BX_BKGRDWRITE) && 
 	    (bp->b_flags & B_ASYNC) &&
 	    !vm_page_count_severe() &&
 	    !buf_dirty_count_severe()) {
 		/* the copy's b_iodone is needed below, so bp must not have one */
 		if (bp->b_iodone != NULL) {
 			printf("bp->b_iodone = %p\n", bp->b_iodone);
 			panic("bwrite: need chained iodone");
 		}
 
 		/* get a new block */
 		newbp = geteblk(bp->b_bufsize);
 
 		/* set it to be identical to the old block */
 		memcpy(newbp->b_data, bp->b_data, bp->b_bufsize);
 		bgetvp(bp->b_vp, newbp);
 		newbp->b_lblkno = bp->b_lblkno;
 		newbp->b_blkno = bp->b_blkno;
 		newbp->b_offset = bp->b_offset;
 		newbp->b_iodone = vfs_backgroundwritedone;
 		newbp->b_flags |= B_ASYNC;
 		newbp->b_flags &= ~B_INVAL;
 
 		/* move over the dependencies */
 		if (LIST_FIRST(&bp->b_dep) != NULL)
 			buf_movedeps(bp, newbp);
 
 		/*
 		 * Initiate write on the copy, release the original to
 		 * the B_LOCKED queue so that it cannot go away until
 		 * the background write completes. If not locked it could go
 		 * away and then be reconstituted while it was being written.
 		 * If the reconstituted buffer were written, we could end up
 		 * with two background copies being written at the same time.
 		 */
 		bp->b_xflags |= BX_BKGRDINPROG;
 		bp->b_flags |= B_LOCKED;
 		bqrelse(bp);
 		/* from here on "bp" is the copy that actually gets written */
 		bp = newbp;
 	}
 
 	/* prepare the buffer for a fresh write request */
 	bp->b_flags &= ~B_DONE;
 	bp->b_ioflags &= ~BIO_ERROR;
 	bp->b_flags |= B_WRITEINPROG | B_CACHE;
 	bp->b_iocmd = BIO_WRITE;
 
 	bp->b_vp->v_numoutput++;
 	vfs_busy_pages(bp, 1);
 
 	/*
 	 * Normal bwrites pipeline writes
 	 */
 	/* charge the buffer to runningbufspace; runningbufwakeup() undoes this */
 	bp->b_runningbufspace = bp->b_bufsize;
 	runningbufspace += bp->b_runningbufspace;
 
 	if (curproc != PCPU_GET(idleproc))
 		curproc->p_stats->p_ru.ru_oublock++;
 	splx(s);
 	/*
 	 * NOTE(review): for async writes this presumably hands the buffer
 	 * lock to the kernel, since we will not wait for completion --
 	 * confirm against BUF_KERNPROC().
 	 */
 	if (oldflags & B_ASYNC)
 		BUF_KERNPROC(bp);
 	BUF_STRATEGY(bp);
 
 	if ((oldflags & B_ASYNC) == 0) {
 		/* synchronous: wait, release the buffer, report the status */
 		int rtval = bufwait(bp);
 		brelse(bp);
 		return (rtval);
 	} else {
 		/*
 		 * don't allow the async write to saturate the I/O
 		 * system.  There is no chance of deadlock here because
 		 * we are blocking on I/O that is already in-progress.
 		 */
 		waitrunningbufspace();
 	}
 
 	return (0);
 }
 
 /*
  * Complete a background write started from bwrite.
  *
  *	bp	the private copy that bwrite() created and wrote; bwrite()
  *		installs this function as the copy's b_iodone.
  *
  * Looks up the original buffer by (b_vp, b_lblkno), hands any unfinished
  * dependencies back to it, clears its background-write state, and then
  * finishes the copy through bufdone().
  */
 static void
 vfs_backgroundwritedone(bp)
 	struct buf *bp;
 {
 	struct buf *origbp;
 
 	/*
 	 * Find the original buffer that we are writing.
 	 */
 	if ((origbp = gbincore(bp->b_vp, bp->b_lblkno)) == NULL)
 		panic("backgroundwritedone: lost buffer");
 	/*
 	 * Process dependencies then return any unfinished ones.
 	 */
 	if (LIST_FIRST(&bp->b_dep) != NULL)
 		buf_complete(bp);
 	if (LIST_FIRST(&bp->b_dep) != NULL)
 		buf_movedeps(bp, origbp);
 	/*
 	 * Clear the BX_BKGRDINPROG flag in the original buffer
 	 * and awaken it if it is waiting for the write to complete.
 	 * If BX_BKGRDINPROG is not set in the original buffer it must
 	 * have been released and re-instantiated - which is not legal.
 	 */
 	KASSERT((origbp->b_xflags & BX_BKGRDINPROG), ("backgroundwritedone: lost buffer2"));
 	origbp->b_xflags &= ~BX_BKGRDINPROG;
 	if (origbp->b_xflags & BX_BKGRDWAIT) {
 		/* a synchronous bwrite() is sleeping on &b_xflags */
 		origbp->b_xflags &= ~BX_BKGRDWAIT;
 		wakeup(&origbp->b_xflags);
 	}
 	/*
 	 * Clear the B_LOCKED flag and remove it from the locked
 	 * queue if it currently resides there.
 	 */
 	origbp->b_flags &= ~B_LOCKED;
 	/* only requeue if the lock can be taken without sleeping */
 	if (BUF_LOCK(origbp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
 		bremfree(origbp);
 		bqrelse(origbp);
 	}
 	/*
 	 * This buffer is marked B_NOCACHE, so when it is released
 	 * by biodone, it will be tossed. We mark it with BIO_READ
 	 * to avoid biodone doing a second vwakeup.
 	 */
 	bp->b_flags |= B_NOCACHE;
 	bp->b_iocmd = BIO_READ;
 	bp->b_flags &= ~(B_CACHE | B_DONE);
 	/* clear the hook so bufdone() does not call back into this function */
 	bp->b_iodone = 0;
 	bufdone(bp);
 }
 
 /*
  * Delayed write. (Buffer is marked dirty).  Do not bother writing
  * anything if the buffer is marked invalid.
  *
  * Note that since the buffer must be completely valid, we can safely
  * set B_CACHE.  In fact, we have to set B_CACHE here rather than in
  * biodone() in order to prevent getblk from writing the buffer
  * out synchronously.
  *
  *	bp	locked (busy) buffer; panics if BUF_REFCNT(bp) is zero.
  *		The buffer is released before returning (brelse() if it
  *		is invalid, bqrelse() otherwise).
  */
 void
 bdwrite(struct buf * bp)
 {
 	if (BUF_REFCNT(bp) == 0)
 		panic("bdwrite: buffer is not busy");
 
 	if (bp->b_flags & B_INVAL) {
 		brelse(bp);
 		return;
 	}
 	bdirty(bp);
 
 	/*
 	 * Set B_CACHE, indicating that the buffer is fully valid.  This is
 	 * true even of NFS now.
 	 */
 	bp->b_flags |= B_CACHE;
 
 	/*
 	 * This bmap keeps the system from needing to do the bmap later,
 	 * perhaps when the system is attempting to do a sync.  Since it
 	 * is likely that the indirect block -- or whatever other datastructure
 	 * that the filesystem needs is still in memory now, it is a good
 	 * thing to do this.  Note also, that if the pageout daemon is
 	 * requesting a sync -- there might not be enough memory to do
 	 * the bmap then...  So, this is important to do.
 	 */
 	/* equal block numbers are taken to mean "not translated yet" */
 	if (bp->b_lblkno == bp->b_blkno) {
 		VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
 	}
 
 	/*
 	 * Set the *dirty* buffer range based upon the VM system dirty pages.
 	 */
 	vfs_setdirty(bp);
 
 	/*
 	 * We need to do this here to satisfy the vnode_pager and the
 	 * pageout daemon, so that it thinks that the pages have been
 	 * "cleaned".  Note that since the pages are in a delayed write
 	 * buffer -- the VFS layer "will" see that the pages get written
 	 * out on the next sync, or perhaps the cluster will be completed.
 	 */
 	vfs_clean_pages(bp);
 	bqrelse(bp);
 
 	/*
 	 * Wakeup the buffer flushing daemon if we have a lot of dirty
 	 * buffers (midpoint between our recovery point and our stall
 	 * point).
 	 */
 	bd_wakeup((lodirtybuffers + hidirtybuffers) / 2);
 
 	/*
 	 * note: we cannot initiate I/O from a bdwrite even if we wanted to,
 	 * due to the softdep code.
 	 */
 }
 
 /*
  *	bdirty:
  *
  *	Turn buffer into delayed write request.  We must clear BIO_READ and
  *	B_RELBUF, and we must set B_DELWRI.  We reassign the buffer to 
  *	itself to properly update it in the dirty/clean lists.  We mark it
  *	B_DONE to ensure that any asynchronization of the buffer properly
  *	clears B_DONE ( else a panic will occur later ).  
  *
  *	bdirty() is kinda like bdwrite() - we have to clear B_INVAL which
  *	might have been set pre-getblk().  Unlike bwrite/bdwrite, bdirty()
  *	should only be called if the buffer is known-good.
  *
  *	Since the buffer is not on a queue, we do not update the numfreebuffers
  *	count.
  *
  *	Must be called at splbio().
  *	The buffer must be on QUEUE_NONE.
  */
 void
 bdirty(struct buf *bp)
 {
 	KASSERT(bp->b_qindex == QUEUE_NONE, ("bdirty: buffer %p still on queue %d", bp, bp->b_qindex));
 
 	/* A dirty buffer must be kept, and its pending operation is a write. */
 	bp->b_flags &= ~(B_RELBUF);
 	bp->b_iocmd = BIO_WRITE;
 
 	/* Already a delayed write: the accounting below was done earlier. */
 	if (bp->b_flags & B_DELWRI)
 		return;
 
 	/*
 	 * First transition to delayed-write: flag it, move it to the
 	 * proper vnode list, count it, and poke the flusher once the
 	 * dirty count reaches the midpoint of the lo/hi thresholds.
 	 */
 	bp->b_flags |= B_DONE | B_DELWRI;
 	reassignbuf(bp, bp->b_vp);
 	++numdirtybuffers;
 	bd_wakeup((lodirtybuffers + hidirtybuffers) / 2);
 }
 
 /*
  *	bundirty:
  *
  *	Clear B_DELWRI for buffer.
  *
  *	Since the buffer is not on a queue, we do not update the numfreebuffers
  *	count.
  *	
  *	Must be called at splbio().
  *	The buffer must be on QUEUE_NONE.
  */
 
 void
 bundirty(struct buf *bp)
 {
 	int was_delwri;
 
 	KASSERT(bp->b_qindex == QUEUE_NONE, ("bundirty: buffer %p still on queue %d", bp, bp->b_qindex));
 
 	was_delwri = (bp->b_flags & B_DELWRI) != 0;
 	if (was_delwri) {
 		/*
 		 * Leaving the delayed-write state: fix the vnode list
 		 * membership and the dirty count, then release anyone
 		 * waiting for the count to fall to lodirtybuffers.
 		 */
 		bp->b_flags &= ~B_DELWRI;
 		reassignbuf(bp, bp->b_vp);
 		numdirtybuffers--;
 		numdirtywakeup(lodirtybuffers);
 	}
 
 	/* The buffer is being written now, so it is no longer deferred. */
 	bp->b_flags &= ~B_DEFERRED;
 }
 
 /*
  *	bawrite:
  *
  *	Asynchronous write.  Start output on a buffer, but do not wait for
  *	it to complete.  The buffer is released when the output completes.
  *
  *	bwrite() ( or the VOP routine anyway ) is responsible for handling 
  *	B_INVAL buffers.  Not us.
  */
 void
 bawrite(struct buf * bp)
 {
 	/* Mark the request asynchronous; the write's status is discarded. */
 	bp->b_flags |= B_ASYNC;
 	(void)BUF_WRITE(bp);
 }
 
 /*
  *	bowrite:
  *
  *	Ordered write.  Start output on a buffer, and flag it so that the 
  *	device will write it in the order it was queued.  The buffer is 
  *	released when the output completes.  bwrite() ( or the VOP routine
  *	anyway ) is responsible for handling B_INVAL buffers.
  */
 int
 bowrite(struct buf * bp)
 {
 	bp->b_ioflags |= BIO_ORDERED;
 	bp->b_flags |= B_ASYNC;
 	return (BUF_WRITE(bp));
 }
 
 /*
  *	bwillwrite:
  *
  *	Called prior to the locking of any vnodes when we are expecting to
  *	write.  We do not want to starve the buffer cache with too many
  *	dirty buffers so we block here.  By blocking prior to the locking
  *	of any vnodes we attempt to avoid the situation where a locked vnode
  *	prevents the various system daemons from flushing related buffers.
  */
 
 void
 bwillwrite(void)
 {
 	int s;
 
 	/* Fast path: below the stall threshold, return without raising spl. */
 	if (numdirtybuffers < hidirtybuffers)
 		return;
 
 	/*
 	 * Too many dirty buffers.  Re-test at splbio(), and for as long
 	 * as the condition holds post a flush request, register as a
 	 * dirty-flush waiter and sleep until numdirtywakeup() wakes us.
 	 */
 	s = splbio();
 	while (numdirtybuffers >= hidirtybuffers) {
 		bd_wakeup(1);
 		needsbuffer |= VFS_BIO_NEED_DIRTYFLUSH;
 		tsleep(&needsbuffer, (PRIBIO + 4), "flswai", 0);
 	}
 	splx(s);
 }
 
 /*
  * Return true if we have too many dirty buffers.
  */
 int
 buf_dirty_count_severe(void)
 {
 	/* Severe means the dirty count has reached the high-water mark. */
 	if (numdirtybuffers >= hidirtybuffers)
 		return (1);
 	return (0);
 }
 
 /*
  *	brelse:
  *
  *	Release a busy buffer and, if requested, free its resources.  The
  *	buffer will be stashed in the appropriate bufqueue[] allowing it
  *	to be accessed later as a cache entity or reused for other purposes.
  */
 void
 brelse(struct buf * bp)
 {
 	int s;
 
 	KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
 
 	s = splbio();
 
 	if (bp->b_flags & B_LOCKED)
 		bp->b_ioflags &= ~BIO_ERROR;
 
 	if (bp->b_iocmd == BIO_WRITE &&
 	    (bp->b_ioflags & BIO_ERROR) &&
 	    !(bp->b_flags & B_INVAL)) {
 		/*
 		 * Failed write, redirty.  Must clear BIO_ERROR to prevent
 		 * pages from being scrapped.  If B_INVAL is set then
 		 * this case is not run and the next case is run to 
 		 * destroy the buffer.  B_INVAL can occur if the buffer
 		 * is outside the range supported by the underlying device.
 		 */
 		bp->b_ioflags &= ~BIO_ERROR;
 		bdirty(bp);
 	} else if ((bp->b_flags & (B_NOCACHE | B_INVAL)) ||
 	    (bp->b_ioflags & BIO_ERROR) ||
 	    bp->b_iocmd == BIO_DELETE || (bp->b_bufsize <= 0)) {
 		/*
 		 * Either a failed I/O or we were asked to free or not
 		 * cache the buffer.
 		 */
 		bp->b_flags |= B_INVAL;
 		if (LIST_FIRST(&bp->b_dep) != NULL)
 			buf_deallocate(bp);
 		if (bp->b_flags & B_DELWRI) {
 			--numdirtybuffers;
 			numdirtywakeup(lodirtybuffers);
 		}
 		bp->b_flags &= ~(B_DELWRI | B_CACHE);
 		if ((bp->b_flags & B_VMIO) == 0) {
 			if (bp->b_bufsize)
 				allocbuf(bp, 0);
 			if (bp->b_vp)
 				brelvp(bp);
 		}
 	}
 
 	/*
 	 * We must clear B_RELBUF if B_DELWRI is set.  If vfs_vmio_release() 
 	 * is called with B_DELWRI set, the underlying pages may wind up
 	 * getting freed causing a previous write (bdwrite()) to get 'lost'
 	 * because pages associated with a B_DELWRI bp are marked clean.
 	 * 
 	 * We still allow the B_INVAL case to call vfs_vmio_release(), even
 	 * if B_DELWRI is set.
 	 *
 	 * If B_DELWRI is not set we may have to set B_RELBUF if we are low
 	 * on pages to return pages to the VM page queues.
 	 */
 	if (bp->b_flags & B_DELWRI)
 		bp->b_flags &= ~B_RELBUF;
 	else if (vm_page_count_severe() && !(bp->b_xflags & BX_BKGRDINPROG))
 		bp->b_flags |= B_RELBUF;
 
 	/*
 	 * VMIO buffer rundown.  It is not very necessary to keep a VMIO buffer
 	 * constituted, not even NFS buffers now.  Two flags effect this.  If
 	 * B_INVAL, the struct buf is invalidated but the VM object is kept
 	 * around ( i.e. so it is trivial to reconstitute the buffer later ).
 	 *
 	 * If BIO_ERROR or B_NOCACHE is set, pages in the VM object will be
 	 * invalidated.  BIO_ERROR cannot be set for a failed write unless the
 	 * buffer is also B_INVAL because it hits the re-dirtying code above.
 	 *
 	 * Normally we can do this whether a buffer is B_DELWRI or not.  If
 	 * the buffer is an NFS buffer, it is tracking piecemeal writes or
 	 * the commit state and we cannot afford to lose the buffer. If the
 	 * buffer has a background write in progress, we need to keep it
 	 * around to prevent it from being reconstituted and starting a second
 	 * background write.
 	 */
 	if ((bp->b_flags & B_VMIO)
 	    && !(bp->b_vp->v_tag == VT_NFS &&
 		 !vn_isdisk(bp->b_vp, NULL) &&
 		 (bp->b_flags & B_DELWRI))
 	    ) {
 
 		int i, j, resid;
 		vm_page_t m;
 		off_t foff;
 		vm_pindex_t poff;
 		vm_object_t obj;
 		struct vnode *vp;
 
 		vp = bp->b_vp;
 
 		/*
 		 * Get the base offset and length of the buffer.  Note that 
 		 * for block sizes that are less then PAGE_SIZE, the b_data
 		 * base of the buffer does not represent exactly b_offset and
 		 * neither b_offset nor b_size are necessarily page aligned.
 		 * Instead, the starting position of b_offset is:
 		 *
 		 * 	b_data + (b_offset & PAGE_MASK)
 		 *
 		 * block sizes less then DEV_BSIZE (usually 512) are not 
 		 * supported due to the page granularity bits (m->valid,
 		 * m->dirty, etc...). 
 		 *
 		 * See man buf(9) for more information
 		 */
 		resid = bp->b_bufsize;
 		foff = bp->b_offset;
 
 		for (i = 0; i < bp->b_npages; i++) {
 			int had_bogus = 0;
 
 			m = bp->b_pages[i];
 			vm_page_flag_clear(m, PG_ZERO);
 
 			/*
 			 * If we hit a bogus page, fixup *all* the bogus pages
 			 * now.
 			 */
 			if (m == bogus_page) {
 				VOP_GETVOBJECT(vp, &obj);
 				poff = OFF_TO_IDX(bp->b_offset);
 				had_bogus = 1;
 
 				for (j = i; j < bp->b_npages; j++) {
 					vm_page_t mtmp;
 					mtmp = bp->b_pages[j];
 					if (mtmp == bogus_page) {
 						mtmp = vm_page_lookup(obj, poff + j);
 						if (!mtmp) {
 							panic("brelse: page missing\n");
 						}
 						bp->b_pages[j] = mtmp;
 					}
 				}
 
 				if ((bp->b_flags & B_INVAL) == 0) {
 					pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
 				}
 				m = bp->b_pages[i];
 			}
 			if ((bp->b_flags & B_NOCACHE) || (bp->b_ioflags & BIO_ERROR)) {
 				int poffset = foff & PAGE_MASK;
 				int presid = resid > (PAGE_SIZE - poffset) ?
 					(PAGE_SIZE - poffset) : resid;
 
 				KASSERT(presid >= 0, ("brelse: extra page"));
 				vm_page_set_invalid(m, poffset, presid);
 				if (had_bogus)
 					printf("avoided corruption bug in bogus_page/brelse code\n");
 			}
 			resid -= PAGE_SIZE - (foff & PAGE_MASK);
 			foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
 		}
 
 		if (bp->b_flags & (B_INVAL | B_RELBUF))
 			vfs_vmio_release(bp);
 
 	} else if (bp->b_flags & B_VMIO) {
 
 		if (bp->b_flags & (B_INVAL | B_RELBUF))
 			vfs_vmio_release(bp);
 
 	}
 			
 	if (bp->b_qindex != QUEUE_NONE)
 		panic("brelse: free buffer onto another queue???");
 	if (BUF_REFCNT(bp) > 1) {
 		/* do not release to free list */
 		BUF_UNLOCK(bp);
 		splx(s);
 		return;
 	}
 
 	/* enqueue */
 
 	/* buffers with no memory */
 	if (bp->b_bufsize == 0) {
 		bp->b_flags |= B_INVAL;
 		bp->b_xflags &= ~BX_BKGRDWRITE;
 		if (bp->b_xflags & BX_BKGRDINPROG)
 			panic("losing buffer 1");
 		if (bp->b_kvasize) {
 			bp->b_qindex = QUEUE_EMPTYKVA;
 		} else {
 			bp->b_qindex = QUEUE_EMPTY;
 		}
 		TAILQ_INSERT_HEAD(&bufqueues[bp->b_qindex], bp, b_freelist);
 		LIST_REMOVE(bp, b_hash);
 		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
 		bp->b_dev = NODEV;
 	/* buffers with junk contents */
 	} else if (bp->b_flags & (B_INVAL | B_NOCACHE | B_RELBUF) || (bp->b_ioflags & BIO_ERROR)) {
 		bp->b_flags |= B_INVAL;
 		bp->b_xflags &= ~BX_BKGRDWRITE;
 		if (bp->b_xflags & BX_BKGRDINPROG)
 			panic("losing buffer 2");
 		bp->b_qindex = QUEUE_CLEAN;
 		TAILQ_INSERT_HEAD(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
 		LIST_REMOVE(bp, b_hash);
 		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
 		bp->b_dev = NODEV;
 
 	/* buffers that are locked */
 	} else if (bp->b_flags & B_LOCKED) {
 		bp->b_qindex = QUEUE_LOCKED;
 		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
 
 	/* remaining buffers */
 	} else {
 		switch(bp->b_flags & (B_DELWRI|B_AGE)) {
 		case B_DELWRI | B_AGE:
 		    bp->b_qindex = QUEUE_DIRTY;
 		    TAILQ_INSERT_HEAD(&bufqueues[QUEUE_DIRTY], bp, b_freelist);
 		    break;
 		case B_DELWRI:
 		    bp->b_qindex = QUEUE_DIRTY;
 		    TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist);
 		    break;
 		case B_AGE:
 		    bp->b_qindex = QUEUE_CLEAN;
 		    TAILQ_INSERT_HEAD(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
 		    break;
 		default:
 		    bp->b_qindex = QUEUE_CLEAN;
 		    TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
 		    break;
 		}
 	}
 
 	/*
 	 * If B_INVAL, clear B_DELWRI.  We've already placed the buffer
 	 * on the correct queue.
 	 */
 	if ((bp->b_flags & (B_INVAL|B_DELWRI)) == (B_INVAL|B_DELWRI)) {
 		bp->b_flags &= ~B_DELWRI;
 		--numdirtybuffers;
 		numdirtywakeup(lodirtybuffers);
 	}
 
 	/*
 	 * Fixup numfreebuffers count.  The bp is on an appropriate queue
 	 * unless locked.  We then bump numfreebuffers if it is not B_DELWRI.
 	 * We've already handled the B_INVAL case ( B_DELWRI will be clear
 	 * if B_INVAL is set ).
 	 */
 
 	if ((bp->b_flags & B_LOCKED) == 0 && !(bp->b_flags & B_DELWRI))
 		bufcountwakeup();
 
 	/*
 	 * Something we can maybe free or reuse
 	 */
 	if (bp->b_bufsize || bp->b_kvasize)
 		bufspacewakeup();
 
 	/* unlock */
 	BUF_UNLOCK(bp);
 	bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
 	bp->b_ioflags &= ~BIO_ORDERED;
 	if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
 		panic("brelse: not dirty");
 	splx(s);
 }
 
 /*
  * Release a buffer back to the appropriate queue but do not try to free
  * it.  The buffer is expected to be used again soon.
  *
  * bqrelse() is used by bdwrite() to requeue a delayed write, and used by
  * biodone() to requeue an async I/O on completion.  It is also used when
  * known good buffers need to be requeued but we think we may need the data
  * again soon.
  */
 void
 bqrelse(struct buf * bp)
 {
 	int s;
 
 	s = splbio();
 
 	KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("bqrelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
 
 	if (bp->b_qindex != QUEUE_NONE)
 		panic("bqrelse: free buffer onto another queue???");
 	if (BUF_REFCNT(bp) > 1) {
 		/* do not release to free list */
 		BUF_UNLOCK(bp);
 		splx(s);
 		return;
 	}
 	if (bp->b_flags & B_LOCKED) {
 		bp->b_ioflags &= ~BIO_ERROR;
 		bp->b_qindex = QUEUE_LOCKED;
 		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist);
 		/* buffers with stale but valid contents */
 	} else if (bp->b_flags & B_DELWRI) {
 		bp->b_qindex = QUEUE_DIRTY;
 		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist);
 	} else if (vm_page_count_severe()) {
 		/*
 		 * We are too low on memory, we have to try to free the
 		 * buffer (most importantly: the wired pages making up its
 		 * backing store) *now*.
 		 */
 		splx(s);
 		brelse(bp);
 		return;
 	} else {
 		bp->b_qindex = QUEUE_CLEAN;
 		TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
 	}
 
 	if ((bp->b_flags & B_LOCKED) == 0 &&
 	    ((bp->b_flags & B_INVAL) || !(bp->b_flags & B_DELWRI))) {
 		bufcountwakeup();
 	}
 
 	/*
 	 * Something we can maybe free or reuse.
 	 */
 	if (bp->b_bufsize && !(bp->b_flags & B_DELWRI))
 		bufspacewakeup();
 
 	/* unlock */
 	BUF_UNLOCK(bp);
 	bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
 	bp->b_ioflags &= ~BIO_ORDERED;
 	if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
 		panic("bqrelse: not dirty");
 	splx(s);
 }
 
 /*
  * Detach every VM page from a VMIO buffer: unwire the pages, free or
  * cache those that are idle, unmap the buffer's KVA, and strip the
  * buffer of its memory, its B_VMIO flag and its vnode association.
  */
 static void
 vfs_vmio_release(struct buf *bp)
 {
 	vm_page_t pg;
 	int idx, ipl;
 
 	ipl = splvm();
 	for (idx = 0; idx < bp->b_npages; idx++) {
 		pg = bp->b_pages[idx];
 		bp->b_pages[idx] = NULL;
 
 		/*
 		 * Unwire with activate == 0 so that everything lands on
 		 * the inactive queue, keeping page LRU ordering consistent.
 		 */
 		vm_page_unwire(pg, 0);
 
 		/*
 		 * Busy pages are left alone; whoever busied them is
 		 * responsible for dealing with them.
 		 */
 		if ((pg->flags & PG_BUSY) || (pg->busy != 0))
 			continue;
 
 		/* Still wired by someone else: nothing more to do. */
 		if (pg->wire_count != 0)
 			continue;
 
 		vm_page_flag_clear(pg, PG_ZERO);
 
 		if ((bp->b_flags & B_ASYNC) == 0 && !pg->valid && pg->hold_count == 0) {
 			/* No valid data and nobody holding it: free it. */
 			vm_page_busy(pg);
 			vm_page_protect(pg, VM_PROT_NONE);
 			vm_page_free(pg);
 		} else if (vm_page_count_severe()) {
 			vm_page_try_to_cache(pg);
 		}
 	}
 	splx(ipl);
 
 	/* Tear down the kernel mapping of the (former) page run. */
 	pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
 
 	if (bp->b_bufsize) {
 		bufspacewakeup();
 		bp->b_bufsize = 0;
 	}
 	bp->b_npages = 0;
 	bp->b_flags &= ~B_VMIO;
 	if (bp->b_vp)
 		brelvp(bp);
 }
 
 /*
  * Check to see if a block is currently memory resident.
  */
 struct buf *
 gbincore(struct vnode * vp, daddr_t blkno)
 {
 	struct bufhashhdr *chain;
 	struct buf *cand;
 
 	chain = bufhash(vp, blkno);
 
 	/*
 	 * Walk the hash chain; the first valid (non-B_INVAL) buffer that
 	 * belongs to this vnode and logical block is the answer.
 	 */
 	LIST_FOREACH(cand, chain, b_hash) {
 		if (cand->b_vp != vp || cand->b_lblkno != blkno)
 			continue;
 		if ((cand->b_flags & B_INVAL) != 0)
 			continue;
 		return (cand);
 	}
 
 	/* Chain exhausted without a hit. */
 	return (NULL);
 }
 
 /*
  *	vfs_bio_awrite:
  *
  *	Implement clustered async writes for clearing out B_DELWRI buffers.
  *	This is much better than the old way of writing only one buffer at
  *	a time.  Note that we may not be presented with the buffers in the
  *	correct order, so we search for the cluster in both directions.
  */
 /*
  * Returns the value handed back by cluster_wbuild() when a cluster of
  * more than one block is written, otherwise bp->b_bufsize for the single
  * asynchronous write of bp itself.
  */
 int
 vfs_bio_awrite(struct buf * bp)
 {
 	int i;
 	int j;
 	daddr_t lblkno = bp->b_lblkno;
 	struct vnode *vp = bp->b_vp;
 	int s;
 	int ncl;
 	struct buf *bpa;
 	int nwritten;
 	int size;
 	int maxcl;
 
 	s = splbio();
 	/*
 	 * right now we support clustered writing only to regular files.  If
 	 * we find a clusterable block we could be in the middle of a cluster
 	 * rather than at the beginning.
 	 */
 	if ((vp->v_type == VREG) && 
 	    (vp->v_mount != 0) && /* Only on nodes that have the size info */
 	    (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) {
 
 		size = vp->v_mount->mnt_stat.f_iosize;
 		maxcl = MAXPHYS / size;
 
 		/*
 		 * Forward scan: count how many following logical blocks are
 		 * unreferenced, delayed-write, clusterable, of the same size
 		 * and physically contiguous with bp.  On exit, i is the
 		 * number of blocks in the run starting at bp (bp included).
 		 * NOTE(review): b_blkno == b_lblkno presumably means the
 		 * block has no physical address assigned yet -- confirm.
 		 */
 		for (i = 1; i < maxcl; i++) {
 			if ((bpa = gbincore(vp, lblkno + i)) &&
 			    BUF_REFCNT(bpa) == 0 &&
 			    ((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) ==
 			    (B_DELWRI | B_CLUSTEROK)) &&
 			    (bpa->b_bufsize == size)) {
 				if ((bpa->b_blkno == bpa->b_lblkno) ||
 				    (bpa->b_blkno !=
 				     bp->b_blkno + ((i * size) >> DEV_BSHIFT)))
 					break;
 			} else {
 				break;
 			}
 		}
 		/*
 		 * Backward scan with the same acceptance test, bounded so the
 		 * total never exceeds maxcl and we never go below block 0.
 		 */
 		for (j = 1; i + j <= maxcl && j <= lblkno; j++) {
 			if ((bpa = gbincore(vp, lblkno - j)) &&
 			    BUF_REFCNT(bpa) == 0 &&
 			    ((bpa->b_flags & (B_DELWRI | B_CLUSTEROK | B_INVAL)) ==
 			    (B_DELWRI | B_CLUSTEROK)) &&
 			    (bpa->b_bufsize == size)) {
 				if ((bpa->b_blkno == bpa->b_lblkno) ||
 				    (bpa->b_blkno !=
 				     bp->b_blkno - ((j * size) >> DEV_BSHIFT)))
 					break;
 			} else {
 				break;
 			}
 		}
 		/* j stopped one past the last accepted preceding block. */
 		--j;
 		ncl = i + j;
 		/*
 		 * this is a possible cluster write
 		 */
 		if (ncl != 1) {
 			nwritten = cluster_wbuild(vp, size, lblkno - j, ncl);
 			splx(s);
 			return nwritten;
 		}
 	}
 
 	/* No cluster: lock bp, pull it off its queue and write it alone. */
 	BUF_LOCK(bp, LK_EXCLUSIVE);
 	bremfree(bp);
 	bp->b_flags |= B_ASYNC;
 
 	splx(s);
 	/*
 	 * default (old) behavior, writing out only one block
 	 *
 	 * XXX returns b_bufsize instead of b_bcount for nwritten?
 	 */
 	/* Sample the size before BUF_WRITE(); the write is asynchronous. */
 	nwritten = bp->b_bufsize;
 	(void) BUF_WRITE(bp);
 
 	return nwritten;
 }
 
 /*
  *	getnewbuf:
  *
  *	Find and initialize a new buffer header, freeing up existing buffers 
  *	in the bufqueues as necessary.  The new buffer is returned locked.
  *
  *	Important:  B_INVAL is not set.  If the caller wishes to throw the
  *	buffer away, the caller must set B_INVAL prior to calling brelse().
  *
  *	We block if:
  *		We have insufficient buffer headers
  *		We have insufficient buffer space
  *		buffer_map is too fragmented ( space reservation fails )
  *		If we have to flush dirty buffers ( but we try to avoid this )
  *
  *	To avoid VFS layer recursion we do not flush dirty buffers ourselves.
  *	Instead we ask the buf daemon to do it for us.  We attempt to
  *	avoid piecemeal wakeups of the pageout daemon.
  */
 
 /*
  * Parameters as used by the body below:
  *	slpflag, slptimeo  passed to tsleep() when no buffer can be found
  *	size               not referenced in this function
  *	maxsize            amount of KVA to reserve (rounded up to a
  *	                   BKVASIZE multiple before use)
  *
  * Returns a locked buffer, or NULL.  NULL is returned both when tsleep()
  * reports an error/timeout and when the sleep completes normally (bp is
  * still NULL at that point), so callers have to retry on NULL.
  */
 static struct buf *
 getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
 {
 	struct buf *bp;
 	struct buf *nbp;
 	int defrag = 0;
 	int nqindex;
 	/* Persists across calls: set while bufspace is being driven down. */
 	static int flushingbufs;
 
 	/*
 	 * We can't afford to block since we might be holding a vnode lock,
 	 * which may prevent system daemons from running.  We deal with
 	 * low-memory situations by proactively returning memory and running
 	 * async I/O rather than sync I/O.
 	 */
 
 	/* Pre-decrement so the first pass through restart: nets to zero. */
 	++getnewbufcalls;
 	--getnewbufrestarts;
 restart:
 	++getnewbufrestarts;
 
 	/*
 	 * Setup for scan.  If we do not have enough free buffers,
 	 * we setup a degenerate case that immediately fails.  Note
 	 * that if we are specially marked process, we are allowed to
 	 * dip into our reserves.
 	 *
 	 * The scanning sequence is nominally:  EMPTY->EMPTYKVA->CLEAN
 	 *
 	 * We start with EMPTYKVA.  If the list is empty we backup to EMPTY.
 	 * However, there are a number of cases (defragging, reusing, ...)
 	 * where we cannot backup.
 	 */
 	nqindex = QUEUE_EMPTYKVA;
 	nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]);
 
 	if (nbp == NULL) {
 		/*
 		 * If no EMPTYKVA buffers and we are either
 		 * defragging or reusing, locate a CLEAN buffer
 		 * to free or reuse.  If bufspace usage is low
 		 * skip this step so we can allocate a new buffer.
 		 */
 		if (defrag || bufspace >= lobufspace) {
 			nqindex = QUEUE_CLEAN;
 			nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]);
 		}
 
 		/*
 		 * If we could not find or were not allowed to reuse a
 		 * CLEAN buffer, check to see if it is ok to use an EMPTY
 		 * buffer.  We can only use an EMPTY buffer if allocating
 		 * its KVA would not otherwise run us out of buffer space.
 		 */
 		if (nbp == NULL && defrag == 0 &&
 		    bufspace + maxsize < hibufspace) {
 			nqindex = QUEUE_EMPTY;
 			nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]);
 		}
 	}
 
 	/*
 	 * Run scan, possibly freeing data and/or kva mappings on the fly
 	 * depending.
 	 */
 
 	while ((bp = nbp) != NULL) {
 		int qindex = nqindex;
 
 		/*
 		 * Calculate next bp ( we can only use it if we do not block
 		 * or do other fancy things ).
 		 */
 		if ((nbp = TAILQ_NEXT(bp, b_freelist)) == NULL) {
 			switch(qindex) {
 			case QUEUE_EMPTY:
 				nqindex = QUEUE_EMPTYKVA;
 				if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA])))
 					break;
 				/* fall through */
 			case QUEUE_EMPTYKVA:
 				nqindex = QUEUE_CLEAN;
 				if ((nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN])))
 					break;
 				/* fall through */
 			case QUEUE_CLEAN:
 				/*
 				 * nbp is NULL. 
 				 */
 				break;
 			}
 		}
 
 		/*
 		 * Sanity Checks
 		 */
 		KASSERT(bp->b_qindex == qindex, ("getnewbuf: inconsistant queue %d bp %p", qindex, bp));
 
 		/*
 		 * Note: we no longer distinguish between VMIO and non-VMIO
 		 * buffers.
 		 */
 
 		KASSERT((bp->b_flags & B_DELWRI) == 0, ("delwri buffer %p found in queue %d", bp, qindex));
 
 		/*
 		 * If we are defragging then we need a buffer with 
 		 * b_kvasize != 0.  XXX this situation should no longer
 		 * occur, if defrag is non-zero the buffer's b_kvasize
 		 * should also be non-zero at this point.  XXX
 		 */
 		if (defrag && bp->b_kvasize == 0) {
 			printf("Warning: defrag empty buffer %p\n", bp);
 			continue;
 		}
 
 		/*
 		 * Start freeing the bp.  This is somewhat involved.  nbp
 		 * remains valid only for QUEUE_EMPTY[KVA] bp's.
 		 */
 
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT) != 0)
 			panic("getnewbuf: locked buf");
 		bremfree(bp);
 
 		if (qindex == QUEUE_CLEAN) {
 			if (bp->b_flags & B_VMIO) {
 				/*
 				 * vfs_vmio_release() only frees invalid
 				 * pages when B_ASYNC is clear.
 				 */
 				bp->b_flags &= ~B_ASYNC;
 				vfs_vmio_release(bp);
 			}
 			if (bp->b_vp)
 				brelvp(bp);
 		}
 
 		/*
 		 * NOTE:  nbp is now entirely invalid.  We can only restart
 		 * the scan from this point on.
 		 *
 		 * Get the rest of the buffer freed up.  b_kva* is still
 		 * valid after this operation.
 		 */
 
 		if (bp->b_rcred != NOCRED) {
 			crfree(bp->b_rcred);
 			bp->b_rcred = NOCRED;
 		}
 		if (bp->b_wcred != NOCRED) {
 			crfree(bp->b_wcred);
 			bp->b_wcred = NOCRED;
 		}
 		if (LIST_FIRST(&bp->b_dep) != NULL)
 			buf_deallocate(bp);
 		if (bp->b_xflags & BX_BKGRDINPROG)
 			panic("losing buffer 3");
 		/* Move the buffer from its block hash chain to invalhash. */
 		LIST_REMOVE(bp, b_hash);
 		LIST_INSERT_HEAD(&invalhash, bp, b_hash);
 
 		/* Release whatever memory still backs the buffer. */
 		if (bp->b_bufsize)
 			allocbuf(bp, 0);
 
 		/* Reset the header to a pristine, unassociated state. */
 		bp->b_flags = 0;
 		bp->b_ioflags = 0;
 		bp->b_xflags = 0;
 		bp->b_dev = NODEV;
 		bp->b_vp = NULL;
 		bp->b_blkno = bp->b_lblkno = 0;
 		bp->b_offset = NOOFFSET;
 		bp->b_iodone = 0;
 		bp->b_error = 0;
 		bp->b_resid = 0;
 		bp->b_bcount = 0;
 		bp->b_npages = 0;
 		bp->b_dirtyoff = bp->b_dirtyend = 0;
+		bp->b_magic = B_MAGIC_BIO;
+		bp->b_op = &buf_ops_bio;
 
 		LIST_INIT(&bp->b_dep);
 
 		/*
 		 * If we are defragging then free the buffer.
 		 */
 		if (defrag) {
 			bp->b_flags |= B_INVAL;
 			bfreekva(bp);
 			brelse(bp);
 			defrag = 0;
 			goto restart;
 		}
 
 		/*
 		 * If we are overcommitted then recover the buffer and its
 		 * KVM space.  This occurs in rare situations when multiple
 		 * processes are blocked in getnewbuf() or allocbuf().
 		 */
 		if (bufspace >= hibufspace)
 			flushingbufs = 1;
 		if (flushingbufs && bp->b_kvasize != 0) {
 			bp->b_flags |= B_INVAL;
 			bfreekva(bp);
 			brelse(bp);
 			goto restart;
 		}
 		if (bufspace < lobufspace)
 			flushingbufs = 0;
 		break;
 	}
 
 	/*
 	 * If we exhausted our list, sleep as appropriate.  We may have to
 	 * wakeup various daemons and write out some dirty buffers.
 	 *
 	 * Generally we are sleeping due to insufficient buffer space.
 	 */
 
 	if (bp == NULL) {
 		int flags;
 		char *waitmsg;
 
 		if (defrag) {
 			flags = VFS_BIO_NEED_BUFSPACE;
 			waitmsg = "nbufkv";
 		} else if (bufspace >= hibufspace) {
 			waitmsg = "nbufbs";
 			flags = VFS_BIO_NEED_BUFSPACE;
 		} else {
 			waitmsg = "newbuf";
 			flags = VFS_BIO_NEED_ANY;
 		}
 
 		bd_speedup();	/* heeeelp */
 
 		/*
 		 * Sleep until the requested need bits are cleared.  A
 		 * non-zero tsleep() result aborts with NULL; a normal
 		 * wakeup falls out of the loop and also returns NULL
 		 * (bp is still NULL), leaving the retry to the caller.
 		 */
 		needsbuffer |= flags;
 		while (needsbuffer & flags) {
 			if (tsleep(&needsbuffer, (PRIBIO + 4) | slpflag,
 			    waitmsg, slptimeo))
 				return (NULL);
 		}
 	} else {
 		/*
 		 * We finally have a valid bp.  We aren't quite out of the
 		 * woods, we still have to reserve kva space.  In order
 		 * to keep fragmentation sane we only allocate kva in
 		 * BKVASIZE chunks.
 		 */
 		maxsize = (maxsize + BKVAMASK) & ~BKVAMASK;
 
 		if (maxsize != bp->b_kvasize) {
 			vm_offset_t addr = 0;
 
 			bfreekva(bp);
 
 			if (vm_map_findspace(buffer_map,
 				vm_map_min(buffer_map), maxsize, &addr)) {
 				/*
 				 * Uh oh.  Buffer map is too fragmented.  We
 				 * must defragment the map.
 				 */
 				++bufdefragcnt;
 				defrag = 1;
 				bp->b_flags |= B_INVAL;
 				brelse(bp);
 				goto restart;
 			}
 			if (addr) {
 				vm_map_insert(buffer_map, NULL, 0,
 					addr, addr + maxsize,
 					VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
 
 				bp->b_kvabase = (caddr_t) addr;
 				bp->b_kvasize = maxsize;
 				bufspace += bp->b_kvasize;
 				++bufreusecnt;
 			}
 		}
 		bp->b_data = bp->b_kvabase;
 	}
 	return(bp);
 }
 
 /*
  *	buf_daemon:
  *
  *	buffer flushing daemon.  Buffers are normally flushed by the
  *	update daemon but if it cannot keep up this process starts to
  *	take the load in an attempt to prevent getnewbuf() from blocking.
  */
 
 /*
  * Process pointer for the buffer daemon.  Its address is stored in
  * buf_kp below; presumably kproc_start() fills it in -- confirm.
  */
 static struct proc *bufdaemonproc;
 
 /*
  * Kernel process descriptor for the buffer daemon: process name, entry
  * function, and where to record the resulting process pointer.
  */
 static struct kproc_desc buf_kp = {
 	"bufdaemon",
 	buf_daemon,
 	&bufdaemonproc
 };
 /* Register kproc_start(&buf_kp) to run at SI_SUB_KTHREAD_BUF. */
 SYSINIT(bufdaemon, SI_SUB_KTHREAD_BUF, SI_ORDER_FIRST, kproc_start, &buf_kp)
 
 static void
 buf_daemon()
 {
 	int ipl;
 
 	mtx_lock(&Giant);
 
 	/*
 	 * Arrange for this process to be suspended before the shutdown
 	 * sync runs.
 	 */
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, bufdaemonproc,
 	    SHUTDOWN_PRI_LAST);
 
 	/*
 	 * The daemon is allowed to take the buffer cache to the limit.
 	 */
 	curproc->p_flag |= P_BUFEXHAUST;
 	ipl = splbio();
 
 	while (1) {
 		kthread_suspend_check(bufdaemonproc);
 
 		bd_request = 0;
 
 		/*
 		 * Flush one buffer at a time while we are above the low
 		 * water mark and flushbufqueues() still finds work.  After
 		 * each flush, throttle on in-transit I/O so the I/O system
 		 * is not saturated, and wake waiters early so they can run
 		 * in parallel with the rest of the drain.
 		 */
 		while (numdirtybuffers > lodirtybuffers &&
 		    flushbufqueues() != 0) {
 			waitrunningbufspace();
 			numdirtywakeup((lodirtybuffers + hidirtybuffers) / 2);
 		}
 
 		if (numdirtybuffers > lodirtybuffers) {
 			/*
 			 * Still too many dirty buffers but nothing was
 			 * flushable (rare): nap for half a second and
 			 * try again.
 			 */
 			tsleep(&bd_request, PVM, "qsleep", hz / 2);
 			continue;
 		}
 
 		/*
 		 * Low water mark reached: reset the request and sleep
 		 * until needed again.  The sleep is bounded just so the
 		 * suspend code works.
 		 */
 		bd_request = 0;
 		tsleep(&bd_request, PVM, "psleep", hz);
 	}
 }
 
 /*
  *	flushbufqueues:
  *
  *	Try to flush a buffer in the dirty queue.  We must be careful to
  *	free up B_INVAL buffers instead of writing them, which NFS is
  *	particularly sensitive to.
  */
 
 static int
 flushbufqueues(void)
 {
 	struct buf *cur;
 
 	cur = TAILQ_FIRST(&bufqueues[QUEUE_DIRTY]);
 	while (cur != NULL) {
 		KASSERT((cur->b_flags & B_DELWRI), ("unexpected clean buffer %p", cur));
 
 		/* Skip anything not dirty or with a background write going. */
 		if ((cur->b_flags & B_DELWRI) == 0 ||
 		    (cur->b_xflags & BX_BKGRDINPROG) != 0) {
 			cur = TAILQ_NEXT(cur, b_freelist);
 			continue;
 		}
 
 		/* Invalid buffers are released, never written. */
 		if (cur->b_flags & B_INVAL) {
 			if (BUF_LOCK(cur, LK_EXCLUSIVE | LK_NOWAIT) != 0)
 				panic("flushbufqueues: locked buf");
 			bremfree(cur);
 			brelse(cur);
 			return (1);
 		}
 
 		/*
 		 * A buffer with outstanding dependencies gets deferred
 		 * once: move it to the tail, mark it B_DEFERRED, and
 		 * restart the scan from the head of the queue.
 		 */
 		if (LIST_FIRST(&cur->b_dep) != NULL &&
 		    (cur->b_flags & B_DEFERRED) == 0 &&
 		    buf_countdeps(cur, 0)) {
 			TAILQ_REMOVE(&bufqueues[QUEUE_DIRTY],
 			    cur, b_freelist);
 			TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY],
 			    cur, b_freelist);
 			cur->b_flags |= B_DEFERRED;
 			cur = TAILQ_FIRST(&bufqueues[QUEUE_DIRTY]);
 			continue;
 		}
 
 		/* Ordinary dirty buffer: start an async (clustered) write. */
 		vfs_bio_awrite(cur);
 		return (1);
 	}
 
 	/* Nothing on the dirty queue could be flushed. */
 	return (0);
 }
 
 /*
  * Check to see if a block is currently memory resident.
  */
 struct buf *
 incore(struct vnode * vp, daddr_t blkno)
 {
 	struct buf *found;
 	int ipl;
 
 	/* Do the hash lookup with the priority level raised via splbio(). */
 	ipl = splbio();
 	found = gbincore(vp, blkno);
 	splx(ipl);
 
 	return (found);
 }
 
 /*
  * Returns true if no I/O is needed to access the
  * associated VM object.  This is like incore except
  * it also hunts around in the VM system for the data.
  */
 
 int
 inmem(struct vnode * vp, daddr_t blkno)
 {
 	vm_object_t vmobj;
 	vm_page_t pg;
 	vm_ooffset_t base;
 	vm_offset_t done, step, chunk;
 
 	/* A buffer already in the cache settles it. */
 	if (incore(vp, blkno))
 		return 1;
 
 	/* Without a mount, a VM object and VOBJBUF there is nothing to scan. */
 	if (vp->v_mount == NULL ||
 	    VOP_GETVOBJECT(vp, &vmobj) != 0 ||
 	    (vp->v_flag & VOBJBUF) == 0)
 		return 0;
 
 	/* Examine the block in pieces of at most one page. */
 	chunk = PAGE_SIZE;
 	if (chunk > vp->v_mount->mnt_stat.f_iosize)
 		chunk = vp->v_mount->mnt_stat.f_iosize;
 	base = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
 
 	done = 0;
 	while (done < vp->v_mount->mnt_stat.f_iosize) {
 		pg = vm_page_lookup(vmobj, OFF_TO_IDX(base + done));
 		if (pg == NULL)
 			return 0;
 
 		/* Never let a piece run past the end of its page. */
 		step = chunk;
 		if (step > PAGE_SIZE - ((done + base) & PAGE_MASK))
 			step = PAGE_SIZE - ((done + base) & PAGE_MASK);
 
 		if (vm_page_is_valid(pg,
 		    (vm_offset_t) ((done + base) & PAGE_MASK), step) == 0)
 			return 0;
 
 		done += step;
 	}
 
 	/* Every piece of the block is resident and valid. */
 	return 1;
 }
 
 /*
  *	vfs_setdirty:
  *
  *	Sets the dirty range for a buffer based on the status of the dirty
  *	bits in the pages comprising the buffer.
  *
  *	The range is limited to the size of the buffer.
  *
  *	This routine is primarily used by NFS, but is generalized for the
  *	B_VMIO case.
  */
 /*
  * Widens bp->b_dirtyoff / bp->b_dirtyend so that they cover the first
  * through last dirty page of the buffer.  Does nothing for empty or
  * non-VMIO buffers, or when the backing object has neither
  * OBJ_MIGHTBEDIRTY nor OBJ_CLEANING set.
  */
 static void
 vfs_setdirty(struct buf *bp) 
 {
 	int i;
 	vm_object_t object;
 
 	/*
 	 * Degenerate case - empty buffer
 	 */
 
 	if (bp->b_bufsize == 0)
 		return;
 
 	/*
 	 * We qualify the scan for modified pages on whether the
 	 * object has been flushed yet.  The OBJ_WRITEABLE flag
 	 * is not cleared simply by protecting pages off.
 	 */
 
 	if ((bp->b_flags & B_VMIO) == 0)
 		return;
 
 	/* The object is taken from the buffer's first page. */
 	object = bp->b_pages[0]->object;
 
 	/* Diagnostics only: the two flags are expected to agree. */
 	if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY))
 		printf("Warning: object %p writeable but not mightbedirty\n", object);
 	if (!(object->flags & OBJ_WRITEABLE) && (object->flags & OBJ_MIGHTBEDIRTY))
 		printf("Warning: object %p mightbedirty but not writeable\n", object);
 
 	if (object->flags & (OBJ_MIGHTBEDIRTY|OBJ_CLEANING)) {
 		vm_offset_t boffset;
 		vm_offset_t eoffset;
 
 		/*
 		 * test the pages to see if they have been modified directly
 		 * by users through the VM system.
 		 */
 		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
 			vm_page_test_dirty(bp->b_pages[i]);
 		}
 
 		/*
 		 * Calculate the encompassing dirty range, boffset and eoffset,
 		 * (eoffset - boffset) bytes.
 		 */
 
 		/* First dirty page; i == b_npages if none is dirty. */
 		for (i = 0; i < bp->b_npages; i++) {
 			if (bp->b_pages[i]->dirty)
 				break;
 		}
 		boffset = (i << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
 
 		/* Last dirty page; i == -1 if none is dirty. */
 		for (i = bp->b_npages - 1; i >= 0; --i) {
 			if (bp->b_pages[i]->dirty) {
 				break;
 			}
 		}
 		/*
 		 * NOTE(review): with no dirty page and an unaligned b_offset
 		 * this value is negative before it is stored in the
 		 * vm_offset_t -- confirm the clamp and the range test below
 		 * cover that case as intended.
 		 */
 		eoffset = ((i + 1) << PAGE_SHIFT) - (bp->b_offset & PAGE_MASK);
 
 		/*
 		 * Fit it to the buffer.
 		 */
 
 		if (eoffset > bp->b_bcount)
 			eoffset = bp->b_bcount;
 
 		/*
 		 * If we have a good dirty range, merge with the existing
 		 * dirty range.
 		 */
 
 		if (boffset < eoffset) {
 			if (bp->b_dirtyoff > boffset)
 				bp->b_dirtyoff = boffset;
 			if (bp->b_dirtyend < eoffset)
 				bp->b_dirtyend = eoffset;
 		}
 	}
 }
 
 /*
  *	getblk:
  *
  *	Get a block given a specified block and offset into a file/device.
  *	The buffers B_DONE bit will be cleared on return, making it almost
  * 	ready for an I/O initiation.  B_INVAL may or may not be set on 
  *	return.  The caller should clear B_INVAL prior to initiating a
  *	READ.
  *
  *	For a non-VMIO buffer, B_CACHE is set to the opposite of B_INVAL for
  *	an existing buffer.
  *
  *	For a VMIO buffer, B_CACHE is modified according to the backing VM.
  *	If getblk()ing a previously 0-sized invalid buffer, B_CACHE is set
  *	and then cleared based on the backing VM.  If the previous buffer is
  *	non-0-sized but invalid, B_CACHE will be cleared.
  *
  *	If getblk() must create a new buffer, the new buffer is returned with
  *	both B_INVAL and B_CACHE clear unless it is a VMIO buffer, in which
  *	case it is returned with B_INVAL clear and B_CACHE set based on the
  *	backing VM.
  *
- *	getblk() also forces a VOP_BWRITE() for any B_DELWRI buffer whos
+ *	getblk() also forces a BUF_WRITE() for any B_DELWRI buffer whos
  *	B_CACHE bit is clear.
  *	
  *	What this means, basically, is that the caller should use B_CACHE to
  *	determine whether the buffer is fully valid or not and should clear
  *	B_INVAL prior to issuing a read.  If the caller intends to validate
  *	the buffer by loading its data area with something, the caller needs
  *	to clear B_INVAL.  If the caller does this without issuing an I/O, 
  *	the caller should set B_CACHE ( as an optimization ), else the caller
  *	should issue the I/O and biodone() will set B_CACHE if the I/O was
  *	a write attempt or if it was a successful read.  If the caller 
  *	intends to issue a READ, the caller must clear B_INVAL and BIO_ERROR
  *	prior to issuing the READ.  biodone() will *not* clear B_INVAL.
  */
 /*
  * Parameters as used by the body below:
  *	vp, blkno          vnode and logical block identifying the buffer
  *	size               requested size; panics if above MAXBSIZE
  *	slpflag, slptimeo  passed on to BUF_TIMELOCK() and getnewbuf()
  *
  * Returns the locked buffer with B_DONE cleared, or NULL when the idle
  * process asks while no buffers are free, when the timed lock attempt
  * fails with anything but ENOLCK, or when getnewbuf() fails and
  * slpflag/slptimeo was given.  Every exit taken after splbio() restores
  * the saved priority level with splx().
  */
 struct buf *
 getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo)
 {
 	struct buf *bp;
 	int s;
 	struct bufhashhdr *bh;
 
 	if (size > MAXBSIZE)
 		panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE);
 
 	s = splbio();
 loop:
 	/*
 	 * Block if we are low on buffers.   Certain processes are allowed
 	 * to completely exhaust the buffer cache.
 	 *
 	 * If this check ever becomes a bottleneck it may be better to
 	 * move it into the else, when gbincore() fails.  At the moment
 	 * it isn't a problem.
 	 *
 	 * XXX remove if 0 sections (clean this up after its proven)
 	 */
 	if (numfreebuffers == 0) {
 		if (curproc == PCPU_GET(idleproc)) {
 			/*
 			 * The idle process must not sleep for a buffer.
 			 * Undo splbio() before bailing out, like every
 			 * other return path in this function.
 			 */
 			splx(s);
 			return NULL;
 		}
 		needsbuffer |= VFS_BIO_NEED_ANY;
 	}
 
 	if ((bp = gbincore(vp, blkno))) {
 		/*
 		 * Buffer is in-core.  If the buffer is not busy, it must
 		 * be on a queue.
 		 */
 
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
 			/*
 			 * Somebody else holds it.  ENOLCK from the
 			 * LK_SLEEPFAIL attempt means we slept and should
 			 * look the buffer up again; anything else is a
 			 * failure reported to the caller.
 			 */
 			if (BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL,
 			    "getblk", slpflag, slptimeo) == ENOLCK)
 				goto loop;
 			splx(s);
 			return (struct buf *) NULL;
 		}
 
 		/*
 		 * The buffer is locked.  B_CACHE is cleared if the buffer is 
 		 * invalid.  Otherwise, for a non-VMIO buffer, B_CACHE is set
 		 * and for a VMIO buffer B_CACHE is adjusted according to the
 		 * backing VM cache.
 		 */
 		if (bp->b_flags & B_INVAL)
 			bp->b_flags &= ~B_CACHE;
 		else if ((bp->b_flags & (B_VMIO | B_INVAL)) == 0)
 			bp->b_flags |= B_CACHE;
 		bremfree(bp);
 
 		/*
 		 * check for size inconsistencies for non-VMIO case.
 		 */
 
 		if (bp->b_bcount != size) {
 			if ((bp->b_flags & B_VMIO) == 0 ||
 			    (size > bp->b_kvasize)) {
 				/*
 				 * The buffer cannot be resized in place:
 				 * push it out (or release it) and start
 				 * over with a fresh lookup.
 				 */
 				if (bp->b_flags & B_DELWRI) {
 					bp->b_flags |= B_NOCACHE;
 					BUF_WRITE(bp);
 				} else {
 					if ((bp->b_flags & B_VMIO) &&
 					   (LIST_FIRST(&bp->b_dep) == NULL)) {
 						bp->b_flags |= B_RELBUF;
 						brelse(bp);
 					} else {
 						bp->b_flags |= B_NOCACHE;
 						BUF_WRITE(bp);
 					}
 				}
 				goto loop;
 			}
 		}
 
 		/*
 		 * If the size is inconsistent in the VMIO case, we can resize
 		 * the buffer.  This might lead to B_CACHE getting set or
 		 * cleared.  If the size has not changed, B_CACHE remains
 		 * unchanged from its previous state.
 		 */
 
 		if (bp->b_bcount != size)
 			allocbuf(bp, size);
 
 		KASSERT(bp->b_offset != NOOFFSET, 
 		    ("getblk: no buffer offset"));
 
 		/*
 		 * A buffer with B_DELWRI set and B_CACHE clear must
 		 * be committed before we can return the buffer in
 		 * order to prevent the caller from issuing a read
 		 * ( due to B_CACHE not being set ) and overwriting
 		 * it.
 		 *
 		 * Most callers, including NFS and FFS, need this to
 		 * operate properly either because they assume they
 		 * can issue a read if B_CACHE is not set, or because
 		 * ( for example ) an uncached B_DELWRI might loop due 
 		 * to softupdates re-dirtying the buffer.  In the latter
 		 * case, B_CACHE is set after the first write completes,
 		 * preventing further loops.
 		 */
 
 		if ((bp->b_flags & (B_CACHE|B_DELWRI)) == B_DELWRI) {
 			BUF_WRITE(bp);
 			goto loop;
 		}
 
 		splx(s);
 		bp->b_flags &= ~B_DONE;
 	} else {
 		/*
 		 * Buffer is not in-core, create new buffer.  The buffer
 		 * returned by getnewbuf() is locked.  Note that the returned
 		 * buffer is also considered valid (not marked B_INVAL).
 		 */
 		int bsize, maxsize, vmio;
 		off_t offset;
 
 		/* Work out the block size used to turn blkno into an offset. */
 		if (vn_isdisk(vp, NULL))
 			bsize = DEV_BSIZE;
 		else if (vp->v_mountedhere)
 			bsize = vp->v_mountedhere->mnt_stat.f_iosize;
 		else if (vp->v_mount)
 			bsize = vp->v_mount->mnt_stat.f_iosize;
 		else
 			bsize = size;
 
 		offset = (off_t)blkno * bsize;
 		vmio = (VOP_GETVOBJECT(vp, NULL) == 0) && (vp->v_flag & VOBJBUF);
 		/* VMIO buffers need room for the offset within the first page. */
 		maxsize = vmio ? size + (offset & PAGE_MASK) : size;
 		maxsize = imax(maxsize, bsize);
 
 		if ((bp = getnewbuf(slpflag, slptimeo, size, maxsize)) == NULL) {
 			if (slpflag || slptimeo) {
 				splx(s);
 				return NULL;
 			}
 			goto loop;
 		}
 
 		/*
 		 * This code is used to make sure that a buffer is not
 		 * created while the getnewbuf routine is blocked.
 		 * This can be a problem whether the vnode is locked or not.
 		 * If the buffer is created out from under us, we have to
 		 * throw away the one we just created.  There is no window
 		 * race because we are safely running at splbio() from the
 		 * point of the duplicate buffer creation through to here,
 		 * and we've locked the buffer.
 		 */
 		if (gbincore(vp, blkno)) {
 			bp->b_flags |= B_INVAL;
 			brelse(bp);
 			goto loop;
 		}
 
 		/*
 		 * Insert the buffer into the hash, so that it can
 		 * be found by incore.
 		 */
 		bp->b_blkno = bp->b_lblkno = blkno;
 		bp->b_offset = offset;
 
 		bgetvp(vp, bp);
 		LIST_REMOVE(bp, b_hash);
 		bh = bufhash(vp, blkno);
 		LIST_INSERT_HEAD(bh, bp, b_hash);
 
 		/*
 		 * set B_VMIO bit.  allocbuf() the buffer bigger.  Since the
 		 * buffer size starts out as 0, B_CACHE will be set by
 		 * allocbuf() for the VMIO case prior to it testing the
 		 * backing store for validity.
 		 */
 
 		if (vmio) {
 			bp->b_flags |= B_VMIO;
 #if defined(VFS_BIO_DEBUG)
 			if (vp->v_type != VREG)
 				printf("getblk: vmioing file type %d???\n", vp->v_type);
 #endif
 		} else {
 			bp->b_flags &= ~B_VMIO;
 		}
 
 		allocbuf(bp, size);
 
 		splx(s);
 		bp->b_flags &= ~B_DONE;
 	}
 	return (bp);
 }
 
 /*
  * Get an empty, disassociated buffer of given size.  The buffer is initially
  * set to B_INVAL.
  */
 struct buf *
 geteblk(int size)
 {
 	struct buf *nbuf;
 	int ipl;
 	int kvasize;
 
 	/* Round the KVA request up to a multiple of BKVASIZE. */
 	kvasize = (size + BKVAMASK) & ~BKVAMASK;
 
 	/* getnewbuf() may come back empty-handed; keep asking. */
 	ipl = splbio();
 	do {
 		nbuf = getnewbuf(0, 0, size, kvasize);
 	} while (nbuf == 0);
 	splx(ipl);
 
 	allocbuf(nbuf, size);
 	nbuf->b_flags |= B_INVAL;	/* b_dep cleared by getnewbuf() */
 	return (nbuf);
 }
 
 
 /*
  * This code constitutes the buffer memory from either anonymous system
  * memory (in the case of non-VMIO operations) or from an associated
  * VM object (in the case of VMIO operations).  This code is able to
  * resize a buffer up or down.
  *
  * Note that this code is tricky, and has many complications to resolve
  * deadlock or inconsistent data situations.  Tread lightly!!!
  * There are B_CACHE and B_DELWRI interactions that must be dealt with by 
  * the caller.  Calling this code willy nilly can result in the loss of data.
  *
  * allocbuf() only adjusts B_CACHE for VMIO buffers.  getblk() deals with
  * B_CACHE for the non-VMIO case.
  */
 
 int
 allocbuf(struct buf *bp, int size)
 {
 	/*
 	 * Resize bp's data area so that it holds `size' bytes.  bp must be
 	 * busy and size must not exceed bp->b_kvasize, otherwise we panic.
 	 * Every return path in this function returns 1.
 	 */
 	int newbsize, mbsize;
 	int i;
 
 	if (BUF_REFCNT(bp) == 0)
 		panic("allocbuf: buffer not busy");
 
 	if (bp->b_kvasize < size)
 		panic("allocbuf: buffer too small");
 
 	if ((bp->b_flags & B_VMIO) == 0) {
 		caddr_t origbuf;
 		int origbufsize;
 		/*
 		 * Just get anonymous memory from the kernel.  Don't
 		 * mess with B_CACHE.
 		 */
 		/* mbsize: size rounded up to a DEV_BSIZE multiple. */
 		mbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
 #if !defined(NO_B_MALLOC)
 		if (bp->b_flags & B_MALLOC)
 			newbsize = mbsize;
 		else
 #endif
 			newbsize = round_page(size);
 
 		if (newbsize < bp->b_bufsize) {
 #if !defined(NO_B_MALLOC)
 			/*
 			 * malloced buffers are not shrunk; they are either
 			 * kept as-is (only b_bcount changes) or, when the
 			 * new size is zero, freed outright.
 			 */
 			if (bp->b_flags & B_MALLOC) {
 				if (newbsize) {
 					bp->b_bcount = size;
 				} else {
 					free(bp->b_data, M_BIOBUF);
 					if (bp->b_bufsize) {
 						bufmallocspace -= bp->b_bufsize;
 						bufspacewakeup();
 						bp->b_bufsize = 0;
 					}
 					/* Point b_data back at the buffer's KVA. */
 					bp->b_data = bp->b_kvabase;
 					bp->b_bcount = 0;
 					bp->b_flags &= ~B_MALLOC;
 				}
 				return 1;
 			}		
 #endif
 			/* Page-backed buffer: release the pages past newbsize. */
 			vm_hold_free_pages(
 			    bp,
 			    (vm_offset_t) bp->b_data + newbsize,
 			    (vm_offset_t) bp->b_data + bp->b_bufsize);
 		} else if (newbsize > bp->b_bufsize) {
 #if !defined(NO_B_MALLOC)
 			/*
 			 * We only use malloced memory on the first allocation.
 			 * and revert to page-allocated memory when the buffer
 			 * grows.
 			 */
 			if ( (bufmallocspace < maxbufmallocspace) &&
 				(bp->b_bufsize == 0) &&
 				(mbsize <= PAGE_SIZE/2)) {
 
 				bp->b_data = malloc(mbsize, M_BIOBUF, M_WAITOK);
 				bp->b_bufsize = mbsize;
 				bp->b_bcount = size;
 				bp->b_flags |= B_MALLOC;
 				bufmallocspace += mbsize;
 				return 1;
 			}
 #endif
 			origbuf = NULL;
 			origbufsize = 0;
 #if !defined(NO_B_MALLOC)
 			/*
 			 * If the buffer is growing on its other-than-first allocation,
 			 * then we revert to the page-allocation scheme.  The old
 			 * malloced data is remembered in origbuf so it can be
 			 * copied into the new pages below.
 			 */
 			if (bp->b_flags & B_MALLOC) {
 				origbuf = bp->b_data;
 				origbufsize = bp->b_bufsize;
 				bp->b_data = bp->b_kvabase;
 				if (bp->b_bufsize) {
 					bufmallocspace -= bp->b_bufsize;
 					bufspacewakeup();
 					bp->b_bufsize = 0;
 				}
 				bp->b_flags &= ~B_MALLOC;
 				newbsize = round_page(newbsize);
 			}
 #endif
 			/* Load pages for the range [b_bufsize, newbsize). */
 			vm_hold_load_pages(
 			    bp,
 			    (vm_offset_t) bp->b_data + bp->b_bufsize,
 			    (vm_offset_t) bp->b_data + newbsize);
 #if !defined(NO_B_MALLOC)
 			if (origbuf) {
 				bcopy(origbuf, bp->b_data, origbufsize);
 				free(origbuf, M_BIOBUF);
 			}
 #endif
 		}
 	} else {
 		vm_page_t m;
 		int desiredpages;
 
 		/*
 		 * VMIO case.  newbsize is size rounded up to DEV_BSIZE;
 		 * desiredpages is the number of pages needed to cover it,
 		 * taking b_offset's offset into its first page into account.
 		 */
 		newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1);
 		desiredpages = (size == 0) ? 0 :
 			num_pages((bp->b_offset & PAGE_MASK) + newbsize);
 
 #if !defined(NO_B_MALLOC)
 		if (bp->b_flags & B_MALLOC)
 			panic("allocbuf: VMIO buffer can't be malloced");
 #endif
 		/*
 		 * Set B_CACHE initially if buffer is 0 length or will become
 		 * 0-length.
 		 */
 		if (size == 0 || bp->b_bufsize == 0)
 			bp->b_flags |= B_CACHE;
 
 		if (newbsize < bp->b_bufsize) {
 			/*
 			 * DEV_BSIZE aligned new buffer size is less than the
 			 * DEV_BSIZE aligned existing buffer size.  Figure out
 			 * if we have to remove any pages.
 			 */
 			if (desiredpages < bp->b_npages) {
 				for (i = desiredpages; i < bp->b_npages; i++) {
 					/*
 					 * the page is not freed here -- it
 					 * is the responsibility of 
 					 * vnode_pager_setsize
 					 */
 					m = bp->b_pages[i];
 					KASSERT(m != bogus_page,
 					    ("allocbuf: bogus page found"));
 					/* Wait until the page is no longer busy. */
 					while (vm_page_sleep_busy(m, TRUE, "biodep"))
 						;
 
 					bp->b_pages[i] = NULL;
 					vm_page_unwire(m, 0);
 				}
 				/* Unmap the KVA that covered the dropped pages. */
 				pmap_qremove((vm_offset_t) trunc_page((vm_offset_t)bp->b_data) +
 				    (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages));
 				bp->b_npages = desiredpages;
 			}
 		} else if (size > bp->b_bcount) {
 			/*
 			 * We are growing the buffer, possibly in a 
 			 * byte-granular fashion.
 			 */
 			struct vnode *vp;
 			vm_object_t obj;
 			vm_offset_t toff;
 			vm_offset_t tinc;
 
 			/*
 			 * Step 1, bring in the VM pages from the object, 
 			 * allocating them if necessary.  We must clear
 			 * B_CACHE if these pages are not valid for the 
 			 * range covered by the buffer.
 			 */
 
 			vp = bp->b_vp;
 			VOP_GETVOBJECT(vp, &obj);
 
 			while (bp->b_npages < desiredpages) {
 				vm_page_t m;
 				vm_pindex_t pi;
 
 				/* Object page index of the next page to attach. */
 				pi = OFF_TO_IDX(bp->b_offset) + bp->b_npages;
 				if ((m = vm_page_lookup(obj, pi)) == NULL) {
 					/*
 					 * note: must allocate system pages
 					 * since blocking here could interfere
 					 * with paging I/O, no matter which
 					 * process we are.
 					 */
 					m = vm_page_alloc(obj, pi, VM_ALLOC_SYSTEM);
 					if (m == NULL) {
 						/*
 						 * NOTE(review): vm_hold_load_pages()
 						 * bumps vm_pageout_deficit before
 						 * VM_WAIT; here it is bumped after.
 						 * Confirm the ordering is intended.
 						 */
 						VM_WAIT;
 						vm_pageout_deficit += desiredpages - bp->b_npages;
 					} else {
 						/*
 						 * Freshly allocated page: it has no
 						 * valid data, so B_CACHE is cleared.
 						 */
 						vm_page_wire(m);
 						vm_page_wakeup(m);
 						bp->b_flags &= ~B_CACHE;
 						bp->b_pages[bp->b_npages] = m;
 						++bp->b_npages;
 					}
 					/* Either way, re-evaluate the loop condition. */
 					continue;
 				}
 
 				/*
 				 * We found a page.  If we have to sleep on it,
 				 * retry because it might have gotten freed out
 				 * from under us.
 				 *
 				 * We can only test PG_BUSY here.  Blocking on
 				 * m->busy might lead to a deadlock:
 				 *
 				 *  vm_fault->getpages->cluster_read->allocbuf
 				 *
 				 */
 
 				if (vm_page_sleep_busy(m, FALSE, "pgtblk"))
 					continue;
 
 				/*
 				 * We have a good page.  Should we wakeup the
 				 * page daemon?
 				 */
 				if ((curproc != pageproc) &&
 				    ((m->queue - m->pc) == PQ_CACHE) &&
 				    ((cnt.v_free_count + cnt.v_cache_count) <
 					(cnt.v_free_min + cnt.v_cache_min))) {
 					pagedaemon_wakeup();
 				}
 				vm_page_flag_clear(m, PG_ZERO);
 				vm_page_wire(m);
 				bp->b_pages[bp->b_npages] = m;
 				++bp->b_npages;
 			}
 
 			/*
 			 * Step 2.  We've loaded the pages into the buffer,
 			 * we have to figure out if we can still have B_CACHE
 			 * set.  Note that B_CACHE is set according to the
 			 * byte-granular range ( bcount and size ), not the
 			 * aligned range ( newbsize ).
 			 *
 			 * The VM test is against m->valid, which is DEV_BSIZE
 			 * aligned.  Needless to say, the validity of the data
 			 * needs to also be DEV_BSIZE aligned.  Note that this
 			 * fails with NFS if the server or some other client
 			 * extends the file's EOF.  If our buffer is resized, 
 			 * B_CACHE may remain set! XXX
 			 */
 
 			/*
 			 * toff walks the newly added byte range [b_bcount, size);
 			 * tinc is the number of bytes left in the current page,
 			 * so the first step ends on a page boundary.
 			 */
 			toff = bp->b_bcount;
 			tinc = PAGE_SIZE - ((bp->b_offset + toff) & PAGE_MASK);
 
 			while ((bp->b_flags & B_CACHE) && toff < size) {
 				vm_pindex_t pi;
 
 				if (tinc > (size - toff))
 					tinc = size - toff;
 
 				/* Index into b_pages[] of the page holding toff. */
 				pi = ((bp->b_offset & PAGE_MASK) + toff) >> 
 				    PAGE_SHIFT;
 
 				vfs_buf_test_cache(
 				    bp, 
 				    bp->b_offset,
 				    toff, 
 				    tinc, 
 				    bp->b_pages[pi]
 				);
 				toff += tinc;
 				tinc = PAGE_SIZE;
 			}
 
 			/*
 			 * Step 3, fixup the KVM pmap.  Remember that
 			 * bp->b_data is relative to bp->b_offset, but 
 			 * bp->b_offset may be offset into the first page.
 			 */
 
 			bp->b_data = (caddr_t)
 			    trunc_page((vm_offset_t)bp->b_data);
 			pmap_qenter(
 			    (vm_offset_t)bp->b_data,
 			    bp->b_pages, 
 			    bp->b_npages
 			);
 			/* Re-apply the in-page offset after mapping. */
 			bp->b_data = (caddr_t)((vm_offset_t)bp->b_data | 
 			    (vm_offset_t)(bp->b_offset & PAGE_MASK));
 		}
 	}
 	/* Shrinking: bufspacewakeup() is called before the new size is recorded. */
 	if (newbsize < bp->b_bufsize)
 		bufspacewakeup();
 	bp->b_bufsize = newbsize;	/* actual buffer allocation	*/
 	bp->b_bcount = size;		/* requested buffer size	*/
 	return 1;
 }
 
 /*
  *	bufwait:
  *
  *	Wait for buffer I/O completion, returning error status.  The buffer
  *	is left locked and B_DONE on return.  B_EINTR is converted into a EINTR
  *	error and cleared.
  */
 int
 bufwait(register struct buf * bp)
 {
 	int ipl;
 
 	/* Sleep at splbio() until the buffer is flagged B_DONE. */
 	ipl = splbio();
 	while (!(bp->b_flags & B_DONE)) {
 		/* The wait message tells reads apart from everything else. */
 		tsleep(bp, PRIBIO,
 		    (bp->b_iocmd == BIO_READ) ? "biord" : "biowr", 0);
 	}
 	splx(ipl);
 
 	/* An interrupted transfer is reported once, then the flag is reset. */
 	if (bp->b_flags & B_EINTR) {
 		bp->b_flags &= ~B_EINTR;
 		return (EINTR);
 	}
 
 	if (!(bp->b_ioflags & BIO_ERROR))
 		return (0);
 
 	/* BIO_ERROR with no specific b_error is reported as EIO. */
 	return (bp->b_error ? bp->b_error : EIO);
 }
 
  /*
   * Call back function from struct bio back up to struct buf.
   * The corresponding initialization lives in sys/conf.h:DEV_STRATEGY().
   */
 void
 bufdonebio(struct bio *bp)
 {
 	struct buf *owner;
 
 	/* bio_caller2 is what gets handed to bufdone() as the buffer. */
 	owner = bp->bio_caller2;
 	bufdone(owner);
 }
 
 /*
  *	bufdone:
  *
  *	Finish I/O on a buffer, optionally calling a completion function.
  *	This is usually called from an interrupt so process blocking is
  *	not allowed.
  *
  *	biodone is also responsible for setting B_CACHE in a B_VMIO bp.
  *	In a non-VMIO bp, B_CACHE will be set on the next getblk() 
  *	assuming B_INVAL is clear.
  *
  *	For the VMIO case, we set B_CACHE if the op was a read and no
  *	read error occurred, or if the op was a write.  B_CACHE is never
  *	set if the buffer is invalid or otherwise uncacheable.
  *
  *	biodone does not mess with B_INVAL, allowing the I/O routine or the
  *	initiator to leave B_INVAL set to brelse the buffer out of existence
  *	in the biodone routine.
  */
 void
 bufdone(struct buf *bp)
 {
 	/*
 	 * Completion processing for bp.  The whole function runs at
 	 * splbio().  bp must be busy and must not already be B_DONE
 	 * (both are KASSERTed).
 	 */
 	int s, error;
 	void    (*biodone) __P((struct buf *));
 
 	s = splbio();
 
 	KASSERT(BUF_REFCNT(bp) > 0, ("biodone: bp %p not busy %d", bp, BUF_REFCNT(bp)));
 	KASSERT(!(bp->b_flags & B_DONE), ("biodone: bp %p already done", bp));
 
 	bp->b_flags |= B_DONE;
 	runningbufwakeup(bp);
 
 	/* A delete request has no data to post-process: just release it. */
 	if (bp->b_iocmd == BIO_DELETE) {
 		brelse(bp);
 		splx(s);
 		return;
 	}
 
 	if (bp->b_iocmd == BIO_WRITE) {
 		vwakeup(bp);
 	}
 
 	/*
 	 * call optional completion function if requested.  The hook is
 	 * cleared before it is invoked, and nothing further is done with
 	 * bp here once it has been called.
 	 */
 	if (bp->b_iodone != NULL) {
 		biodone = bp->b_iodone;
 		bp->b_iodone = NULL;
 		(*biodone) (bp);
 		splx(s);
 		return;
 	}
 	if (LIST_FIRST(&bp->b_dep) != NULL)
 		buf_complete(bp);
 
 	if (bp->b_flags & B_VMIO) {
 		int i;
 		vm_ooffset_t foff;
 		vm_page_t m;
 		vm_object_t obj;
 		int iosize;
 		struct vnode *vp = bp->b_vp;
 
 		error = VOP_GETVOBJECT(vp, &obj);
 
 #if defined(VFS_BIO_DEBUG)
 		if (vp->v_usecount == 0) {
 			panic("biodone: zero vnode ref count");
 		}
 
 		if (error) {
 			panic("biodone: missing VM object");
 		}
 
 		if ((vp->v_flag & VOBJBUF) == 0) {
 			panic("biodone: vnode is not setup for merged cache");
 		}
 #endif
 
 		/* foff tracks the object offset of the page being processed. */
 		foff = bp->b_offset;
 		KASSERT(bp->b_offset != NOOFFSET,
 		    ("biodone: no buffer offset"));
 
 		if (error) {
 			panic("biodone: no object");
 		}
 #if defined(VFS_BIO_DEBUG)
 		if (obj->paging_in_progress < bp->b_npages) {
 			printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
 			    obj->paging_in_progress, bp->b_npages);
 		}
 #endif
 
 		/*
 		 * Set B_CACHE if the op was a normal read and no error
 		 * occurred.  B_CACHE is set for writes in the b*write()
 		 * routines.
 		 */
 		/* iosize: bytes actually transferred (b_bcount minus b_resid). */
 		iosize = bp->b_bcount - bp->b_resid;
 		if (bp->b_iocmd == BIO_READ &&
 		    !(bp->b_flags & (B_INVAL|B_NOCACHE)) &&
 		    !(bp->b_ioflags & BIO_ERROR)) {
 			bp->b_flags |= B_CACHE;
 		}
 
 		for (i = 0; i < bp->b_npages; i++) {
 			int bogusflag = 0;
 			int resid;
 
 			/*
 			 * resid: bytes of the transfer that land in this page,
 			 * i.e. up to the next page boundary, capped by what is
 			 * left of iosize.
 			 */
 			resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff;
 			if (resid > iosize)
 				resid = iosize;
 
 			/*
 			 * cleanup bogus pages, restoring the originals
 			 */
 			m = bp->b_pages[i];
 			if (m == bogus_page) {
 				bogusflag = 1;
 				m = vm_page_lookup(obj, OFF_TO_IDX(foff));
 				if (m == NULL)
 					panic("biodone: page disappeared!");
 				bp->b_pages[i] = m;
 				pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
 			}
 #if defined(VFS_BIO_DEBUG)
 			if (OFF_TO_IDX(foff) != m->pindex) {
 				printf(
 "biodone: foff(%lu)/m->pindex(%d) mismatch\n",
 				    (unsigned long)foff, m->pindex);
 			}
 #endif
 
 			/*
 			 * In the write case, the valid and clean bits are
 			 * already changed correctly ( see bdwrite() ), so we 
 			 * only need to do this here in the read case.
 			 */
 			if ((bp->b_iocmd == BIO_READ) && !bogusflag && resid > 0) {
 				vfs_page_set_valid(bp, foff, i, m);
 			}
 			vm_page_flag_clear(m, PG_ZERO);
 
 			/*
 			 * when debugging new filesystems or buffer I/O methods, this
 			 * is the most common error that pops up.  if you see this, you
 			 * have not set the page busy flag correctly!!!
 			 *
 			 * NOTE(review): the test is m->busy == 0 while the
 			 * messages say "page busy < 0"; the wording looks stale.
 			 */
 			if (m->busy == 0) {
 				printf("biodone: page busy < 0, "
 				    "pindex: %d, foff: 0x(%x,%x), "
 				    "resid: %d, index: %d\n",
 				    (int) m->pindex, (int)(foff >> 32),
 						(int) foff & 0xffffffff, resid, i);
 				if (!vn_isdisk(vp, NULL))
 					printf(" iosize: %ld, lblkno: %d, flags: 0x%lx, npages: %d\n",
 					    bp->b_vp->v_mount->mnt_stat.f_iosize,
 					    (int) bp->b_lblkno,
 					    bp->b_flags, bp->b_npages);
 				else
 					printf(" VDEV, lblkno: %d, flags: 0x%lx, npages: %d\n",
 					    (int) bp->b_lblkno,
 					    bp->b_flags, bp->b_npages);
 				printf(" valid: 0x%x, dirty: 0x%x, wired: %d\n",
 				    m->valid, m->dirty, m->wire_count);
 				panic("biodone: page busy < 0\n");
 			}
 			vm_page_io_finish(m);
 			vm_object_pip_subtract(obj, 1);
 			/* Advance to the start of the next page. */
 			foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
 			iosize -= resid;
 		}
 		if (obj)
 			vm_object_pip_wakeupn(obj, 0);
 	}
 
 	/*
 	 * For asynchronous completions, release the buffer now. The brelse
 	 * will do a wakeup there if necessary - so no need to do a wakeup
 	 * here in the async case. The sync case always needs to do a wakeup.
 	 */
 
 	if (bp->b_flags & B_ASYNC) {
 		if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_RELBUF)) || (bp->b_ioflags & BIO_ERROR))
 			brelse(bp);
 		else
 			bqrelse(bp);
 	} else {
 		wakeup(bp);
 	}
 	splx(s);
 }
 
 /*
  * This routine is called in lieu of iodone in the case of
  * incomplete I/O.  This keeps the busy status for pages
  * consistent.
  */
 void
 vfs_unbusy_pages(struct buf * bp)
 {
 	int i;
 
 	runningbufwakeup(bp);
 	if (bp->b_flags & B_VMIO) {
 		struct vnode *vp = bp->b_vp;
 		vm_object_t obj;
 
 		VOP_GETVOBJECT(vp, &obj);
 
 		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_t m = bp->b_pages[i];
 
 			if (m == bogus_page) {
 				m = vm_page_lookup(obj, OFF_TO_IDX(bp->b_offset) + i);
 				if (!m) {
 					panic("vfs_unbusy_pages: page missing\n");
 				}
 				bp->b_pages[i] = m;
 				pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
 			}
 			vm_object_pip_subtract(obj, 1);
 			vm_page_flag_clear(m, PG_ZERO);
 			vm_page_io_finish(m);
 		}
 		vm_object_pip_wakeupn(obj, 0);
 	}
 }
 
 /*
  * vfs_page_set_valid:
  *
  *	Set the valid bits in a page based on the supplied offset.   The
  *	range is restricted to the buffer's size.
  *
  *	This routine is typically called after a read completes.
  */
 static void
 vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
 {
 	vm_ooffset_t start, end;
 
 	/*
 	 * The candidate range runs from off to the next page boundary.
 	 * It is then clipped at b_offset + b_bcount, so it never extends
 	 * past the bytes the buffer actually holds.
 	 */
 	start = off;
 	end = (off + PAGE_SIZE) & ~(off_t)PAGE_MASK;
 	if (end > bp->b_offset + bp->b_bcount)
 		end = bp->b_offset + bp->b_bcount;
 
 	/* Nothing to mark when the clipped range is empty. */
 	if (end <= start)
 		return;
 
 	/* Arguments are (page, offset within the page, length in bytes). */
 	vm_page_set_validclean(m, (vm_offset_t) (start & PAGE_MASK),
 	    (vm_offset_t) (end - start));
 }
 
 /*
  * This routine is called before a device strategy routine.
  * It is used to tell the VM system that paging I/O is in
  * progress, and treat the pages associated with the buffer
  * almost as being PG_BUSY.  Also the object paging_in_progress
  * flag is handled to make sure that the object doesn't become
  * inconsistent.
  *
  * Since I/O has not been initiated yet, certain buffer flags
  * such as BIO_ERROR or B_INVAL may be in an inconsistent state
  * and should be ignored.
  */
 void
 vfs_busy_pages(struct buf * bp, int clear_modify)
 {
 	/*
 	 * bp:           buffer whose pages are to be marked as under I/O.
 	 * clear_modify: non-zero marks each page's buffer range valid via
 	 *               vfs_page_set_valid(); zero (the read case, per the
 	 *               comment further down) may substitute bogus_page
 	 *               for pages that are already fully valid.
 	 * Buffers without B_VMIO are left untouched.
 	 */
 	int i, bogus;
 
 	if (bp->b_flags & B_VMIO) {
 		struct vnode *vp = bp->b_vp;
 		vm_object_t obj;
 		vm_ooffset_t foff;
 
 		VOP_GETVOBJECT(vp, &obj);
 		foff = bp->b_offset;
 		KASSERT(bp->b_offset != NOOFFSET,
 		    ("vfs_busy_pages: no buffer offset"));
 		vfs_setdirty(bp);
 
 		/*
 		 * First pass: if we had to sleep on any page, restart the
 		 * scan from the first page.
 		 */
 retry:
 		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_t m = bp->b_pages[i];
 			if (vm_page_sleep_busy(m, FALSE, "vbpage"))
 				goto retry;
 		}
 
 		/* Second pass: mark pages as under I/O; bogus counts substitutions. */
 		bogus = 0;
 		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_t m = bp->b_pages[i];
 
 			vm_page_flag_clear(m, PG_ZERO);
 			/* B_CLUSTER buffers skip the pip / io_start accounting here. */
 			if ((bp->b_flags & B_CLUSTER) == 0) {
 				vm_object_pip_add(obj, 1);
 				vm_page_io_start(m);
 			}
 
 			/*
 			 * When readying a buffer for a read ( i.e
 			 * clear_modify == 0 ), it is important to do
 			 * bogus_page replacement for valid pages in 
 			 * partially instantiated buffers.  Partially 
 			 * instantiated buffers can, in turn, occur when
 			 * reconstituting a buffer from its VM backing store
 			 * base.  We only have to do this if B_CACHE is
 			 * clear ( which causes the I/O to occur in the
 			 * first place ).  The replacement prevents the read
 			 * I/O from overwriting potentially dirty VM-backed
 			 * pages.  XXX bogus page replacement is, uh, bogus.
 			 * It may not work properly with small-block devices.
 			 * We need to find a better way.
 			 */
 
 			vm_page_protect(m, VM_PROT_NONE);
 			if (clear_modify)
 				vfs_page_set_valid(bp, foff, i, m);
 			else if (m->valid == VM_PAGE_BITS_ALL &&
 				(bp->b_flags & B_CACHE) == 0) {
 				bp->b_pages[i] = bogus_page;
 				bogus++;
 			}
 			/* Advance to the start of the next page. */
 			foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
 		}
 		/* Remap the buffer's KVA if any page was replaced by bogus_page. */
 		if (bogus)
 			pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
 	}
 }
 
 /*
  * Tell the VM system that the pages associated with this buffer
  * are clean.  This is used for delayed writes where the data is
  * going to go to disk eventually without additional VM intervention.
  *
  * Note that while we only really need to clean through to b_bcount, we
  * just go ahead and clean through to b_bufsize.
  */
 static void
 vfs_clean_pages(struct buf * bp)
 {
 	int i;
 
 	if (bp->b_flags & B_VMIO) {
 		vm_ooffset_t foff;
 
 		foff = bp->b_offset;
 		KASSERT(bp->b_offset != NOOFFSET,
 		    ("vfs_clean_pages: no buffer offset"));
 		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_t m = bp->b_pages[i];
 			vm_ooffset_t noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
 			vm_ooffset_t eoff = noff;
 
 			if (eoff > bp->b_offset + bp->b_bufsize)
 				eoff = bp->b_offset + bp->b_bufsize;
 			vfs_page_set_valid(bp, foff, i, m);
 			/* vm_page_clear_dirty(m, foff & PAGE_MASK, eoff - foff); */
 			foff = noff;
 		}
 	}
 }
 
 /*
  *	vfs_bio_set_validclean:
  *
  *	Set the range within the buffer to valid and clean.  The range is 
  *	relative to the beginning of the buffer, b_offset.  Note that b_offset
  *	itself may be offset from the beginning of the first page.
  */
 
 void   
 vfs_bio_set_validclean(struct buf *bp, int base, int size)
 {
 	int pgidx;
 	int chunk;
 
 	/* Buffers without B_VMIO are left untouched. */
 	if ((bp->b_flags & B_VMIO) == 0)
 		return;
 
 	/*
 	 * Re-express base relative to the start of the first page, then
 	 * work out how many bytes remain in the page that base falls in.
 	 */
 	base += (bp->b_offset & PAGE_MASK);
 	chunk = PAGE_SIZE - (base & PAGE_MASK);
 	pgidx = base / PAGE_SIZE;
 
 	/* Mark page by page until the range or the page list runs out. */
 	while (size > 0 && pgidx < bp->b_npages) {
 		if (chunk > size)
 			chunk = size;
 
 		vm_page_set_validclean(bp->b_pages[pgidx],
 		    base & PAGE_MASK, chunk);
 
 		base += chunk;
 		size -= chunk;
 		/* Every page after the first starts at its own offset 0. */
 		chunk = PAGE_SIZE;
 		pgidx++;
 	}
 }
 
 /*
  *	vfs_bio_clrbuf:
  *
  *	clear a buffer.  This routine essentially fakes an I/O, so we need
  *	to clear BIO_ERROR and B_INVAL.
  *
  *	Note that while we only theoretically need to clear through b_bcount,
  *	we go ahead and clear through b_bufsize.
  */
 
 void
 vfs_bio_clrbuf(struct buf *bp) {
 	/*
 	 * Zero the not-yet-valid parts of bp and mark them valid.  Only
 	 * buffers with B_VMIO set and B_MALLOC clear are handled here;
 	 * everything else is passed to clrbuf().
 	 */
 	int i, mask = 0;
 	caddr_t sa, ea;
 	if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
 		bp->b_flags &= ~B_INVAL;
 		bp->b_ioflags &= ~BIO_ERROR;
 		/*
 		 * Fast path: a single page, buffer smaller than a page, and
 		 * b_offset page aligned.  mask has one bit per DEV_BSIZE
 		 * chunk of b_bufsize.
 		 */
 		if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
 		    (bp->b_offset & PAGE_MASK) == 0) {
 			mask = (1 << (bp->b_bufsize / DEV_BSIZE)) - 1;
 			/* Zero only if the page is not PG_ZERO and not fully valid. */
 			if (((bp->b_pages[0]->flags & PG_ZERO) == 0) &&
 			    ((bp->b_pages[0]->valid & mask) != mask)) {
 				bzero(bp->b_data, bp->b_bufsize);
 			}
 			bp->b_pages[0]->valid |= mask;
 			bp->b_resid = 0;
 			return;
 		}
 		/*
 		 * General path.  [sa, ea) is the slice of the buffer that
 		 * lies in page i; the loop step sets sa to ea for the next
 		 * page.
 		 */
 		ea = sa = bp->b_data;
 		for(i=0;i<bp->b_npages;i++,sa=ea) {
 			/* j: index of the DEV_BSIZE chunk within the page where sa starts. */
 			int j = ((vm_offset_t)sa & PAGE_MASK) / DEV_BSIZE;
 			/* ea: next page boundary, capped at b_data + b_bufsize. */
 			ea = (caddr_t)trunc_page((vm_offset_t)sa + PAGE_SIZE);
 			ea = (caddr_t)(vm_offset_t)ulmin(
 			    (u_long)(vm_offset_t)ea,
 			    (u_long)(vm_offset_t)bp->b_data + bp->b_bufsize);
 			/* One bit per DEV_BSIZE chunk in [sa, ea), shifted to chunk j. */
 			mask = ((1 << ((ea - sa) / DEV_BSIZE)) - 1) << j;
 			/* Slice already fully valid: nothing to do for this page. */
 			if ((bp->b_pages[i]->valid & mask) == mask)
 				continue;
 			if ((bp->b_pages[i]->valid & mask) == 0) {
 				/* Nothing valid in the slice: zero it in one go. */
 				if ((bp->b_pages[i]->flags & PG_ZERO) == 0) {
 					bzero(sa, ea - sa);
 				}
 			} else {
 				/*
 				 * Partly valid: zero only the chunks whose valid
 				 * bit is clear.  This advances sa up to ea.
 				 */
 				for (; sa < ea; sa += DEV_BSIZE, j++) {
 					if (((bp->b_pages[i]->flags & PG_ZERO) == 0) &&
 						(bp->b_pages[i]->valid & (1<<j)) == 0)
 						bzero(sa, DEV_BSIZE);
 				}
 			}
 			bp->b_pages[i]->valid |= mask;
 			vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
 		}
 		bp->b_resid = 0;
 	} else {
 		clrbuf(bp);
 	}
 }
 
 /*
  * vm_hold_load_pages and vm_hold_free_pages get pages into and out of
  * a buffer's address space.  The pages are anonymous and are
  * not associated with a file object.
  */
 void
 vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
 {
 	vm_offset_t va;
 	vm_page_t page;
 	int slot;
 
 	/* Both ends of the range are rounded up to a page boundary. */
 	to = round_page(to);
 	from = round_page(from);
 
 	/* Slot in b_pages[] that corresponds to the first address. */
 	slot = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
 
 	for (va = from; va < to; va += PAGE_SIZE, slot++) {
 		/*
 		 * note: must allocate system pages since blocking here
 		 * could interfere with paging I/O, no matter which
 		 * process we are.
 		 *
 		 * Keep retrying until an allocation succeeds; each failure
 		 * adds to the pageout deficit and then waits.
 		 */
 		for (;;) {
 			page = vm_page_alloc(kernel_object,
 			    ((va - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
 			    VM_ALLOC_SYSTEM);
 			if (page != NULL)
 				break;
 			vm_pageout_deficit += (to - from) >> PAGE_SHIFT;
 			VM_WAIT;
 		}
 
 		/* Wire the page, mark it fully valid, map it and record it. */
 		vm_page_wire(page);
 		page->valid = VM_PAGE_BITS_ALL;
 		vm_page_flag_clear(page, PG_ZERO);
 		pmap_kenter(va, VM_PAGE_TO_PHYS(page));
 		bp->b_pages[slot] = page;
 		vm_page_wakeup(page);
 	}
 	bp->b_npages = slot;
 }
 
 /*
  * Release the pages backing the address range [from, to) of bp.  Both
  * ends are rounded up to a page boundary.  Each page found is unmapped,
  * unwired and freed, and bp->b_npages is set to the index of the first
  * page of the range.
  */
 void
 vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
 {
 	vm_offset_t pg;
 	vm_page_t p;
 	int index, newnpages;
 
 	from = round_page(from);
 	to = round_page(to);
 	newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
 
 	for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
 		/*
 		 * Check the slot index before reading b_pages[].  Slots at
 		 * or beyond b_npages hold no page of ours, and index only
 		 * grows, so the rest of the range can be skipped.
 		 */
 		if (index >= bp->b_npages)
 			break;
 
 		p = bp->b_pages[index];
 		if (p == NULL)
 			continue;
 
 		/* A busy page is unexpected here; report it but carry on. */
 		if (p->busy) {
 			printf("vm_hold_free_pages: blkno: %d, lblkno: %d\n",
 				bp->b_blkno, bp->b_lblkno);
 		}
 		bp->b_pages[index] = NULL;
 		pmap_kremove(pg);
 		vm_page_busy(p);
 		vm_page_unwire(p, 0);
 		vm_page_free(p);
 	}
 	bp->b_npages = newnpages;
 }
 
 
 #include "opt_ddb.h"
 #ifdef DDB
 #include <ddb/ddb.h>
 
 DB_SHOW_COMMAND(buffer, db_show_buffer)
 {
 	/* get args */
 	struct buf *bp = (struct buf *)addr;
 
 	if (!have_addr) {
 		db_printf("usage: show buffer <addr>\n");
 		return;
 	}
 
 	db_printf("b_flags = 0x%b\n", (u_int)bp->b_flags, PRINT_BUF_FLAGS);
 	db_printf("b_error = %d, b_bufsize = %ld, b_bcount = %ld, "
 		  "b_resid = %ld\nb_dev = (%d,%d), b_data = %p, "
 		  "b_blkno = %d, b_pblkno = %d\n",
 		  bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid,
 		  major(bp->b_dev), minor(bp->b_dev),
 		  bp->b_data, bp->b_blkno, bp->b_pblkno);
 	if (bp->b_npages) {
 		int i;
 		db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages);
 		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_t m;
 			m = bp->b_pages[i];
 			db_printf("(%p, 0x%lx, 0x%lx)", (void *)m->object,
 			    (u_long)m->pindex, (u_long)VM_PAGE_TO_PHYS(m));
 			if ((i + 1) < bp->b_npages)
 				db_printf(",");
 		}
 		db_printf("\n");
 	}
 }
 #endif /* DDB */
Index: head/sys/kern/vfs_cluster.c
===================================================================
--- head/sys/kern/vfs_cluster.c	(revision 75579)
+++ head/sys/kern/vfs_cluster.c	(revision 75580)
@@ -1,929 +1,931 @@
 /*-
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
  * Modifications/enhancements:
  * 	Copyright (c) 1995 John S. Dyson.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_cluster.c	8.7 (Berkeley) 2/13/94
  * $FreeBSD$
  */
 
 #include "opt_debug_cluster.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/vnode.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/resourcevar.h>
 #include <sys/vmmeter.h>
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <sys/sysctl.h>
 
 #if defined(CLUSTERDEBUG)
 #include <sys/sysctl.h>
 /*
  * debug.rcluster sysctl: when non-zero, cluster_read() prints a trace
  * line for each synchronous and read-ahead request it issues.
  */
 static int	rcluster= 0;
 SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0, "");
 #endif
 
 /* malloc type named "cluster_save buffer". */
 static MALLOC_DEFINE(M_SEGMENT, "cluster_save buffer", "cluster_save buffer");
 
 /* Prototypes for the file-local helpers. */
 static struct cluster_save *
 	cluster_collectbufs __P((struct vnode *vp, struct buf *last_bp));
 static struct buf *
 	cluster_rbuild __P((struct vnode *vp, u_quad_t filesize, daddr_t lbn,
 			    daddr_t blkno, long size, int run, struct buf *fbp));
 
 /* vfs.write_behind sysctl, read/write, default 1. */
 static int write_behind = 1;
 SYSCTL_INT(_vfs, OID_AUTO, write_behind, CTLFLAG_RW, &write_behind, 0, "");
 
 /* Defined outside this file. */
 extern vm_page_t	bogus_page;
 
 /* Defined outside this file. */
 extern int cluster_pbuf_freecnt;
 
 /*
  * Maximum number of blocks for read-ahead.  cluster_read() caps its
  * maxra value at this.
  */
 #define MAXRA 32
 
 /*
  * This replaces bread.
  */
 /*
  * cluster_read: get logical block lblkno of vp into *bpp, optionally
  * starting read-ahead behind it.
  *
  *	vp	 vnode to read from
  *	filesize file size in bytes; requests are clipped against it
  *	lblkno	 logical block number wanted
  *	size	 block size in bytes
  *	cred	 not referenced in this function
  *	totread	 bytes the caller intends to read, starting at this block
  *	seqcount sequential-access hint; 0 disables read-ahead
  *	bpp	 out: the buffer for lblkno
  *
  * Returns 0 on a cache hit that needs no read-ahead; otherwise the
  * bufwait() result for the requested block when it had to be read, or
  * the error collected while setting up read-ahead.
  */
 int
 cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
 	struct vnode *vp;
 	u_quad_t filesize;
 	daddr_t lblkno;
 	long size;
 	struct ucred *cred;
 	long totread;
 	int seqcount;
 	struct buf **bpp;
 {
 	struct buf *bp, *rbp, *reqbp;
 	daddr_t blkno, origblkno;
 	int error, num_ra;
 	int i;
 	int maxra, racluster;
 	long origtotread;
 
 	error = 0;
 
 	/*
 	 * Try to limit the amount of read-ahead by a few
 	 * ad-hoc parameters.  This needs work!!!
 	 *
 	 * NOTE(review): racluster is 0 when size > mnt_iosize_max, and
 	 * it is later used as a modulus; confirm callers rule that out.
 	 * size itself is used as a divisor here.
 	 */
 	racluster = vp->v_mount->mnt_iosize_max / size;
 	maxra = 2 * racluster + (totread / size);
 	if (maxra > MAXRA)
 		maxra = MAXRA;
 	if (maxra > nbuf/8)
 		maxra = nbuf/8;
 
 	/*
 	 * get the requested block
 	 */
 	*bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0);
 	origblkno = lblkno;
 	origtotread = totread;
 
 	/*
 	 * if it is in the cache, then check to see if the reads have been
 	 * sequential.  If they have, then try some read-ahead, otherwise
 	 * back-off on prospective read-aheads.
 	 */
 	if (bp->b_flags & B_CACHE) {
 		if (!seqcount) {
 			return 0;
 		} else if ((bp->b_flags & B_RAM) == 0) {
 			/* No read-ahead mark on this buffer: nothing to do. */
 			return 0;
 		} else {
 			int s;
 			struct buf *tbp;
 			bp->b_flags &= ~B_RAM;
 			/*
 			 * We do the spl here so that there is no window
 			 * between the incore and the b_usecount increment
 			 * below.  We opt to keep the spl out of the loop
 			 * for efficiency.
 			 */
 			s = splbio();
 			/* Find the first following block that is not in core. */
 			for (i = 1; i < maxra; i++) {
 
 				if (!(tbp = incore(vp, lblkno+i))) {
 					break;
 				}
 
 				/*
 				 * Set another read-ahead mark so we know 
 				 * to check again.
 				 */
 				if (((i % racluster) == (racluster - 1)) ||
 					(i == (maxra - 1)))
 					tbp->b_flags |= B_RAM;
 			}
 			splx(s);
 			/* Everything up to maxra is already in core. */
 			if (i >= maxra) {
 				return 0;
 			}
 			lblkno += i;
 		}
 		/*
 		 * The requested block was cached, so there is no synchronous
 		 * read to issue or wait for; *bpp still points at it.
 		 */
 		reqbp = bp = NULL;
 	} else {
 		off_t firstread = bp->b_offset;
 
 		KASSERT(bp->b_offset != NOOFFSET,
 		    ("cluster_read: no buffer offset"));
 		/* Clip the intended read at end of file. */
 		if (firstread + totread > filesize)
 			totread = filesize - firstread;
 		if (totread > size) {
 			int nblks = 0;
 			int ncontigafter;
 			/* nblks = number of blocks needed to cover totread. */
 			while (totread > 0) {
 				nblks++;
 				totread -= size;
 			}
 			/*
 			 * NOTE(review): with totread > size on entry and a
 			 * positive size, the loop above runs at least
 			 * twice, so this test looks unreachable.
 			 */
 			if (nblks == 1)
 				goto single_block_read;
 			if (nblks > racluster)
 				nblks = racluster;
 
 	    		error = VOP_BMAP(vp, lblkno, NULL,
 				&blkno, &ncontigafter, NULL);
 			if (error)
 				goto single_block_read;
 			if (blkno == -1)
 				goto single_block_read;
 			if (ncontigafter == 0)
 				goto single_block_read;
 			/* Do not cluster past the contiguous run VOP_BMAP reported. */
 			if (ncontigafter + 1 < nblks)
 				nblks = ncontigafter + 1;
 
 			bp = cluster_rbuild(vp, filesize, lblkno,
 				blkno, size, nblks, bp);
 			lblkno += (bp->b_bufsize / size);
 		} else {
 single_block_read:
 			/*
 			 * if it isn't in the cache, then get a chunk from
 			 * disk if sequential, otherwise just get the block.
 			 */
 			bp->b_flags |= B_RAM;
 			bp->b_iocmd = BIO_READ;
 			lblkno += 1;
 		}
 	}
 
 	/*
 	 * if we have been doing sequential I/O, then do some read-ahead
 	 */
 	rbp = NULL;
 	if (seqcount && (lblkno < (origblkno + seqcount))) {
 		/*
 		 * we now build the read-ahead buffer if it is desirable.
 		 * It is only built when the whole block lies within the
 		 * file and VOP_BMAP maps it to a block other than -1.
 		 */
 		if (((u_quad_t)(lblkno + 1) * size) <= filesize &&
 		    !(error = VOP_BMAP(vp, lblkno, NULL, &blkno, &num_ra, NULL)) &&
 		    blkno != -1) {
 			int nblksread;
 			int ntoread = num_ra + 1;
 			/* Blocks covered by the original request, rounded up. */
 			nblksread = (origtotread + size - 1) / size;
 			if (seqcount < nblksread)
 				seqcount = nblksread;
 			if (seqcount < ntoread)
 				ntoread = seqcount;
 			if (num_ra) {
 				rbp = cluster_rbuild(vp, filesize, lblkno,
 					blkno, size, ntoread, NULL);
 			} else {
 				/* No contiguous run: read ahead a single block. */
 				rbp = getblk(vp, lblkno, size, 0, 0);
 				rbp->b_flags |= B_ASYNC | B_RAM;
 				rbp->b_iocmd = BIO_READ;
 				rbp->b_blkno = blkno;
 			}
 		}
 	}
 
 	/*
 	 * handle the synchronous read
 	 */
 	if (bp) {
 #if defined(CLUSTERDEBUG)
 		if (rcluster)
 			printf("S(%ld,%ld,%d) ",
 			    (long)bp->b_lblkno, bp->b_bcount, seqcount);
 #endif
 		if ((bp->b_flags & B_CLUSTER) == 0) {
 			vfs_busy_pages(bp, 0);
 		}
 		bp->b_flags &= ~B_INVAL;
 		bp->b_ioflags &= ~BIO_ERROR;
 		if ((bp->b_flags & B_ASYNC) || bp->b_iodone != NULL)
 			BUF_KERNPROC(bp);
 		error = VOP_STRATEGY(vp, bp);
 		curproc->p_stats->p_ru.ru_inblock++;
 	}
 
 	/*
 	 * and if we have read-aheads, do them too
 	 */
 	if (rbp) {
 		if (error) {
 			/* An earlier step failed: drop the read-ahead buffer. */
 			rbp->b_flags &= ~B_ASYNC;
 			brelse(rbp);
 		} else if (rbp->b_flags & B_CACHE) {
 			/* Already cached: no I/O needed, requeue it. */
 			rbp->b_flags &= ~B_ASYNC;
 			bqrelse(rbp);
 		} else {
 #if defined(CLUSTERDEBUG)
 			if (rcluster) {
 				if (bp)
 					printf("A+(%ld,%ld,%ld,%d) ",
 					    (long)rbp->b_lblkno, rbp->b_bcount,
 					    (long)(rbp->b_lblkno - origblkno),
 					    seqcount);
 				else
 					printf("A(%ld,%ld,%ld,%d) ",
 					    (long)rbp->b_lblkno, rbp->b_bcount,
 					    (long)(rbp->b_lblkno - origblkno),
 					    seqcount);
 			}
 #endif
 
 			if ((rbp->b_flags & B_CLUSTER) == 0) {
 				vfs_busy_pages(rbp, 0);
 			}
 			rbp->b_flags &= ~B_INVAL;
 			rbp->b_ioflags &= ~BIO_ERROR;
 			if ((rbp->b_flags & B_ASYNC) || rbp->b_iodone != NULL)
 				BUF_KERNPROC(rbp);
 			/* The read-ahead result is deliberately ignored. */
 			(void) VOP_STRATEGY(vp, rbp);
 			curproc->p_stats->p_ru.ru_inblock++;
 		}
 	}
 	/* Wait only when the requested block itself had to be read. */
 	if (reqbp)
 		return (bufwait(reqbp));
 	else
 		return (error);
 }
 
 /*
  * If blocks are contiguous on disk, use this to provide clustered
  * read ahead.  We will read as many blocks as possible sequentially
  * and then parcel them up into logical blocks in the buffer hash table.
  *
  * Arguments:
  *	vp	 - vnode the blocks belong to
  *	filesize - file size in bytes; "run" is trimmed so that the cluster
  *		   does not extend past it
  *	lbn	 - logical block number of the first block
  *	blkno	 - physical block number stored in the first buffer's b_blkno
  *	size	 - block size in bytes; asserted to equal the mount's f_iosize
  *	run	 - maximum number of blocks to place in the cluster
  *	fbp	 - buffer the caller already holds for block lbn, or NULL to
  *		   have one obtained here with getblk()
  *
  * Returns either the single buffer for block lbn (when it is already
  * B_CACHE, is B_MALLOC or not B_VMIO, run is one block or less, or no
  * pbuf could be obtained) or a pbuf flagged B_CLUSTER that chains the
  * component buffers and completes through cluster_callback().
  *
  * This function only builds the buffer; it does not issue the I/O.
  */
 static struct buf *
 cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
 	struct vnode *vp;
 	u_quad_t filesize;
 	daddr_t lbn;
 	daddr_t blkno;
 	long size;
 	int run;
 	struct buf *fbp;
 {
 	struct buf *bp, *tbp;
 	daddr_t bn;
 	int i, inc, j;
 
 	KASSERT(size == vp->v_mount->mnt_stat.f_iosize,
 	    ("cluster_rbuild: size %ld != filesize %ld\n",
 	    size, vp->v_mount->mnt_stat.f_iosize));
 
 	/*
 	 * avoid a division
 	 *
 	 * Shrink run one block at a time until the last block of the
 	 * run ends at or before filesize.
 	 */
 	while ((u_quad_t) size * (lbn + run) > filesize) {
 		--run;
 	}
 
 	if (fbp) {
 		/* The caller supplied the first buffer; just mark it a read. */
 		tbp = fbp;
 		tbp->b_iocmd = BIO_READ; 
 	} else {
 		/*
 		 * No buffer supplied: get one ourselves.  If its contents
 		 * are already cached there is nothing to read.
 		 */
 		tbp = getblk(vp, lbn, size, 0, 0);
 		if (tbp->b_flags & B_CACHE)
 			return tbp;
 		tbp->b_flags |= B_ASYNC | B_RAM;
 		tbp->b_iocmd = BIO_READ;
 	}
 
 	tbp->b_blkno = blkno;
 	/*
 	 * Clustering is only attempted for VMIO buffers without malloced
 	 * data and for runs of more than one block.
 	 */
 	if( (tbp->b_flags & B_MALLOC) ||
 		((tbp->b_flags & B_VMIO) == 0) || (run <= 1) )
 		return tbp;
 
 	/* Without a pbuf to build the cluster in, fall back to one block. */
 	bp = trypbuf(&cluster_pbuf_freecnt);
 	if (bp == 0)
 		return tbp;
 
 	/* Give the pbuf's data pointer the same in-page offset as tbp's. */
 	bp->b_data = (char *)((vm_offset_t)bp->b_data |
 	    ((vm_offset_t)tbp->b_data & PAGE_MASK));
 	bp->b_flags = B_ASYNC | B_CLUSTER | B_VMIO;
 	bp->b_iocmd = BIO_READ;
 	bp->b_iodone = cluster_callback;
 	bp->b_blkno = blkno;
 	bp->b_lblkno = lbn;
 	bp->b_offset = tbp->b_offset;
 	KASSERT(bp->b_offset != NOOFFSET, ("cluster_rbuild: no buffer offset"));
 	pbgetvp(vp, bp);
 
 	TAILQ_INIT(&bp->b_cluster.cluster_head);
 
 	bp->b_bcount = 0;
 	bp->b_bufsize = 0;
 	bp->b_npages = 0;
 
 	/*
 	 * Walk the run.  Iteration 0 adds the first buffer (tbp as set up
 	 * above); later iterations try to obtain each following block and
 	 * stop the cluster at the first one that cannot be added.
 	 */
 	inc = btodb(size);
 	for (bn = blkno, i = 0; i < run; ++i, bn += inc) {
 		if (i != 0) {
 			/* Stop before exceeding the mount's maximum I/O size. */
 			if ((bp->b_npages * PAGE_SIZE) +
 				round_page(size) > vp->v_mount->mnt_iosize_max)
 				break;
 
 			/*
 			 * If the block is already in core, stop when it is
 			 * locked, has any valid page, or has a different
 			 * byte count than the cluster block size.
 			 */
 			if ((tbp = incore(vp, lbn + i)) != NULL) {
 				if (BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT))
 					break;
 				BUF_UNLOCK(tbp);
 
 				for (j = 0; j < tbp->b_npages; j++)
 					if (tbp->b_pages[j]->valid)
 						break;
 				
 				if (j != tbp->b_npages)
 					break;
 	
 				if (tbp->b_bcount != size)
 					break;
 			}
 
 			tbp = getblk(vp, lbn + i, size, 0, 0);
 
 			/*
 			 * If the buffer is already fully valid or locked
 			 * (which could also mean that a background write is
 			 * in progress), or the buffer is not backed by VMIO,
 			 * stop.
 			 */
 			if ((tbp->b_flags & (B_CACHE|B_LOCKED)) ||
 				(tbp->b_flags & B_VMIO) == 0) {
 				bqrelse(tbp);
 				break;
 			}
 
 			/* Stop if any page of the buffer holds valid data. */
 			for (j = 0;j < tbp->b_npages; j++) {
 				if (tbp->b_pages[j]->valid)
 					break;
 			}
 
 			if (j != tbp->b_npages) {
 				bqrelse(tbp);
 				break;
 			}
 
 			/*
 			 * B_RAM is set on the second buffer when the caller
 			 * supplied the first one, and on the last buffer of
 			 * the run.
 			 */
 			if ((fbp && (i == 1)) || (i == (run - 1)))
 				tbp->b_flags |= B_RAM;
 			tbp->b_flags |= B_ASYNC;
 			tbp->b_iocmd = BIO_READ;
 			/*
 			 * A buffer whose b_blkno still equals b_lblkno gets
 			 * the expected physical block; one that already maps
 			 * to a different block ends the cluster.
 			 */
 			if (tbp->b_blkno == tbp->b_lblkno) {
 				tbp->b_blkno = bn;
 			} else if (tbp->b_blkno != bn) {
 				brelse(tbp);
 				break;
 			}
 		}
 		/*
 		 * XXX fbp from caller may not be B_ASYNC, but we are going
 		 * to biodone() it in cluster_callback() anyway
 		 */
 		BUF_KERNPROC(tbp);
 		TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
 			tbp, b_cluster.cluster_entry);
 		/*
 		 * Account the component's pages as under I/O and append them
 		 * to the cluster's page list, skipping a page that is the same
 		 * as the last one appended.  Fully valid pages are replaced by
 		 * bogus_page in the component buffer.
 		 */
 		for (j = 0; j < tbp->b_npages; j += 1) {
 			vm_page_t m;
 			m = tbp->b_pages[j];
 			vm_page_io_start(m);
 			vm_object_pip_add(m->object, 1);
 			if ((bp->b_npages == 0) ||
 				(bp->b_pages[bp->b_npages-1] != m)) {
 				bp->b_pages[bp->b_npages] = m;
 				bp->b_npages++;
 			}
 			if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
 				tbp->b_pages[j] = bogus_page;
 		}
 		bp->b_bcount += tbp->b_bcount;
 		bp->b_bufsize += tbp->b_bufsize;
 	}
 
 	/* Likewise substitute bogus_page for fully valid cluster pages. */
 	for(j=0;j<bp->b_npages;j++) {
 		if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) ==
 			VM_PAGE_BITS_ALL)
 			bp->b_pages[j] = bogus_page;
 	}
 	if (bp->b_bufsize > bp->b_kvasize)
 		panic("cluster_rbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
 		    bp->b_bufsize, bp->b_kvasize);
 	bp->b_kvasize = bp->b_bufsize;
 
 	/* Map the collected pages at the pbuf's (page-truncated) address. */
 	pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
 		(vm_page_t *)bp->b_pages, bp->b_npages);
 	return (bp);
 }
 
 /*
  * Cleanup after a clustered read or write.
  * This is complicated by the fact that any of the buffers might have
  * extra memory (if there were no empty buffer headers at allocbuf time)
  * that we will need to shift around.
  */
 void
 cluster_callback(bp)
 	struct buf *bp;
 {
 	struct buf *child, *next;
 	int ioerr;
 
 	/*
 	 * An error on the cluster buffer has to be handed down to every
 	 * component buffer.
 	 */
 	ioerr = 0;
 	if (bp->b_ioflags & BIO_ERROR)
 		ioerr = bp->b_error;
 
 	/* Drop the mapping of the cluster's pages. */
 	pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
 
 	/*
 	 * Finish every component buffer on the cluster list.  The successor
 	 * is fetched before bufdone() is called on the current buffer.
 	 */
 	child = TAILQ_FIRST(&bp->b_cluster.cluster_head);
 	while (child) {
 		next = TAILQ_NEXT(&child->b_cluster, cluster_entry);
 		if (!ioerr) {
 			child->b_dirtyoff = child->b_dirtyend = 0;
 			child->b_flags &= ~B_INVAL;
 			child->b_ioflags &= ~BIO_ERROR;
 		} else {
 			child->b_ioflags |= BIO_ERROR;
 			child->b_error = ioerr;
 		}
 		bufdone(child);
 		child = next;
 	}
 
 	/* Give the cluster pbuf back. */
 	relpbuf(bp, &cluster_pbuf_freecnt);
 }
 
 /*
  *	cluster_wbuild_wb:
  *
  *	Implement modified write build for cluster.
  *
  *		write_behind = 0	write behind disabled
  *		write_behind = 1	write behind normal (default)
  *		write_behind = 2	write behind backed-off
  */
 
 static __inline int
 cluster_wbuild_wb(struct vnode *vp, long size, daddr_t start_lbn, int len)
 {
 
 	if (write_behind == 2) {
 		/*
 		 * Backed-off mode: write the range that lies len blocks
 		 * before start_lbn, or nothing when no such range exists.
 		 */
 		if (start_lbn < len)
 			return(0);
 		start_lbn -= len;
 	} else if (write_behind != 1) {
 		/* Write behind is disabled. */
 		return(0);
 	}
 
 	/* Modes 1 and 2 both end up building and writing the cluster. */
 	return(cluster_wbuild(vp, size, start_lbn, len));
 }
 
 /*
  * Do clustered write for FFS.
  *
  * Four cases:
  *	1. Write is not sequential (write asynchronously)
  *	Write is sequential:
  *	2.	beginning of cluster - begin cluster
  *	3.	middle of a cluster - add to cluster
  *	4.	end of a cluster - asynchronously write cluster
  *
  * Arguments:
  *	bp	 - buffer being written; every path below hands it to
  *		   bawrite() or bdwrite()
  *	filesize - file size in bytes, compared with the end of bp to tell
  *		   whether the write finishes exactly at end of file
  *	seqcount - sequential-access heuristic; the thresholds "> 0" and
  *		   "> 1" below decide whether a cluster is pushed now
  *
  * Cluster state is kept in the vnode: v_cstart (first block of the
  * current cluster), v_clen (cluster length), v_lastw (last logical
  * block written) and v_lasta (last physical block written).
  */
 void
 cluster_write(bp, filesize, seqcount)
 	struct buf *bp;
 	u_quad_t filesize;
 	int seqcount;
 {
 	struct vnode *vp;
 	daddr_t lbn;
 	int maxclen, cursize;
 	int lblocksize;
 	int async;
 
 	vp = bp->b_vp;
 	/*
 	 * Regular files use the mount's I/O size and honor MNT_ASYNC;
 	 * anything else uses the buffer's own size and is never async.
 	 */
 	if (vp->v_type == VREG) {
 		async = vp->v_mount->mnt_flag & MNT_ASYNC;
 		lblocksize = vp->v_mount->mnt_stat.f_iosize;
 	} else {
 		async = 0;
 		lblocksize = bp->b_bufsize;
 	}
 	lbn = bp->b_lblkno;
 	KASSERT(bp->b_offset != NOOFFSET, ("cluster_write: no buffer offset"));
 
 	/* Initialize vnode to beginning of file. */
 	if (lbn == 0)
 		vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;
 
 	/*
 	 * No cluster in progress, or this block does not directly follow
 	 * the previous write both logically and physically.
 	 */
 	if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
 	    (bp->b_blkno != vp->v_lasta + btodb(lblocksize))) {
 		maxclen = vp->v_mount->mnt_iosize_max / lblocksize - 1;
 		if (vp->v_clen != 0) {
 			/*
 			 * Next block is not sequential.
 			 *
 			 * If we are not writing at end of file, the process
 			 * seeked to another point in the file since its last
 			 * write, or we have reached our maximum cluster size,
 			 * then push the previous cluster. Otherwise try
 			 * reallocating to make it sequential.
 			 *
 			 * Change to algorithm: only push previous cluster if
 			 * it was sequential from the point of view of the
 			 * seqcount heuristic, otherwise leave the buffer 
 			 * intact so we can potentially optimize the I/O
 			 * later on in the buf_daemon or update daemon
 			 * flush.
 			 */
 			cursize = vp->v_lastw - vp->v_cstart + 1;
 			if (((u_quad_t) bp->b_offset + lblocksize) != filesize ||
 			    lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
 				if (!async && seqcount > 0) {
 					cluster_wbuild_wb(vp, lblocksize,
 						vp->v_cstart, cursize);
 				}
 			} else {
 				struct buf **bpp, **endbp;
 				struct cluster_save *buflist;
 
 				/*
 				 * Gather the cluster's buffers plus bp and
 				 * ask the filesystem to reallocate them.
 				 * endbp points at the last entry, which is
 				 * bp itself.
 				 */
 				buflist = cluster_collectbufs(vp, bp);
 				endbp = &buflist->bs_children
 				    [buflist->bs_nchildren - 1];
 				if (VOP_REALLOCBLKS(vp, buflist)) {
 					/*
 					 * Failed, push the previous cluster
 					 * if *really* writing sequentially
 					 * in the logical file (seqcount > 1),
 					 * otherwise delay it in the hopes that
 					 * the low level disk driver can
 					 * optimize the write ordering.
 					 *
 					 * Every collected buffer except bp
 					 * (bpp < endbp) is released; bp is
 					 * dealt with further down.
 					 */
 					for (bpp = buflist->bs_children;
 					     bpp < endbp; bpp++)
 						brelse(*bpp);
 					free(buflist, M_SEGMENT);
 					if (seqcount > 1) {
 						cluster_wbuild_wb(vp, 
 						    lblocksize, vp->v_cstart, 
 						    cursize);
 					}
 				} else {
 					/*
 					 * Succeeded, keep building cluster.
 					 *
 					 * All collected buffers, bp included
 					 * (bpp <= endbp), become delayed
 					 * writes.
 					 */
 					for (bpp = buflist->bs_children;
 					     bpp <= endbp; bpp++)
 						bdwrite(*bpp);
 					free(buflist, M_SEGMENT);
 					vp->v_lastw = lbn;
 					vp->v_lasta = bp->b_blkno;
 					return;
 				}
 			}
 		}
 		/*
 		 * Consider beginning a cluster. If at end of file, make
 		 * cluster as large as possible, otherwise find size of
 		 * existing cluster.
 		 *
 		 * VOP_BMAP is only consulted for a regular file, not at end
 		 * of file, whose b_blkno still equals b_lblkno.  If the
 		 * lookup fails or yields block -1 the buffer is written
 		 * immediately and no cluster is started.
 		 */
 		if ((vp->v_type == VREG) &&
 			((u_quad_t) bp->b_offset + lblocksize) != filesize &&
 		    (bp->b_blkno == bp->b_lblkno) &&
 		    (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) ||
 		     bp->b_blkno == -1)) {
 			bawrite(bp);
 			vp->v_clen = 0;
 			vp->v_lasta = bp->b_blkno;
 			vp->v_cstart = lbn + 1;
 			vp->v_lastw = lbn;
 			return;
 		}
 		vp->v_clen = maxclen;
 		if (!async && maxclen == 0) {	/* I/O not contiguous */
 			vp->v_cstart = lbn + 1;
 			bawrite(bp);
 		} else {	/* Wait for rest of cluster */
 			vp->v_cstart = lbn;
 			bdwrite(bp);
 		}
 	} else if (lbn == vp->v_cstart + vp->v_clen) {
 		/*
 		 * At end of cluster, write it out if seqcount tells us we
 		 * are operating sequentially, otherwise let the buf or
 		 * update daemon handle it.
 		 */
 		bdwrite(bp);
 		if (seqcount > 1)
 			cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, vp->v_clen + 1);
 		vp->v_clen = 0;
 		vp->v_cstart = lbn + 1;
 	} else if (vm_page_count_severe()) {
 		/*
 		 * We are low on memory, get it going NOW
 		 */
 		bawrite(bp);
 	} else {
 		/*
 		 * In the middle of a cluster, so just delay the I/O for now.
 		 */
 		bdwrite(bp);
 	}
 	/* Remember this write for the next sequentiality check. */
 	vp->v_lastw = lbn;
 	vp->v_lasta = bp->b_blkno;
 }
 
 
 /*
  * This is an awful lot like cluster_rbuild...wish they could be combined.
  *
  * Write out the delayed-write buffers of vp that lie in the logical
  * block range [start_lbn, start_lbn + len), combining physically
  * adjacent ones into a single cluster pbuf where possible.
  *
  * Arguments:
  *	vp	  - vnode whose buffers are written
  *	size	  - block size in bytes that every clustered buffer must have
  *	start_lbn - first logical block to consider
  *	len	  - number of logical blocks to consider
  *
  * Blocks that are not in core, not delayed-write, B_LOCKED, B_INVAL or
  * whose buffer lock cannot be taken without waiting are skipped.
  *
  * Returns the sum of b_bufsize over the buffers handed to bawrite().
  */
 int
 cluster_wbuild(vp, size, start_lbn, len)
 	struct vnode *vp;
 	long size;
 	daddr_t start_lbn;
 	int len;
 {
 	struct buf *bp, *tbp;
 	int i, j, s;
 	int totalwritten = 0;
 	int dbsize = btodb(size);
 
 	while (len > 0) {
 		s = splbio();
 		/*
 		 * If the buffer is not delayed-write (i.e. dirty), or it
 		 * is delayed-write but either locked or inval, it cannot
 		 * partake in the clustered write.
 		 */
 		if (((tbp = gbincore(vp, start_lbn)) == NULL) ||
 		  ((tbp->b_flags & (B_LOCKED | B_INVAL | B_DELWRI)) != B_DELWRI) ||
 		  BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT)) {
 			++start_lbn;
 			--len;
 			splx(s);
 			continue;
 		}
 		bremfree(tbp);
 		tbp->b_flags &= ~B_DONE;
 		splx(s);
 
 		/*
 		 * Extra memory in the buffer, punt on this buffer.
 		 * XXX we could handle this in most cases, but we would
 		 * have to push the extra memory down to after our max
 		 * possible cluster size and then potentially pull it back
 		 * up if the cluster was terminated prematurely--too much
 		 * hassle.
 		 *
 		 * The buffer is also written on its own when it is not
 		 * B_CLUSTEROK|B_VMIO, has an unexpected size, is the last
 		 * block of the range, or no pbuf is returned.
 		 */
 		if (((tbp->b_flags & (B_CLUSTEROK | B_MALLOC | B_VMIO)) != 
 		     (B_CLUSTEROK | B_VMIO)) ||
 		  (tbp->b_bcount != tbp->b_bufsize) ||
 		  (tbp->b_bcount != size) ||
 		  (len == 1) ||
 		  ((bp = getpbuf(&cluster_pbuf_freecnt)) == NULL)) {
 			totalwritten += tbp->b_bufsize;
 			bawrite(tbp);
 			++start_lbn;
 			--len;
 			continue;
 		}
 
 		/*
 		 * We got a pbuf to make the cluster in.
 		 * so initialise it.
 		 */
 		TAILQ_INIT(&bp->b_cluster.cluster_head);
 		bp->b_bcount = 0;
+		bp->b_magic = tbp->b_magic;
+		bp->b_op = tbp->b_op;
 		bp->b_bufsize = 0;
 		bp->b_npages = 0;
 		/* The cluster takes its own reference on the write credential. */
 		if (tbp->b_wcred != NOCRED) {
 		    bp->b_wcred = tbp->b_wcred;
 		    crhold(bp->b_wcred);
 		}
 
 		bp->b_blkno = tbp->b_blkno;
 		bp->b_lblkno = tbp->b_lblkno;
 		bp->b_offset = tbp->b_offset;
 		/* Give the pbuf's data pointer tbp's in-page offset. */
 		bp->b_data = (char *)((vm_offset_t)bp->b_data |
 		    ((vm_offset_t)tbp->b_data & PAGE_MASK));
 		bp->b_flags |= B_CLUSTER |
 				(tbp->b_flags & (B_VMIO | B_NEEDCOMMIT));
 		bp->b_iodone = cluster_callback;
 		pbgetvp(vp, bp);
 		/*
 		 * From this location in the file, scan forward to see
 		 * if there are buffers with adjacent data that need to
 		 * be written as well.
 		 */
 		for (i = 0; i < len; ++i, ++start_lbn) {
 			if (i != 0) { /* If not the first buffer */
 				s = splbio();
 				/*
 				 * If the adjacent data is not even in core it
 				 * can't need to be written.
 				 */
 				if ((tbp = gbincore(vp, start_lbn)) == NULL) {
 					splx(s);
 					break;
 				}
 
 				/*
 				 * If it IS in core, but has different
 				 * characteristics, or is locked (which
 				 * means it could be undergoing a background
 				 * I/O or be in a weird state), then don't
 				 * cluster with it.
 				 */
 				if ((tbp->b_flags & (B_VMIO | B_CLUSTEROK |
 				    B_INVAL | B_DELWRI | B_NEEDCOMMIT))
 				  != (B_DELWRI | B_CLUSTEROK |
 				    (bp->b_flags & (B_VMIO | B_NEEDCOMMIT))) ||
 				    (tbp->b_flags & B_LOCKED) ||
 				    tbp->b_wcred != bp->b_wcred ||
 				    BUF_LOCK(tbp, LK_EXCLUSIVE | LK_NOWAIT)) {
 					splx(s);
 					break;
 				}
 
 				/*
 				 * Check that the combined cluster
 				 * would make sense with regard to pages
 				 * and would not be too large
 				 */
 				if ((tbp->b_bcount != size) ||
 				  ((bp->b_blkno + (dbsize * i)) !=
 				    tbp->b_blkno) ||
 				  ((tbp->b_npages + bp->b_npages) >
 				    (vp->v_mount->mnt_iosize_max / PAGE_SIZE))) {
 					BUF_UNLOCK(tbp);
 					splx(s);
 					break;
 				}
 				/*
 				 * Ok, it's passed all the tests,
 				 * so remove it from the free list
 				 * and mark it busy. We will use it.
 				 */
 				bremfree(tbp);
 				tbp->b_flags &= ~B_DONE;
 				splx(s);
 			} /* end of code for non-first buffers only */
 			/* check for latent dependencies to be handled */
 			if ((LIST_FIRST(&tbp->b_dep)) != NULL)
 				buf_start(tbp);
 			/*
 			 * If the IO is via the VM then we do some
 			 * special VM hackery. (yuck)
 			 */
 			if (tbp->b_flags & B_VMIO) {
 				vm_page_t m;
 
 				/*
 				 * A later buffer with a PG_BUSY page is
 				 * released and ends the cluster here.
 				 */
 				if (i != 0) { /* if not first buffer */
 					for (j = 0; j < tbp->b_npages; j += 1) {
 						m = tbp->b_pages[j];
 						if (m->flags & PG_BUSY) {
 							bqrelse(tbp);
 							goto finishcluster;
 						}
 					}
 				}
 					
 				/*
 				 * Account the pages as under I/O and append
 				 * them to the cluster, skipping a page equal
 				 * to the last one appended.
 				 */
 				for (j = 0; j < tbp->b_npages; j += 1) {
 					m = tbp->b_pages[j];
 					vm_page_io_start(m);
 					vm_object_pip_add(m->object, 1);
 					if ((bp->b_npages == 0) ||
 					  (bp->b_pages[bp->b_npages - 1] != m)) {
 						bp->b_pages[bp->b_npages] = m;
 						bp->b_npages++;
 					}
 				}
 			}
 			bp->b_bcount += size;
 			bp->b_bufsize += size;
 
 			/*
 			 * Turn the component into an asynchronous write in
 			 * progress and chain it onto the cluster.
 			 */
 			s = splbio();
 			bundirty(tbp);
 			tbp->b_flags &= ~B_DONE;
 			tbp->b_ioflags &= ~BIO_ERROR;
 			tbp->b_flags |= B_ASYNC;
 			tbp->b_iocmd = BIO_WRITE;
 			reassignbuf(tbp, tbp->b_vp);	/* put on clean list */
 			++tbp->b_vp->v_numoutput;
 			splx(s);
 			BUF_KERNPROC(tbp);
 			TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
 				tbp, b_cluster.cluster_entry);
 		}
 	finishcluster:
 		/* Map the collected pages and issue the cluster write. */
 		pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
 			(vm_page_t *) bp->b_pages, bp->b_npages);
 		if (bp->b_bufsize > bp->b_kvasize)
 			panic(
 			    "cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
 			    bp->b_bufsize, bp->b_kvasize);
 		bp->b_kvasize = bp->b_bufsize;
 		totalwritten += bp->b_bufsize;
 		bp->b_dirtyoff = 0;
 		bp->b_dirtyend = bp->b_bufsize;
 		bawrite(bp);
 
 		/* i blocks were consumed by this cluster. */
 		len -= i;
 	}
 	return totalwritten;
 }
 
 /*
  * Collect together all the buffers in a cluster.
  * Plus add one additional buffer.
  */
 static struct cluster_save *
 cluster_collectbufs(vp, last_bp)
 	struct vnode *vp;
 	struct buf *last_bp;
 {
 	struct cluster_save *save;
 	struct buf *cur;
 	daddr_t blk;
 	int idx, nprev;
 
 	/* Number of blocks in the cluster that is already being built. */
 	nprev = vp->v_lastw - vp->v_cstart + 1;
 
 	/*
 	 * One allocation holds the header followed directly by the array
 	 * of child pointers, with one extra slot for last_bp.
 	 */
 	save = malloc(sizeof(struct buf *) * (nprev + 1) + sizeof(*save),
 	    M_SEGMENT, M_WAITOK);
 	save->bs_children = (struct buf **) (save + 1);
 	save->bs_nchildren = 0;
 
 	/* Read in each block of the existing cluster. */
 	idx = 0;
 	blk = vp->v_cstart;
 	while (idx < nprev) {
 		(void) bread(vp, blk, last_bp->b_bcount, NOCRED, &cur);
 		save->bs_children[idx] = cur;
 		/* Look up the physical block if b_blkno still equals b_lblkno. */
 		if (cur->b_blkno == cur->b_lblkno)
 			VOP_BMAP(cur->b_vp, cur->b_lblkno, NULL, &cur->b_blkno,
 				NULL, NULL);
 		idx++;
 		blk++;
 	}
 
 	/* The caller's buffer always goes last. */
 	cur = last_bp;
 	save->bs_children[idx] = cur;
 	if (cur->b_blkno == cur->b_lblkno)
 		VOP_BMAP(cur->b_vp, cur->b_lblkno, NULL, &cur->b_blkno,
 			NULL, NULL);
 	save->bs_nchildren = idx + 1;
 	return (save);
 }
Index: head/sys/kern/vfs_default.c
===================================================================
--- head/sys/kern/vfs_default.c	(revision 75579)
+++ head/sys/kern/vfs_default.c	(revision 75580)
@@ -1,753 +1,745 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed
  * to Berkeley by John Heidemann of the UCLA Ficus project.
  *
  * Source: * @(#)i405_init.c 2.10 92/04/27 UCLA Ficus project
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *
  * $FreeBSD$
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/poll.h>
 
 #include <machine/limits.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 #include <vm/vm_zone.h>
 
 /* Default handlers that are private to this file. */
 static int	vop_nolookup __P((struct vop_lookup_args *));
 static int	vop_nostrategy __P((struct vop_strategy_args *));
 
 /*
  * This vnode table stores what we want to do if the filesystem doesn't
  * implement a particular VOP.
  *
  * If there is no specific entry here, we will return EOPNOTSUPP.
  *
  * Each entry pairs an operation descriptor with its default handler;
  * the { NULL, NULL } entry terminates the list.
  */
 
 vop_t **default_vnodeop_p;
 static struct vnodeopv_entry_desc default_vnodeop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) vop_eopnotsupp },
 	{ &vop_advlock_desc,		(vop_t *) vop_einval },
-	{ &vop_bwrite_desc,		(vop_t *) vop_stdbwrite },
 	{ &vop_close_desc,		(vop_t *) vop_null },
 	{ &vop_createvobject_desc,	(vop_t *) vop_stdcreatevobject },
 	{ &vop_destroyvobject_desc,	(vop_t *) vop_stddestroyvobject },
 	{ &vop_fsync_desc,		(vop_t *) vop_null },
 	{ &vop_getvobject_desc,		(vop_t *) vop_stdgetvobject },
 	{ &vop_inactive_desc,		(vop_t *) vop_stdinactive },
 	{ &vop_ioctl_desc,		(vop_t *) vop_enotty },
 	{ &vop_islocked_desc,		(vop_t *) vop_noislocked },
 	{ &vop_lease_desc,		(vop_t *) vop_null },
 	{ &vop_lock_desc,		(vop_t *) vop_nolock },
 	{ &vop_lookup_desc,		(vop_t *) vop_nolookup },
 	{ &vop_open_desc,		(vop_t *) vop_null },
 	{ &vop_pathconf_desc,		(vop_t *) vop_einval },
 	{ &vop_poll_desc,		(vop_t *) vop_nopoll },
 	{ &vop_readlink_desc,		(vop_t *) vop_einval },
 	{ &vop_revoke_desc,		(vop_t *) vop_revoke },
 	{ &vop_strategy_desc,		(vop_t *) vop_nostrategy },
 	{ &vop_unlock_desc,		(vop_t *) vop_nounlock },
 	{ NULL, NULL }
 };
 
 /* Ties the entry list to the default_vnodeop_p vector pointer. */
 static struct vnodeopv_desc default_vnodeop_opv_desc =
         { &default_vnodeop_p, default_vnodeop_entries };
 
 VNODEOP_SET(default_vnodeop_opv_desc);
 
 /*
  * Generic handler for unsupported operations: ignores its arguments and
  * always returns EOPNOTSUPP.  The disabled printf below would name the
  * operation that was attempted.
  */
 int
 vop_eopnotsupp(struct vop_generic_args *ap)
 {
 	/*
 	printf("vop_notsupp[%s]\n", ap->a_desc->vdesc_name);
 	*/
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Generic handler that ignores its arguments and always returns EBADF.
  */
 int
 vop_ebadf(struct vop_generic_args *ap)
 {
 
 	return (EBADF);
 }
 
 /*
  * Generic handler that ignores its arguments and always returns ENOTTY.
  */
 int
 vop_enotty(struct vop_generic_args *ap)
 {
 
 	return (ENOTTY);
 }
 
 /*
  * Generic handler that ignores its arguments and always returns EINVAL.
  */
 int
 vop_einval(struct vop_generic_args *ap)
 {
 
 	return (EINVAL);
 }
 
 /*
  * Generic handler that does nothing and reports success (returns 0).
  */
 int
 vop_null(struct vop_generic_args *ap)
 {
 
 	return (0);
 }
 
 /*
  * Forward the operation to the default operations vector: the call is
  * re-issued through default_vnodeop_p at the offset recorded in the
  * operation's descriptor, and that handler's result is returned.
  */
 int
 vop_defaultop(struct vop_generic_args *ap)
 {
 
 	return (VOCALL(default_vnodeop_p, ap->a_desc->vdesc_offset, ap));
 }
 
 /*
  * Handler for operations that must never be reached: panics, naming the
  * operation taken from the descriptor.  There is no return statement;
  * presumably panic() does not return.
  */
 int
 vop_panic(struct vop_generic_args *ap)
 {
 
 	panic("filesystem goof: vop_panic[%s]", ap->a_desc->vdesc_name);
 }
 
 /*
  * Default lookup handler: clears the result vnode pointer and fails
  * with ENOTDIR.
  */
 static int
 vop_nolookup(ap)
 	struct vop_lookup_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 
 	/* Make sure the caller does not see a stale vnode pointer. */
 	*ap->a_vpp = NULL;
 	return (ENOTDIR);
 }
 
 /*
  *	vop_nostrategy:
  *
  *	Strategy routine for VFS devices that have none.
  *
  *	BIO_ERROR and B_INVAL must be cleared prior to calling any strategy
  *	routine.  Typically this is done for a BIO_READ strategy call.
  *	Typically B_INVAL is assumed to already be clear prior to a write 
  *	and should not be cleared manually unless you just made the buffer
  *	invalid.  BIO_ERROR should be cleared either way.
  */
 
 static int
 vop_nostrategy (struct vop_strategy_args *ap)
 {
 	printf("No strategy for buffer at %p\n", ap->a_bp);
 	vprint("", ap->a_vp);
 	vprint("", ap->a_bp->b_vp);
 	ap->a_bp->b_ioflags |= BIO_ERROR;
 	ap->a_bp->b_error = EOPNOTSUPP;
 	bufdone(ap->a_bp);
 	return (EOPNOTSUPP);
 }
 
 /*
  * Standard pathconf handler: stores the value for the requested name in
  * *a_retval and returns 0, or returns EINVAL for a name it does not know
  * (leaving *a_retval untouched).
  */
 int
 vop_stdpathconf(ap)
 	struct vop_pathconf_args /* {
 	struct vnode *a_vp;
 	int a_name;
 	int *a_retval;
 	} */ *ap;
 {
 	int name = ap->a_name;
 
 	if (name == _PC_LINK_MAX) {
 		*ap->a_retval = LINK_MAX;
 		return (0);
 	}
 	if (name == _PC_MAX_CANON) {
 		*ap->a_retval = MAX_CANON;
 		return (0);
 	}
 	if (name == _PC_MAX_INPUT) {
 		*ap->a_retval = MAX_INPUT;
 		return (0);
 	}
 	if (name == _PC_PIPE_BUF) {
 		*ap->a_retval = PIPE_BUF;
 		return (0);
 	}
 	if (name == _PC_CHOWN_RESTRICTED) {
 		*ap->a_retval = 1;
 		return (0);
 	}
 	if (name == _PC_VDISABLE) {
 		*ap->a_retval = _POSIX_VDISABLE;
 		return (0);
 	}
 	return (EINVAL);
 }
 
 /*
  * Standard lock, unlock and islocked functions.
  *
  * These depend on the lock structure being the first element in the
  * inode, ie: vp->v_data points to the lock!
  *
  * NOTE(review): the code below operates on vp->v_lock and
  * vp->v_interlock and never touches v_data, so the statement above may
  * be out of date -- confirm.
  */
 /*
  * Apply the lock request in a_flags to the vnode's v_lock on behalf of
  * a_p and return the lock manager's result.  With DEBUG_LOCKS defined
  * the debugging variant is used instead.
  */
 int
 vop_stdlock(ap)
 	struct vop_lock_args /* {
 		struct vnode *a_vp;
 		int a_flags;
 		struct proc *a_p;
 	} */ *ap;
 {               
 	struct vnode *vp = ap->a_vp;
 
 #ifndef	DEBUG_LOCKS
 	return (lockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock, ap->a_p));
 #else
 	return (debuglockmgr(&vp->v_lock, ap->a_flags, &vp->v_interlock,
 	    ap->a_p, "vop_stdlock", vp->filename, vp->line));
 #endif
 }
 
 /*
  * Release the vnode's v_lock: the caller's flags are passed to the lock
  * manager with LK_RELEASE added, and its result is returned.
  */
 int
 vop_stdunlock(ap)
 	struct vop_unlock_args /* {
 		struct vnode *a_vp;
 		int a_flags;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 
 	return (lockmgr(&vp->v_lock, ap->a_flags | LK_RELEASE, &vp->v_interlock, 
 	    ap->a_p));
 }
 
 /*
  * Return the status of the vnode's v_lock as reported by lockstatus()
  * for process a_p.
  */
 int
 vop_stdislocked(ap)
 	struct vop_islocked_args /* {
 		struct vnode *a_vp;
 		struct proc *a_p;
 	} */ *ap;
 {
 
 	return (lockstatus(&ap->a_vp->v_lock, ap->a_p));
 }
 
 /*
  * Default inactive handler: unlocks the vnode and returns 0.  The
  * result of VOP_UNLOCK() is not checked.
  */
 int
 vop_stdinactive(ap)
 	struct vop_inactive_args /* {
 		struct vnode *a_vp;
 		struct proc *a_p;
 	} */ *ap;
 {
 
 	VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
 	return (0);
 }
 
 /*
  * Return true for select/poll.
  */
 int
 vop_nopoll(ap)
 	struct vop_poll_args /* {
 		struct vnode *a_vp;
 		int  a_events;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	int events = ap->a_events;
 
 	/*
 	 * Plain read/write polling always reports ready.  A request that
 	 * includes anything outside the standard event set is answered
 	 * with POLLNVAL, so that callers can detect whether extended
 	 * functionality exists without knowing which filesystem they are
 	 * talking to.
 	 */
 	if ((events & ~POLLSTANDARD) != 0)
 		return (POLLNVAL);
 
 	events &= (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM);
 	return (events);
 }
 
 /*
  * Implement poll for local filesystems that support it.
  *
  * When only standard events are requested, the normal read/write events
  * among them are returned as ready.  Any other request is passed to
  * vn_pollrecord() and its result is returned.
  */
 int
 vop_stdpoll(ap)
 	struct vop_poll_args /* {
 		struct vnode *a_vp;
 		int  a_events;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	/* Nothing beyond the standard events was asked for. */
 	if ((ap->a_events & ~POLLSTANDARD) == 0)
 		return (ap->a_events & (POLLRDNORM|POLLWRNORM));
 	return (vn_pollrecord(ap->a_vp, ap->a_p, ap->a_events));
-}
-
-int
-vop_stdbwrite(ap)
-	struct vop_bwrite_args *ap;
-{
-	return (bwrite(ap->a_bp));
 }
 
 /*
  * Stubs to use when there is no locking to be done on the underlying object.
  * A minimal shared lock is necessary to ensure that the underlying object
  * is not revoked while an operation is in progress. So, an active shared
  * count is maintained in an auxiliary vnode lock structure.
  *
  * The requested lock type is translated before being applied to the
  * vnode's v_lock: drain requests stay drain requests, shared and
  * (normally) exclusive requests become shared, and upgrade/downgrade
  * requests succeed without doing anything.  A release request or an
  * unknown type panics.
  */
 int
 vop_sharedlock(ap)
 	struct vop_lock_args /* {
 		struct vnode *a_vp;
 		int a_flags;
 		struct proc *a_p;
 	} */ *ap;
 {
 	/*
 	 * This code cannot be used until all the non-locking filesystems
 	 * (notably NFS) are converted to properly lock and release nodes.
 	 * Also, certain vnode operations change the locking state within
 	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
 	 * and symlink). Ideally these operations should not change the
 	 * lock state, but should be changed to let the caller of the
 	 * function unlock them. Otherwise all intermediate vnode layers
 	 * (such as union, umapfs, etc) must catch these functions to do
 	 * the necessary locking at their layer. Note that the inactive
 	 * and lookup operations also change their lock state, but this 
 	 * cannot be avoided, so these two operations will always need
 	 * to be handled in intermediate layers.
 	 */
 	struct vnode *vp = ap->a_vp;
 	int vnflags, flags = ap->a_flags;
 
 	switch (flags & LK_TYPE_MASK) {
 	case LK_DRAIN:
 		vnflags = LK_DRAIN;
 		break;
 	case LK_EXCLUSIVE:
 #ifdef DEBUG_VFS_LOCKS
 		/*
 		 * Normally, we use shared locks here, but that confuses
 		 * the locking assertions.
 		 */
 		vnflags = LK_EXCLUSIVE;
 		break;
 #endif
 		/*
 		 * Without DEBUG_VFS_LOCKS the exclusive case falls through
 		 * and is treated as a shared request.
 		 */
 	case LK_SHARED:
 		vnflags = LK_SHARED;
 		break;
 	case LK_UPGRADE:
 	case LK_EXCLUPGRADE:
 	case LK_DOWNGRADE:
 		return (0);
 	case LK_RELEASE:
 	default:
 		panic("vop_sharedlock: bad operation %d", flags & LK_TYPE_MASK);
 	}
 	/* Pass the caller's interlock request through. */
 	if (flags & LK_INTERLOCK)
 		vnflags |= LK_INTERLOCK;
 #ifndef	DEBUG_LOCKS
 	return (lockmgr(&vp->v_lock, vnflags, &vp->v_interlock, ap->a_p));
 #else
 	return (debuglockmgr(&vp->v_lock, vnflags, &vp->v_interlock, ap->a_p,
 	    "vop_sharedlock", vp->filename, vp->line));
 #endif
 }
 
 /*
  * Stubs to use when there is no locking to be done on the underlying object.
  * A minimal shared lock is necessary to ensure that the underlying object
  * is not revoked while an operation is in progress. So, an active shared
  * count is maintained in an auxiliary vnode lock structure.
  *
  * As compiled today ("notyet" is not defined) this takes no lock at
  * all: it only drops the vnode interlock when LK_INTERLOCK was passed
  * and returns 0.  The "notyet" branch holds the intended shared-lock
  * implementation.
  */
 int
 vop_nolock(ap)
 	struct vop_lock_args /* {
 		struct vnode *a_vp;
 		int a_flags;
 		struct proc *a_p;
 	} */ *ap;
 {
 #ifdef notyet
 	/*
 	 * This code cannot be used until all the non-locking filesystems
 	 * (notably NFS) are converted to properly lock and release nodes.
 	 * Also, certain vnode operations change the locking state within
 	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
 	 * and symlink). Ideally these operations should not change the
 	 * lock state, but should be changed to let the caller of the
 	 * function unlock them. Otherwise all intermediate vnode layers
 	 * (such as union, umapfs, etc) must catch these functions to do
 	 * the necessary locking at their layer. Note that the inactive
 	 * and lookup operations also change their lock state, but this 
 	 * cannot be avoided, so these two operations will always need
 	 * to be handled in intermediate layers.
 	 */
 	struct vnode *vp = ap->a_vp;
 	int vnflags, flags = ap->a_flags;
 
 	switch (flags & LK_TYPE_MASK) {
 	case LK_DRAIN:
 		vnflags = LK_DRAIN;
 		break;
 	case LK_EXCLUSIVE:
 	case LK_SHARED:
 		/* Exclusive requests are taken as shared ones. */
 		vnflags = LK_SHARED;
 		break;
 	case LK_UPGRADE:
 	case LK_EXCLUPGRADE:
 	case LK_DOWNGRADE:
 		return (0);
 	case LK_RELEASE:
 	default:
 		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
 	}
 	if (flags & LK_INTERLOCK)
 		vnflags |= LK_INTERLOCK;
 	return(lockmgr(&vp->v_lock, vnflags, &vp->v_interlock, ap->a_p));
 #else /* for now */
 	/*
 	 * Since we are not using the lock manager, we must clear
 	 * the interlock here.
 	 */
 	if (ap->a_flags & LK_INTERLOCK)
 		mtx_unlock(&ap->a_vp->v_interlock);
 	return (0);
 #endif
 }
 
 /*
  * Do the inverse of vop_nolock, handling the interlock in a compatible way.
  *
  * No lock is released; the vnode interlock is dropped when LK_INTERLOCK
  * was passed, and 0 is always returned.
  */
 int
 vop_nounlock(ap)
 	struct vop_unlock_args /* {
 		struct vnode *a_vp;
 		int a_flags;
 		struct proc *a_p;
 	} */ *ap;
 {
 
 	/*
 	 * Since we are not using the lock manager, we must clear
 	 * the interlock here.
 	 */
 	if (ap->a_flags & LK_INTERLOCK)
 		mtx_unlock(&ap->a_vp->v_interlock);
 	return (0);
 }
 
 /*
  * Return whether or not the node is in use.
  *
  * This stub does not examine the vnode and always returns 0.
  */
 int
 vop_noislocked(ap)
 	struct vop_islocked_args /* {
 		struct vnode *a_vp;
 		struct proc *a_p;
 	} */ *ap;
 {
 
 	return (0);
 }
 
 /*
  * Return our mount point, as we will take charge of the writes.
  *
  * Stores the vnode's v_mount in *a_mpp and returns 0.
  */
 int
 vop_stdgetwritemount(ap)
 	struct vop_getwritemount_args /* {
 		struct vnode *a_vp;
 		struct mount **a_mpp;
 	} */ *ap;
 {
 
 	*(ap->a_mpp) = ap->a_vp->v_mount;
 	return (0);
 }
 
 /*
  * Default VOP_CREATEVOBJECT: make sure the vnode has a VM object.
  *
  * Returns 0 right away when the vnode is neither a disk (vn_isdisk)
  * nor eligible for VM I/O (vn_canvmio).  Otherwise:
  *  - if v_object is NULL, an object is allocated with
  *    vnode_pager_alloc(); a vnode that is neither VREG/VDIR nor has a
  *    device switch entry is left without an object and 0 is returned;
  *  - if an object exists but is marked OBJ_DEAD, the vnode is unlocked,
  *    the routine sleeps on the object, relocks the vnode and retries.
  * Once an object is present VOBJBUF is set in v_flag.  The only error
  * returned is one reported by VOP_GETATTR.
  */
 int
 vop_stdcreatevobject(ap)
 	struct vop_createvobject_args /* {
 		struct vnode *a_vp;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct ucred *cred = ap->a_cred;
 	struct proc *p = ap->a_p;
 	struct vattr vat;
 	vm_object_t object;
 	int error = 0;
 
 	/* Nothing to do for vnodes that cannot take part in VM I/O. */
 	if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE)
 		return (0);
 
 retry:
 	if ((object = vp->v_object) == NULL) {
 		if (vp->v_type == VREG || vp->v_type == VDIR) {
 			/* Size the object from the file's current attributes. */
 			if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
 				goto retn;
 			object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
 		} else if (devsw(vp->v_rdev) != NULL) {
 			/*
 			 * This simply allocates the biggest object possible
 			 * for a disk vnode.  This should be fixed, but doesn't
 			 * cause any problems (yet).
 			 */
 			object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
 		} else {
 			/* No object can be made for this vnode; error is still 0. */
 			goto retn;
 		}
 		/*
 		 * Dereference the reference we just created.  This assumes
 		 * that the object is associated with the vp.
 		 */
 		object->ref_count--;
 		vp->v_usecount--;
 	} else {
 		if (object->flags & OBJ_DEAD) {
 			/*
 			 * Drop the vnode lock while sleeping on the dead
 			 * object, then retake it and look at v_object again.
 			 */
 			VOP_UNLOCK(vp, 0, p);
 			tsleep(object, PVM, "vodead", 0);
 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 			goto retry;
 		}
 	}
 
 	KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
 	vp->v_flag |= VOBJBUF;
 
 retn:
 	return (error);
 }
 
 /*
  * Default VOP_DESTROYVOBJECT: dispose of the vnode's VM object, if any.
  *
  * An object whose ref_count is already zero is terminated; one that is
  * still referenced only has its pager deallocated.  Always returns 0.
  */
 int
 vop_stddestroyvobject(ap)
 	struct vop_destroyvobject_args /* {
 		struct vnode *vp;
 	} */ *ap;
 {
 	vm_object_t object;
 
 	object = ap->a_vp->v_object;
 	if (object == NULL)
 		return (0);
 
 	if (object->ref_count != 0) {
 		/*
 		 * Woe to the process that tries to page now :-).
 		 */
 		vm_pager_deallocate(object);
 	} else {
 		/*
 		 * vclean() may be called twice. The first time
 		 * removes the primary reference to the object,
 		 * the second time goes one further and is a
 		 * special-case to terminate the object.
 		 */
 		vm_object_terminate(object);
 	}
 	return (0);
 }
 
 int
 vop_stdgetvobject(ap)
 	struct vop_getvobject_args /* {
 		struct vnode *vp;
 		struct vm_object **objpp;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct vm_object **objpp = ap->a_objpp;
 
 	if (objpp)
 		*objpp = vp->v_object;
 	return (vp->v_object ? 0 : EINVAL);
 }
 
 /* 
  * vfs default ops
  * used to fill the vfs function table to get reasonable default return values.
  */
 /*
  * Default mount operation: ignores all of its arguments and reports
  * success.
  */
 int
 vfs_stdmount(mp, path, data, ndp, p)
 	struct mount *mp;
 	char *path;
 	caddr_t data;
 	struct nameidata *ndp;
 	struct proc *p;
 {
 
 	return (0);
 }
 
 /*
  * Default unmount operation: nothing to tear down, so report success.
  */
 int
 vfs_stdunmount(mp, mntflags, p)
 	struct mount *mp;
 	int mntflags;
 	struct proc *p;
 {
 
 	return (0);
 }
 
 /*
  * Default root operation: not supported, *vpp is left untouched.
  */
 int
 vfs_stdroot(mp, vpp)
 	struct mount *mp;
 	struct vnode **vpp;
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default statfs operation: not supported, *sbp is left untouched.
  */
 int
 vfs_stdstatfs(mp, sbp, p)
 	struct mount *mp;
 	struct statfs *sbp;
 	struct proc *p;
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default vnode-to-file-handle operation: not supported, *fhp is left
  * untouched.
  */
 int
 vfs_stdvptofh(vp, fhp)
 	struct vnode *vp;
 	struct fid *fhp;
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default start operation: nothing to do, report success.
  */
 int
 vfs_stdstart(mp, flags, p)
 	struct mount *mp;
 	int flags;
 	struct proc *p;
 {
 
 	return (0);
 }
 
 /*
  * Default quotactl operation: not supported.
  */
 int
 vfs_stdquotactl(mp, cmds, uid, arg, p)
 	struct mount *mp;
 	int cmds;
 	uid_t uid;
 	caddr_t arg;
 	struct proc *p;
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default sync operation: nothing is written, report success.
  */
 int
 vfs_stdsync(mp, waitfor, cred, p)
 	struct mount *mp;
 	int waitfor;
 	struct ucred *cred;
 	struct proc *p;
 {
 
 	return (0);
 }
 
 /*
  * Default vget operation: not supported, *vpp is left untouched.
  */
 int
 vfs_stdvget(mp, ino, vpp)
 	struct mount *mp;
 	ino_t ino;
 	struct vnode **vpp;
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default file-handle-to-vnode operation: not supported, *vpp is left
  * untouched.
  */
 int
 vfs_stdfhtovp(mp, fhp, vpp)
 	struct mount *mp;
 	struct fid *fhp;
 	struct vnode **vpp;
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default export-check operation: not supported; neither *extflagsp nor
  * *credanonp is written.
  */
 int
 vfs_stdcheckexp(mp, nam, extflagsp, credanonp)
 	struct mount *mp;
 	struct sockaddr *nam;
 	int *extflagsp;
 	struct ucred **credanonp;
 {
 
 	return (EOPNOTSUPP);
 }
 
 /*
  * Default init operation: nothing to set up, report success.
  */
 int
 vfs_stdinit(vfsp)
 	struct vfsconf *vfsp;
 {
 
 	return (0);
 }
 
 /*
  * Default uninit operation: nothing to tear down, report success.
  */
 int
 vfs_stduninit(vfsp)
 	struct vfsconf *vfsp;
 {
 
 	return (0);
 }
 
 /*
  * Default extended-attribute control operation: not supported.
  */
 int
 vfs_stdextattrctl(mp, cmd, filename_vp, attrnamespace, attrname, p)
 	struct mount *mp;
 	int cmd;
 	struct vnode *filename_vp;
 	int attrnamespace;
 	const char *attrname;
 	struct proc *p;
 {
 
 	return (EOPNOTSUPP);
 }
 
 /* end of vfs default ops */
Index: head/sys/kern/vfs_export.c
===================================================================
--- head/sys/kern/vfs_export.c	(revision 75579)
+++ head/sys/kern/vfs_export.c	(revision 75580)
@@ -1,3150 +1,3150 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
  * $FreeBSD$
  */
 
 /*
  * External virtual filesystem routines
  */
 #include "opt_ddb.h"
 #include "opt_ffs.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/domain.h>
 #include <sys/eventhandler.h>
 #include <sys/event.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 
 #include <machine/limits.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 #include <vm/vm_zone.h>
 
 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
 
 static void	addalias __P((struct vnode *vp, dev_t nvp_rdev));
 static void	insmntque __P((struct vnode *vp, struct mount *mp));
 static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
 
 /*
  * Number of vnodes in existence.  Increased whenever getnewvnode()
  * allocates a new vnode, never decreased.
  */
 static unsigned long	numvnodes;
 SYSCTL_LONG(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
 
 /*
  * Conversion tables for conversion from vnode types to inode formats
  * and back.
  */
 enum vtype iftovt_tab[16] = {
 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
 };
 int vttoif_tab[9] = {
 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
 	S_IFSOCK, S_IFIFO, S_IFMT,
 };
 
 /*
  * List of vnodes that are ready for recycling.
  */
 static TAILQ_HEAD(freelst, vnode) vnode_free_list;
 
 /*
  * Minimum number of free vnodes.  If there are fewer than this free vnodes,
  * getnewvnode() will return a newly allocated vnode.
  */
 static u_long wantfreevnodes = 25;
 SYSCTL_LONG(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
 /* Number of vnodes in the free list. */
 static u_long freevnodes = 0;
 SYSCTL_LONG(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
 
 /*
  * Various variables used for debugging the new implementation of
  * reassignbuf().
  * XXX these are probably of (very) limited utility now.
  */
 static int reassignbufcalls;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "");
 static int reassignbufloops;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, "");
 static int reassignbufsortgood;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, "");
 static int reassignbufsortbad;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, "");
 /* Set to 0 for old insertion-sort based reassignbuf, 1 for modern method. */
 static int reassignbufmethod = 1;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");
 
 #ifdef ENABLE_VFS_IOOPT
 /* See NOTES for a description of this setting. */
 int vfs_ioopt = 0;
 SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
 #endif
 
 /* List of mounted filesystems. */
 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
 
 /* For any iteration/modification of mountlist */
 struct mtx mountlist_mtx;
 
 /* For any iteration/modification of mnt_vnodelist */
 struct mtx mntvnode_mtx;
 
 /*
  * Cache for the mount type id assigned to NFS.  This is used for
  * special checks in nfs/nfs_nqlease.c and vm/vnode_pager.c.
  */
 int	nfs_mount_type = -1;
 
 /* To keep more than one thread at a time from running vfs_getnewfsid */
 static struct mtx mntid_mtx;
 
 /* For any iteration/modification of vnode_free_list */
 static struct mtx vnode_free_list_mtx;
 
 /*
  * For any iteration/modification of dev->si_hlist (linked through
  * v_specnext)
  */
 static struct mtx spechash_mtx;
 
 /* Publicly exported FS */
 struct nfs_public nfs_pub;
 
 /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
 static vm_zone_t vnode_zone;
 
 /* Set to 1 to print out reclaim of active vnodes */
 int	prtactive = 0;
 
 /*
  * The workitem queue.
  * 
  * It is useful to delay writes of file data and filesystem metadata
  * for tens of seconds so that quickly created and deleted files need
  * not waste disk bandwidth being created and removed. To realize this,
  * we append vnodes to a "workitem" queue. When running with a soft
  * updates implementation, most pending metadata dependencies should
  * not wait for more than a few seconds. Thus, vnodes of mounted block
  * devices are delayed only about half the time that file data is delayed.
  * Similarly, directory updates are more critical, so are only delayed
  * about a third the time that file data is delayed. Thus, there are
  * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
  * one each second (driven off the filesystem syncer process). The
  * syncer_delayno variable indicates the next queue that is to be processed.
  * Items that need to be processed soon are placed in this queue:
  *
  *	syncer_workitem_pending[syncer_delayno]
  *
  * A delay of fifteen seconds is done by placing the request fifteen
  * entries later in the queue:
  *
  *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
  *
  */
 static int syncer_delayno = 0;
 static long syncer_mask; 
 LIST_HEAD(synclist, vnode);
 static struct synclist *syncer_workitem_pending;
 
 #define SYNCER_MAXDELAY		32
 static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
 time_t syncdelay = 30;		/* max time to delay syncing data */
 time_t filedelay = 30;		/* time to delay syncing files */
 SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
 time_t dirdelay = 29;		/* time to delay syncing directories */
 SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
 time_t metadelay = 28;		/* time to delay syncing metadata */
 SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
 static int rushjob;		/* number of slots to run ASAP */
 static int stat_rush_requests;	/* number of times I/O speeded up */
 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");
 
 /*
  * Number of vnodes we want to exist at any one time.  This is mostly used
  * to size hash tables in vnode-related code.  It is normally not used in
  * getnewvnode(), as wantfreevnodes is normally nonzero.
  *
  * XXX desiredvnodes is historical cruft and should not exist.
  */
 int desiredvnodes;
 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, 
     &desiredvnodes, 0, "Maximum number of vnodes");
 
 static void	vfs_free_addrlist __P((struct netexport *nep));
 static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
 static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
 				       struct export_args *argp));
 
 /*
  * Initialize the vnode management data structures: the desiredvnodes
  * estimate, the mutexes protecting the mount and vnode lists, the vnode
  * free list and allocation zone, and the syncer work queue table.
  */
 static void
 vntblinit(void *dummy __unused)
 {
 
 	desiredvnodes = maxproc + cnt.v_page_count / 4;
 
 	/* Locks for the global mount/vnode bookkeeping. */
 	mtx_init(&mountlist_mtx, "mountlist", MTX_DEF);
 	mtx_init(&mntvnode_mtx, "mntvnode", MTX_DEF);
 	mtx_init(&mntid_mtx, "mntid", MTX_DEF);
 	mtx_init(&spechash_mtx, "spechash", MTX_DEF);
 	mtx_init(&vnode_free_list_mtx, "vnode_free_list", MTX_DEF);
 
 	/* Free list and backing zone used by getnewvnode(). */
 	TAILQ_INIT(&vnode_free_list);
 	vnode_zone = zinit("VNODE", sizeof(struct vnode), 0, 0, 5);
 
 	/*
 	 * Initialize the filesystem syncer.  hashinit() reports the mask
 	 * for the table it built; derive the slot count from that mask.
 	 */
 	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
 	    &syncer_mask);
 	syncer_maxdelay = syncer_mask + 1;
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL)
 
 
 /*
  * Mark a mount point as busy. Used to synchronize access and to delay
  * unmounting. Interlock is not released on failure.
  */
 int
 vfs_busy(mp, flags, interlkp, p)
 	struct mount *mp;
 	int flags;
 	struct mtx *interlkp;
 	struct proc *p;
 {
 	int lkflags;
 
 	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 		if (flags & LK_NOWAIT)
 			return (ENOENT);
 		mp->mnt_kern_flag |= MNTK_MWAIT;
 		/*
 		 * Since all busy locks are shared except the exclusive
 		 * lock granted when unmounting, the only place that a
 		 * wakeup needs to be done is at the release of the
 		 * exclusive lock at the end of dounmount.
 		 */
 		msleep((caddr_t)mp, interlkp, PVFS, "vfs_busy", 0);
 		return (ENOENT);
 	}
 	lkflags = LK_SHARED | LK_NOPAUSE;
 	if (interlkp)
 		lkflags |= LK_INTERLOCK;
 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
 		panic("vfs_busy: unexpected lock failure");
 	return (0);
 }
 
 /*
  * Free a busy filesystem: release the hold on mnt_lock on behalf of
  * process p.  The lockmgr() result is not examined.
  */
 void
 vfs_unbusy(mp, p)
 	struct mount *mp;
 	struct proc *p;
 {
 
 	(void)lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
 }
 
 /*
  * Lookup a filesystem type, and if found allocate and initialize
  * a mount structure for it.
  *
  * Devname is usually updated by mount(8) after booting.
  *
  * On success the new, busied, read-only mount structure (mounted on "/")
  * is handed back through *mpp and 0 is returned.  ENODEV is returned
  * when fstypename is NULL or names no configured filesystem type.
  */
 int
 vfs_rootmountalloc(fstypename, devname, mpp)
 	char *fstypename;
 	char *devname;
 	struct mount **mpp;
 {
 	struct proc *p = curproc;	/* XXX */
 	struct vfsconf *vfsp;
 	struct mount *mp;
 
 	if (fstypename == NULL)
 		return (ENODEV);
 
 	/* Find the vfsconf entry carrying the requested name. */
 	vfsp = vfsconf;
 	while (vfsp != NULL && strcmp(vfsp->vfc_name, fstypename) != 0)
 		vfsp = vfsp->vfc_next;
 	if (vfsp == NULL)
 		return (ENODEV);
 
 	mp = malloc((u_long)sizeof(*mp), M_MOUNT, M_WAITOK | M_ZERO);
 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
 	(void)vfs_busy(mp, LK_NOWAIT, NULL, p);
 	LIST_INIT(&mp->mnt_vnodelist);
 
 	/* Tie the mount to its filesystem type. */
 	mp->mnt_vfc = vfsp;
 	mp->mnt_op = vfsp->vfc_vfsops;
 	vfsp->vfc_refcount++;
 
 	/* Start out read-only, plus the type's visible flags. */
 	mp->mnt_flag = MNT_RDONLY | (vfsp->vfc_flags & MNT_VISFLAGMASK);
 	mp->mnt_vnodecovered = NULLVP;
 	mp->mnt_iosize_max = DFLTPHYS;
 
 	/* Fill in the statfs identity: type, "/" and the device name. */
 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
 	mp->mnt_stat.f_mntonname[0] = '/';
 	mp->mnt_stat.f_mntonname[1] = 0;
 	(void)copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
 
 	*mpp = mp;
 	return (0);
 }
 
 /*
  * Find an appropriate filesystem to use for the root. If a filesystem
  * has not been preselected, walk through the list of known filesystems
  * trying those that have mountroot routines, and try them until one
  * works or we have tried them all.
  *
  * Returns the result of the preselected lite2_mountroot hook when one
  * is set, 0 as soon as some vfc_mountroot routine succeeds, and ENODEV
  * when every candidate failed.  Each failure is reported with printf.
  *
  * NOTE: this function is compiled out (guarded by "notdef").
  */
 #ifdef notdef	/* XXX JH */
 int
 lite2_vfs_mountroot()
 {
 	struct vfsconf *vfsp;
 	extern int (*lite2_mountroot) __P((void));
 	int error;
 
 	/* A preselected root mount routine takes precedence. */
 	if (lite2_mountroot != NULL)
 		return ((*lite2_mountroot)());
 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
 		/* Skip filesystem types that cannot mount a root. */
 		if (vfsp->vfc_mountroot == NULL)
 			continue;
 		if ((error = (*vfsp->vfc_mountroot)()) == 0)
 			return (0);
 		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
 	}
 	return (ENODEV);
 }
 #endif
 
 /*
  * Lookup a mount point by filesystem identifier.
  */
 struct mount *
 vfs_getvfs(fsid)
 	fsid_t *fsid;
 {
 	register struct mount *mp;
 
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(mp, &mountlist, mnt_list) {
 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
 			mtx_unlock(&mountlist_mtx);
 			return (mp);
 	    }
 	}
 	mtx_unlock(&mountlist_mtx);
 	return ((struct mount *) 0);
 }
 
 /*
  * Get a new unique fsid.  Try to make its val[0] unique, since this value
  * will be used to create fake device numbers for stat().  Also try (but
  * not so hard) to make its val[0] unique mod 2^16, since some emulators
  * only support 16-bit device numbers.  We end up with unique val[0]'s for
  * the first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8
  * calls.
  *
  * Keep in mind that several mounts may be running in parallel.  Starting
  * the search one past where the previous search terminated is both a
  * micro-optimization and a defense against returning the same fsid to
  * different mounts.
  *
  * The whole search runs under mntid_mtx; the chosen id is stored in
  * mp->mnt_stat.f_fsid.
  */
 void
 vfs_getnewfsid(mp)
 	struct mount *mp;
 {
 	static u_int16_t mntid_base;
 	fsid_t candidate;
 	int typebits;
 
 	mtx_lock(&mntid_mtx);
 
 	/* val[1] is the filesystem type number itself. */
 	typebits = mp->mnt_vfc->vfc_typenum;
 	candidate.val[1] = typebits;
 
 	/* The low byte of the type number goes into the top of the minor. */
 	typebits = (typebits & 0xFF) << 24;
 
 	/* Step mntid_base until the resulting fsid is not already mounted. */
 	do {
 		candidate.val[0] = makeudev(255,
 		    typebits | ((mntid_base & 0xFF00) << 8) |
 		    (mntid_base & 0xFF));
 		mntid_base++;
 	} while (vfs_getvfs(&candidate) != NULL);
 
 	mp->mnt_stat.f_fsid.val[0] = candidate.val[0];
 	mp->mnt_stat.f_fsid.val[1] = candidate.val[1];
 	mtx_unlock(&mntid_mtx);
 }
 
 /*
  * Knob to control the precision of file timestamps:
  *
  *   0 = seconds only; nanoseconds zeroed.
  *   1 = seconds and nanoseconds, accurate within 1/HZ.
  *   2 = seconds and nanoseconds, truncated to microseconds.
  * >=3 = seconds and nanoseconds, maximum precision.
  */
 enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };
 
 static int timestamp_precision = TSP_SEC;
 SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
     &timestamp_precision, 0, "");
 
 /*
  * Get a current timestamp.
  *
  * Fills in *tsp using the clock source selected by timestamp_precision:
  * TSP_SEC uses time_second with zero nanoseconds, TSP_HZ uses
  * getnanotime(), TSP_USEC converts the result of microtime(), and
  * TSP_NSEC or any other value uses nanotime().
  */
 void
 vfs_timestamp(tsp)
 	struct timespec *tsp;
 {
 	struct timeval tv;
 	int precision;
 
 	precision = timestamp_precision;
 	if (precision == TSP_SEC) {
 		tsp->tv_sec = time_second;
 		tsp->tv_nsec = 0;
 	} else if (precision == TSP_HZ) {
 		getnanotime(tsp);
 	} else if (precision == TSP_USEC) {
 		microtime(&tv);
 		TIMEVAL_TO_TIMESPEC(&tv, tsp);
 	} else {
 		/* TSP_NSEC, and any setting outside the known range. */
 		nanotime(tsp);
 	}
 }
 
 /*
  * Set vnode attributes to VNOVAL
  *
  * Every field is given the VNOVAL marker except va_type, which becomes
  * VNON, and va_vaflags, which is cleared to 0.
  */
 void
 vattr_null(vap)
 	struct vattr *vap;
 {
 
 	/* Type and the two non-VNOVAL defaults. */
 	vap->va_type = VNON;
 	vap->va_vaflags = 0;
 
 	/* Identity and ownership. */
 	vap->va_mode = VNOVAL;
 	vap->va_nlink = VNOVAL;
 	vap->va_uid = VNOVAL;
 	vap->va_gid = VNOVAL;
 	vap->va_fsid = VNOVAL;
 	vap->va_fileid = VNOVAL;
 	vap->va_rdev = VNOVAL;
 	vap->va_flags = VNOVAL;
 	vap->va_gen = VNOVAL;
 
 	/* Sizes. */
 	vap->va_size = VNOVAL;
 	vap->va_bytes = VNOVAL;
 	vap->va_blocksize = VNOVAL;
 
 	/* Timestamps. */
 	vap->va_atime.tv_sec = VNOVAL;
 	vap->va_atime.tv_nsec = VNOVAL;
 	vap->va_mtime.tv_sec = VNOVAL;
 	vap->va_mtime.tv_nsec = VNOVAL;
 	vap->va_ctime.tv_sec = VNOVAL;
 	vap->va_ctime.tv_nsec = VNOVAL;
 }
 
 /*
  * Routines having to do with the management of the vnode table.
  */
 
 /*
  * Return the next vnode from the free list.
  *
  * Hands back, through *vpp, a vnode set up with the given tag and
  * operations vector and queued on mount point mp.  The vnode is either
  * recycled from vnode_free_list or freshly allocated from vnode_zone.
  * It is returned with v_usecount set to 1, v_type set to VNON and
  * v_data cleared.  Always returns 0.
  */
 int
 getnewvnode(tag, mp, vops, vpp)
 	enum vtagtype tag;
 	struct mount *mp;
 	vop_t **vops;
 	struct vnode **vpp;
 {
 	int s, count;
 	struct proc *p = curproc;	/* XXX */
 	struct vnode *vp = NULL;
 	struct mount *vnmp;
 	vm_object_t object;
 
 	/*
 	 * We take the least recently used vnode from the freelist
 	 * if we can get it and it has no cached pages, and no
 	 * namecache entries are relative to it.
 	 * Otherwise we allocate a new vnode
 	 */
 
 	s = splbio();
 	mtx_lock(&vnode_free_list_mtx);
 
 	if (wantfreevnodes && freevnodes < wantfreevnodes) {
 		/* Too few free vnodes to recycle from: allocate instead. */
 		vp = NULL;
 	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
 		/*
 		 * XXX: this is only here to be backwards compatible
 		 */
 		vp = NULL;
 	} else for (count = 0; count < freevnodes; count++) {
 		/*
 		 * Look at each free vnode at most once; rejected ones are
 		 * rotated to the tail of the list.
 		 */
 		vp = TAILQ_FIRST(&vnode_free_list);
 		if (vp == NULL || vp->v_usecount)
 			panic("getnewvnode: free vnode isn't");
 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 		/*
 		 * Don't recycle if active in the namecache or
 		 * if it still has cached pages or we cannot get
 		 * its interlock.
 		 */
 		if (LIST_FIRST(&vp->v_cache_src) != NULL ||
 		    (VOP_GETVOBJECT(vp, &object) == 0 &&
 		     (object->resident_page_count || object->ref_count)) ||
 		    !mtx_trylock(&vp->v_interlock)) {
 			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 			vp = NULL;
 			continue;
 		}
 		/*
 		 * Skip over it if its filesystem is being suspended.
 		 */
 		if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
 			break;
 		/* Rejected: release the interlock taken by mtx_trylock above. */
 		mtx_unlock(&vp->v_interlock);
 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 		vp = NULL;
 	}
 	if (vp) {
 		/*
 		 * Recycling path: vp is off the free list and its interlock
 		 * is held (taken by the mtx_trylock in the loop above).
 		 */
 		vp->v_flag |= VDOOMED;
 		vp->v_flag &= ~VFREE;
 		freevnodes--;
 		mtx_unlock(&vnode_free_list_mtx);
 		cache_purge(vp);
 		vp->v_lease = NULL;
 		if (vp->v_type != VBAD) {
 			vgonel(vp, p);
 		} else {
 			mtx_unlock(&vp->v_interlock);
 		}
 		/* Pairs with the successful vn_start_write() in the loop. */
 		vn_finished_write(vnmp);
 
 #ifdef INVARIANTS
 		{
 			int s;
 
 			if (vp->v_data)
 				panic("cleaned vnode isn't");
 			s = splbio();
 			if (vp->v_numoutput)
 				panic("Clean vnode has pending I/O's");
 			splx(s);
 			if (vp->v_writecount != 0)
 				panic("Non-zero write count");
 		}
 #endif
 		/* Reset the per-use state carried over from the old identity. */
 		vp->v_flag = 0;
 		vp->v_lastw = 0;
 		vp->v_lasta = 0;
 		vp->v_cstart = 0;
 		vp->v_clen = 0;
 		vp->v_socket = 0;
 	} else {
 		/* Allocation path: build a zeroed vnode from the zone. */
 		mtx_unlock(&vnode_free_list_mtx);
 		vp = (struct vnode *) zalloc(vnode_zone);
 		bzero((char *) vp, sizeof *vp);
 		mtx_init(&vp->v_interlock, "vnode interlock", MTX_DEF);
 		vp->v_dd = vp;
 		mtx_init(&vp->v_pollinfo.vpi_lock, "vnode pollinfo", MTX_DEF);
 		cache_purge(vp);
 		LIST_INIT(&vp->v_cache_src);
 		TAILQ_INIT(&vp->v_cache_dst);
 		numvnodes++;
 	}
 
 	/* Common initialization for both recycled and new vnodes. */
 	TAILQ_INIT(&vp->v_cleanblkhd);
 	TAILQ_INIT(&vp->v_dirtyblkhd);
 	vp->v_type = VNON;
 	vp->v_tag = tag;
 	vp->v_op = vops;
 	lockinit(&vp->v_lock, PVFS, "vnlock", 0, LK_NOPAUSE);
 	insmntque(vp, mp);
 	*vpp = vp;
 	vp->v_usecount = 1;
 	vp->v_data = 0;
 	splx(s);
 
 	/* The result of vfs_object_create() is not checked. */
 	vfs_object_create(vp, p, p->p_ucred);
 	return (0);
 }
 
 /*
  * Move a vnode from one mount queue to another.
  *
  * Under mntvnode_mtx the vnode is unlinked from its current mount's
  * vnode list (if it has a mount) and v_mount is set to mp.  When mp is
  * not NULL the vnode is also put at the head of mp's vnode list.
  */
 static void
 insmntque(vp, mp)
 	struct vnode *vp;
 	struct mount *mp;
 {
 
 	mtx_lock(&mntvnode_mtx);
 
 	/* Leave the old mount point's list, if we are on one. */
 	if (vp->v_mount != NULL)
 		LIST_REMOVE(vp, v_mntvnodes);
 
 	/* Join the new mount point's list, if a mount point was given. */
 	vp->v_mount = mp;
 	if (mp != NULL)
 		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
 
 	mtx_unlock(&mntvnode_mtx);
 }
 
 /*
  * Update outstanding I/O count and do wakeup if requested.
  */
 void
 vwakeup(bp)
 	register struct buf *bp;
 {
 	register struct vnode *vp;
 
 	bp->b_flags &= ~B_WRITEINPROG;
 	if ((vp = bp->b_vp)) {
 		vp->v_numoutput--;
 		if (vp->v_numoutput < 0)
 			panic("vwakeup: neg numoutput");
 		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
 			vp->v_flag &= ~VBWAIT;
 			wakeup((caddr_t) &vp->v_numoutput);
 		}
 	}
 }
 
 /*
  * Flush out and invalidate all buffers associated with a vnode.
  * Called with the underlying object locked.
  *
  * With V_SAVE in flags, pending output is waited for and dirty buffers
  * are pushed with VOP_FSYNC before anything is invalidated, and delayed
  * write buffers met during the scan are written rather than discarded.
  * slpflag and slptimeo are passed on to the sleeps that wait for output
  * and for busy buffers.
  *
  * Returns 0 on success, or the error reported by tsleep(), VOP_FSYNC()
  * or BUF_TIMELOCK().  Panics if buffers remain on the vnode afterwards.
  */
 int
 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
 	register struct vnode *vp;
 	int flags;
 	struct ucred *cred;
 	struct proc *p;
 	int slpflag, slptimeo;
 {
 	register struct buf *bp;
 	struct buf *nbp, *blist;
 	int s, error;
 	vm_object_t object;
 
 	if (flags & V_SAVE) {
 		s = splbio();
 		/* Wait for writes already in flight on this vnode. */
 		while (vp->v_numoutput) {
 			vp->v_flag |= VBWAIT;
 			error = tsleep((caddr_t)&vp->v_numoutput,
 			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
 			if (error) {
 				splx(s);
 				return (error);
 			}
 		}
 		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
 			/* Push the dirty buffers out synchronously. */
 			splx(s);
 			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
 				return (error);
 			s = splbio();
 			if (vp->v_numoutput > 0 ||
 			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
 				panic("vinvalbuf: dirty bufs");
 		}
 		splx(s);
   	}
 	s = splbio();
 	/*
 	 * Keep rescanning from the head of the clean list (or the dirty
 	 * list when the clean list is empty) until both lists are empty.
 	 */
 	for (;;) {
 		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
 		if (!blist)
 			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
 		if (!blist)
 			break;
 
 		for (bp = blist; bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
 				/*
 				 * The buffer is busy: wait for it.  ENOLCK
 				 * restarts the scan from the list heads; any
 				 * other result is returned to the caller.
 				 */
 				error = BUF_TIMELOCK(bp,
 				    LK_EXCLUSIVE | LK_SLEEPFAIL,
 				    "vinvalbuf", slpflag, slptimeo);
 				if (error == ENOLCK)
 					break;
 				splx(s);
 				return (error);
 			}
 			/*
 			 * XXX Since there are no node locks for NFS, I
 			 * believe there is a slight chance that a delayed
 			 * write will occur while sleeping just above, so
 			 * check for it.  Note that vfs_bio_awrite expects
-			 * buffers to reside on a queue, while VOP_BWRITE and
+			 * buffers to reside on a queue, while BUF_WRITE and
 			 * brelse do not.
 			 */
 			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
 				(flags & V_SAVE)) {
 
 				if (bp->b_vp == vp) {
 					if (bp->b_flags & B_CLUSTEROK) {
 						BUF_UNLOCK(bp);
 						vfs_bio_awrite(bp);
 					} else {
 						bremfree(bp);
 						bp->b_flags |= B_ASYNC;
 						BUF_WRITE(bp);
 					}
 				} else {
 					bremfree(bp);
 					(void) BUF_WRITE(bp);
 				}
 				/* A write was started: rescan from the list heads. */
 				break;
 			}
 			/* Discard the buffer's contents and release it. */
 			bremfree(bp);
 			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
 			bp->b_flags &= ~B_ASYNC;
 			brelse(bp);
 		}
 	}
 
 	/* Wait for any writes started above to complete. */
 	while (vp->v_numoutput > 0) {
 		vp->v_flag |= VBWAIT;
 		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
 	}
 
 	splx(s);
 
 	/*
 	 * Destroy the copy in the VM cache, too.
 	 */
 	mtx_lock(&vp->v_interlock);
 	if (VOP_GETVOBJECT(vp, &object) == 0) {
 		vm_object_page_remove(object, 0, 0,
 			(flags & V_SAVE) ? TRUE : FALSE);
 	}
 	mtx_unlock(&vp->v_interlock);
 
 	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
 		panic("vinvalbuf: flush failed");
 	return (0);
 }
 
 /*
  * Truncate a file's buffer and pages to a specified length.  This
  * is in lieu of the old vinvalbuf mechanism, which performed unneeded
  * sync activity.
  *
  * Buffers on the vnode's clean and dirty lists whose logical block
  * number is at or beyond the first block past the new length are
  * invalidated and released.  When length is greater than zero, delayed
  * write buffers with a negative logical block number are written out.
  * The routine then waits for outstanding output and passes the new
  * length to vnode_pager_setsize().  Always returns 0; cred and p are
  * not used.
  */
 int
 vtruncbuf(vp, cred, p, length, blksize)
 	register struct vnode *vp;
 	struct ucred *cred;
 	struct proc *p;
 	off_t length;
 	int blksize;
 {
 	register struct buf *bp;
 	struct buf *nbp;
 	int s, anyfreed;
 	int trunclbn;
 
 	/*
 	 * Round up to the *next* lbn.
 	 */
 	trunclbn = (length + blksize - 1) / blksize;
 
 	s = splbio();
 restart:
 	/* Repeat both list scans until a full pass frees nothing. */
 	anyfreed = 1;
 	for (;anyfreed;) {
 		anyfreed = 0;
 		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if (bp->b_lblkno >= trunclbn) {
 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
 					/* Busy: wait for it, then start over. */
 					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
 					goto restart;
 				} else {
 					bremfree(bp);
 					bp->b_flags |= (B_INVAL | B_RELBUF);
 					bp->b_flags &= ~B_ASYNC;
 					brelse(bp);
 					anyfreed = 1;
 				}
 				/*
 				 * If the saved successor is no longer a clean
 				 * buffer of this vnode, the list changed under
 				 * us: start over.
 				 */
 				if (nbp &&
 				    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
 				    (nbp->b_vp != vp) ||
 				    (nbp->b_flags & B_DELWRI))) {
 					goto restart;
 				}
 			}
 		}
 
 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if (bp->b_lblkno >= trunclbn) {
 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
 					/* Busy: wait for it, then start over. */
 					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
 					goto restart;
 				} else {
 					bremfree(bp);
 					bp->b_flags |= (B_INVAL | B_RELBUF);
 					bp->b_flags &= ~B_ASYNC;
 					brelse(bp);
 					anyfreed = 1;
 				}
 				/*
 				 * Same check as above, for the dirty list: the
 				 * successor must still be a delayed-write
 				 * buffer of this vnode.
 				 */
 				if (nbp &&
 				    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
 				    (nbp->b_vp != vp) ||
 				    (nbp->b_flags & B_DELWRI) == 0)) {
 					goto restart;
 				}
 			}
 		}
 	}
 
 	if (length > 0) {
 restartsync:
 		/*
 		 * Write out delayed-write buffers with negative logical
 		 * block numbers, rescanning the dirty list after each one.
 		 */
 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
 					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
 					goto restart;
 				} else {
 					bremfree(bp);
 					/* Asynchronous only for our own buffers. */
 					if (bp->b_vp == vp) {
 						bp->b_flags |= B_ASYNC;
 					} else {
 						bp->b_flags &= ~B_ASYNC;
 					}
 					BUF_WRITE(bp);
 				}
 				goto restartsync;
 			}
 
 		}
 	}
 
 	/* Wait for all output on the vnode to drain. */
 	while (vp->v_numoutput > 0) {
 		vp->v_flag |= VBWAIT;
 		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
 	}
 
 	splx(s);
 
 	vnode_pager_setsize(vp, length);
 
 	return (0);
 }
 
 /*
  * Associate a buffer with a vnode.
  *
  * The buffer must not already belong to a vnode (asserted).  A hold is
  * taken on vp, b_vp and b_dev are filled in, and the buffer is appended
  * to the vnode's clean list with BX_VNCLEAN set and BX_VNDIRTY cleared.
  */
 void
 bgetvp(vp, bp)
 	struct vnode *vp;
 	struct buf *bp;
 {
 	int s;
 
 	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
 
 	vhold(vp);
 	bp->b_vp = vp;
 	bp->b_dev = vn_todev(vp);
 
 	/* Queue the buffer on the new vnode's clean list. */
 	s = splbio();
 	bp->b_xflags = (bp->b_xflags | BX_VNCLEAN) & ~BX_VNDIRTY;
 	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
 	splx(s);
 }
 
 /*
  * Disassociate a buffer from a vnode.
  *
  * The buffer must belong to a vnode (asserted).  It is taken off the
  * vnode's dirty or clean list, whichever its b_xflags say it is on.  If
  * that leaves a vnode flagged VONWORKLST with no dirty buffers, the
  * vnode is removed from the syncer list.  Finally b_vp is cleared and
  * the hold on the vnode is dropped.
  */
 void
 brelvp(bp)
 	struct buf *bp;
 {
 	struct vnode *vp;
 	int s;
 
 	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
 
 	vp = bp->b_vp;
 	s = splbio();
 
 	/* Delete from the old vnode list, if on one; dirty wins over clean. */
 	if (bp->b_xflags & BX_VNDIRTY) {
 		TAILQ_REMOVE(&vp->v_dirtyblkhd, bp, b_vnbufs);
 		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
 	} else if (bp->b_xflags & BX_VNCLEAN) {
 		TAILQ_REMOVE(&vp->v_cleanblkhd, bp, b_vnbufs);
 		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
 	}
 
 	/* With no dirty buffers left the syncer has nothing to do for vp. */
 	if ((vp->v_flag & VONWORKLST) != 0 &&
 	    TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
 		vp->v_flag &= ~VONWORKLST;
 		LIST_REMOVE(vp, v_synclist);
 	}
 	splx(s);
 
 	bp->b_vp = NULL;
 	vdrop(vp);
 }
 
 /*
  * Add an item to the syncer work queue.
  */
 static void
 vn_syncer_add_to_worklist(struct vnode *vp, int delay)
 {
 	int s, slot;
 
 	s = splbio();
 
 	if (vp->v_flag & VONWORKLST) {
 		LIST_REMOVE(vp, v_synclist);
 	}
 
 	if (delay > syncer_maxdelay - 2)
 		delay = syncer_maxdelay - 2;
 	slot = (syncer_delayno + delay) & syncer_mask;
 
 	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
 	vp->v_flag |= VONWORKLST;
 	splx(s);
 }
 
 /*
  * The syncer kernel process.  updateproc is read by sched_sync() and
  * speedup_syncer().  Its address is handed to kproc_start via up_kp
  * together with the process name and entry point; presumably kproc_start
  * stores the newly created process there (confirm against kproc_start).
  */
 struct  proc *updateproc;
 static void sched_sync __P((void));
 static struct kproc_desc up_kp = {
 	"syncer",
 	sched_sync,
 	&updateproc
 };
 /* Start the syncer at the SI_SUB_KTHREAD_UPDATE stage of system startup. */
 SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
 
 /*
  * System filesystem synchronizer daemon.
  *
  * Body of the "syncer" kernel process; it never returns.  Each pass takes
  * the worklist slot indexed by syncer_delayno, advances syncer_delayno
  * (wrapping to 0 at syncer_maxdelay) and issues a lazy VOP_FSYNC on every
  * vnode queued in that slot.  A pass that completes within the second in
  * which it started sleeps on lbolt before the next pass, unless rushjob
  * is positive, in which case rushjob is decremented and the next pass
  * starts immediately.
  */
 void 
 sched_sync(void)
 {
 	struct synclist *slp;
 	struct vnode *vp;
 	struct mount *mp;
 	long starttime;
 	int s;
 	struct proc *p = updateproc;
 
 	mtx_lock(&Giant);
 
 	/* Arrange for kproc_shutdown to be run on this process at shutdown. */
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, p,
 	    SHUTDOWN_PRI_LAST);   
 
 	for (;;) {
 		kthread_suspend_check(p);
 
 		starttime = time_second;
 
 		/*
 		 * Push files whose dirty time has expired.  Be careful
 		 * of interrupt race on slp queue.
 		 */
 		s = splbio();
 		slp = &syncer_workitem_pending[syncer_delayno];
 		syncer_delayno += 1;
 		if (syncer_delayno == syncer_maxdelay)
 			syncer_delayno = 0;
 		splx(s);
 
 		/*
 		 * Drain the slot.  Every iteration is expected to take the
 		 * head vnode off this slot: either the fsync cleans it, or
 		 * the code below re-queues it into a later slot.
 		 */
 		while ((vp = LIST_FIRST(slp)) != NULL) {
 			/*
 			 * Only sync vnodes that are not locked and whose
 			 * write can be started without waiting (V_NOWAIT).
 			 */
 			if (VOP_ISLOCKED(vp, NULL) == 0 &&
 			    vn_start_write(vp, &mp, V_NOWAIT) == 0) {
 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
 				VOP_UNLOCK(vp, 0, p);
 				vn_finished_write(mp);
 			}
 			s = splbio();
 			if (LIST_FIRST(slp) == vp) {
 				/*
 				 * Note: v_tag VT_VFS vps can remain on the
 				 * worklist too with no dirty blocks, but 
 				 * since sync_fsync() moves it to a different 
 				 * slot we are safe.
 				 */
 				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
 				    !vn_isdisk(vp, NULL))
 					panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
 				/*
 				 * Put us back on the worklist.  The worklist
 				 * routine will remove us from our current
 				 * position and then add us back in at a later
 				 * position.
 				 */
 				vn_syncer_add_to_worklist(vp, syncdelay);
 			}
 			splx(s);
 		}
 
 		/*
 		 * Do soft update processing.
 		 */
 #ifdef SOFTUPDATES
 		softdep_process_worklist(NULL);
 #endif
 
 		/*
 		 * The variable rushjob allows the kernel to speed up the
 		 * processing of the filesystem syncer process. A rushjob
 		 * value of N tells the filesystem syncer to process the next
 		 * N seconds worth of work on its queue ASAP. Currently rushjob
 		 * is used by the soft update code to speed up the filesystem
 		 * syncer process when the incore state is getting so far
 		 * ahead of the disk that the kernel memory pool is being
 		 * threatened with exhaustion.
 		 */
 		if (rushjob > 0) {
 			rushjob -= 1;
 			continue;
 		}
 		/*
 		 * If it has taken us less than a second to process the
 		 * current work, then wait. Otherwise start right over
 		 * again. We can still lose time if any single round
 		 * takes more than two seconds, but it does not really
 		 * matter as we are just trying to generally pace the
 		 * filesystem activity.
 		 */
 		if (time_second == starttime)
 			tsleep(&lbolt, PPAUSE, "syncer", 0);
 	}
 }
 
 /*
  * Request the syncer daemon to speed up its work.
  * We never push it to speed up more than half of its
  * normal turn time, otherwise it could take over the cpu.
  *
  * Returns 1 when a rush request was recorded, 0 when the limit of
  * syncdelay / 2 outstanding requests had already been reached.
  */
 int
 speedup_syncer()
 {
 
 	/* If the syncer is in its pacing sleep on lbolt, make it runnable. */
 	mtx_lock_spin(&sched_lock);
 	if (updateproc->p_wchan == &lbolt)
 		setrunnable(updateproc);
 	mtx_unlock_spin(&sched_lock);
 
 	if (rushjob >= syncdelay / 2)
 		return(0);
 
 	rushjob++;
 	stat_rush_requests++;
 	return (1);
 }
 
 /*
  * Associate a p-buffer with a vnode.
  *
  * Unlike bgetvp(), the buffer is neither linked onto the vnode's buffer
  * lists nor does it take a hold on the vnode.  B_PAGING is set on the
  * buffer to mark this partial association.
  */
 void
 pbgetvp(vp, bp)
 	register struct vnode *vp;
 	register struct buf *bp;
 {
 
 	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
 
 	bp->b_flags |= B_PAGING;
 	bp->b_dev = vn_todev(vp);
 	bp->b_vp = vp;
 }
 
 /*
  * Disassociate a p-buffer from a vnode.
  *
  * Undoes pbgetvp(): clears B_PAGING and the vnode pointer.  Panics if
  * the buffer appears to be linked onto a vnode buffer list.
  */
 void
 pbrelvp(bp)
 	register struct buf *bp;
 {
 
 	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
 
 	/* XXX REMOVE ME */
 	if (TAILQ_NEXT(bp, b_vnbufs) != NULL)
 		panic("relpbuf(): b_vp was probably reassignbuf()d %p %x",
 		    bp, (int)bp->b_flags);
 
 	bp->b_flags &= ~B_PAGING;
 	bp->b_vp = (struct vnode *) 0;
 }
 
 /*
  * Change the vnode a pager buffer is associated with.
  *
  * Only the buffer's b_vp pointer is rewritten; no buffer list or hold
  * count is touched.  The buffer must carry B_PAGING, i.e. have been set
  * up with pbgetvp().
  */
 void
 pbreassignbuf(bp, newvp)
 	struct buf *bp;
 	struct vnode *newvp;
 {
 
 	KASSERT(bp->b_flags & B_PAGING,
 	    ("pbreassignbuf() on non phys bp %p", bp));
 	bp->b_vp = newvp;
 }
 
 /*
  * Reassign a buffer from one vnode to another.
  * Used to assign file specific control information
  * (indirect blocks) to the vnode to which they belong.
  *
  * The buffer is unlinked from its current vnode list (if any) and queued
  * on newvp: on the dirty list when B_DELWRI is set, otherwise on the
  * clean list.  When the vnode changes, the hold on the old vnode is
  * dropped and a hold on newvp is taken.  A NULL newvp is reported with
  * printf and otherwise ignored; a B_PAGING buffer causes a panic.
  */
 void
 reassignbuf(bp, newvp)
 	register struct buf *bp;
 	register struct vnode *newvp;
 {
 	struct buflists *listheadp;
 	int delay;
 	int s;
 
 	if (newvp == NULL) {
 		printf("reassignbuf: NULL");
 		return;
 	}
 	++reassignbufcalls;
 
 	/*
 	 * B_PAGING flagged buffers cannot be reassigned because their vp
 	 * is not fully linked in.
 	 */
 	if (bp->b_flags & B_PAGING)
 		panic("cannot reassign paging buffer");
 
 	s = splbio();
 	/*
 	 * Delete from old vnode list, if on one.
 	 */
 	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
 		if (bp->b_xflags & BX_VNDIRTY)
 			listheadp = &bp->b_vp->v_dirtyblkhd;
 		else 
 			listheadp = &bp->b_vp->v_cleanblkhd;
 		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
 		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
 		if (bp->b_vp != newvp) {
 			vdrop(bp->b_vp);
 			bp->b_vp = NULL;	/* for clarification */
 		}
 	}
 	/*
 	 * If dirty, put on list of dirty buffers; otherwise insert onto list
 	 * of clean buffers.
 	 */
 	if (bp->b_flags & B_DELWRI) {
 		struct buf *tbp;
 
 		listheadp = &newvp->v_dirtyblkhd;
 		/*
 		 * First dirty buffer on this vnode: schedule the vnode with
 		 * the syncer.  Directories use dirdelay, character devices
 		 * with a filesystem mounted on them use metadelay, and
 		 * everything else uses filedelay.
 		 */
 		if ((newvp->v_flag & VONWORKLST) == 0) {
 			switch (newvp->v_type) {
 			case VDIR:
 				delay = dirdelay;
 				break;
 			case VCHR:
 				if (newvp->v_rdev->si_mountpoint != NULL) {
 					delay = metadelay;
 					break;
 				}
 				/* fall through */
 			default:
 				delay = filedelay;
 			}
 			vn_syncer_add_to_worklist(newvp, delay);
 		}
 		bp->b_xflags |= BX_VNDIRTY;
 		/*
 		 * Choose the insertion point.  The tests below keep buffers
 		 * with negative b_lblkno at the tail of the list, behind the
 		 * buffers with non-negative block numbers.
 		 */
 		tbp = TAILQ_FIRST(listheadp);
 		if (tbp == NULL ||
 		    bp->b_lblkno == 0 ||
 		    (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
 		    (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
 			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
 			++reassignbufsortgood;
 		} else if (bp->b_lblkno < 0) {
 			TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
 			++reassignbufsortgood;
 		} else if (reassignbufmethod == 1) {
 			/*
 			 * New sorting algorithm, only handle sequential case,
 			 * otherwise append to end (but before metadata)
 			 */
 			if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
 			    (tbp->b_xflags & BX_VNDIRTY)) {
 				/*
 				 * Found the best place to insert the buffer
 				 */
 				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 				++reassignbufsortgood;
 			} else {
 				/*
 				 * Missed, append to end, but before meta-data.
 				 * We know that the head buffer in the list is
 				 * not meta-data due to prior conditionals.
 				 *
 				 * Indirect effects:  NFS second stage write
 				 * tends to wind up here, giving maximum 
 				 * distance between the unstable write and the
 				 * commit rpc.
 				 */
 				tbp = TAILQ_LAST(listheadp, buflists);
 				while (tbp && tbp->b_lblkno < 0)
 					tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
 				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 				++reassignbufsortbad;
 			}
 		} else {
 			/*
 			 * Old sorting algorithm, scan queue and insert
 			 */
 			struct buf *ttbp;
 			while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
 			    (ttbp->b_lblkno < bp->b_lblkno)) {
 				++reassignbufloops;
 				tbp = ttbp;
 			}
 			TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 		}
 	} else {
 		bp->b_xflags |= BX_VNCLEAN;
 		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
 		/* No dirty buffers remain: take newvp off the syncer list. */
 		if ((newvp->v_flag & VONWORKLST) &&
 		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
 			newvp->v_flag &= ~VONWORKLST;
 			LIST_REMOVE(newvp, v_synclist);
 		}
 	}
 	/* Take a hold on newvp unless the buffer already belonged to it. */
 	if (bp->b_vp != newvp) {
 		bp->b_vp = newvp;
 		vhold(bp->b_vp);
 	}
 	splx(s);
 }
 
 /*
  * Create a vnode for a device.
  * Used for mounting the root file system.
  */
 int
 bdevvp(dev, vpp)
 	dev_t dev;
 	struct vnode **vpp;
 {
 	register struct vnode *vp;
 	struct vnode *nvp;
 	int error;
 
 	if (dev == NODEV) {
 		*vpp = NULLVP;
 		return (ENXIO);
 	}
 	if (vfinddev(dev, VCHR, vpp))
 		return (0);
 	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
 	if (error) {
 		*vpp = NULLVP;
 		return (error);
 	}
 	vp = nvp;
 	vp->v_type = VCHR;
 	addalias(vp, dev);
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * Add vnode to the alias list hung off the dev_t.
  *
  * The reason for this gunk is that multiple vnodes can reference
  * the same physical device, so checking vp->v_usecount to see
  * how many users there are is inadequate; the v_usecount for
  * the vnodes need to be accumulated.  vcount() does that.
  *
  * nvp is the new special vnode and nvp_rdev its device number in udev_t
  * form.  The return value is the vnode the caller should use from now
  * on: nvp itself, or a pre-existing vnode for the same device that has
  * taken over nvp's filesystem data (nvp is then released and vgone'd).
  * VBLK vnodes are returned untouched; any type other than VBLK or VCHR
  * panics.
  */
 struct vnode *
 addaliasu(nvp, nvp_rdev)
 	struct vnode *nvp;
 	udev_t nvp_rdev;
 {
 	struct vnode *ovp;
 	vop_t **ops;
 	dev_t dev;
 
 	if (nvp->v_type == VBLK)
 		return (nvp);
 	if (nvp->v_type != VCHR)
 		panic("addaliasu on non-special vnode");
 	dev = udev2dev(nvp_rdev, 0);
 	/*
 	 * Check to see if we have a bdevvp vnode with no associated
 	 * filesystem. If so, we want to associate the filesystem of
 	 * the new newly instigated vnode with the bdevvp vnode and
 	 * discard the newly created vnode rather than leaving the
 	 * bdevvp vnode lying around with no associated filesystem.
 	 */
 	if (vfinddev(dev, nvp->v_type, &ovp) == 0 || ovp->v_data != NULL) {
 		addalias(nvp, dev);
 		return (nvp);
 	}
 	/*
 	 * Discard unneeded vnode, but save its node specific data.
 	 * Note that if there is a lock, it is carried over in the
 	 * node specific data to the replacement vnode.
 	 */
 	vref(ovp);
 	ovp->v_data = nvp->v_data;
 	ovp->v_tag = nvp->v_tag;
 	nvp->v_data = NULL;
 	/* Give ovp a lock initialised with the same parameters as nvp's. */
 	lockinit(&ovp->v_lock, PVFS, nvp->v_lock.lk_wmesg,
 	    nvp->v_lock.lk_timo, nvp->v_lock.lk_flags & LK_EXTFLG_MASK);
 	if (nvp->v_vnlock)
 		ovp->v_vnlock = &ovp->v_lock;
 	/*
 	 * Swap the operations vectors: ovp takes nvp's, and nvp is given
 	 * ovp's old vector once any lock held on nvp has been released
 	 * and re-taken on ovp.
 	 */
 	ops = ovp->v_op;
 	ovp->v_op = nvp->v_op;
 	if (VOP_ISLOCKED(nvp, curproc)) {
 		VOP_UNLOCK(nvp, 0, curproc);
 		vn_lock(ovp, LK_EXCLUSIVE | LK_RETRY, curproc);
 	}
 	nvp->v_op = ops;
 	insmntque(ovp, nvp->v_mount);
 	vrele(nvp);
 	vgone(nvp);
 	return (ovp);
 }
 
 /*
  * Local helper that does the alias-list insertion for addaliasu() and
  * bdevvp(), taking a dev_t instead of a udev_t.
  *
  * Records dev in nvp->v_rdev and links nvp at the head of the device's
  * si_hlist alias list; the list insertion is done under spechash_mtx.
  * nvp must be a VCHR vnode.
  */
 static void
 addalias(nvp, dev)
 	struct vnode *nvp;
 	dev_t dev;
 {
 
 	KASSERT(nvp->v_type == VCHR, ("addalias on non-special vnode"));
 	nvp->v_rdev = dev;
 	mtx_lock(&spechash_mtx);
 	SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext);
 	mtx_unlock(&spechash_mtx);
 }
 
 /*
  * Grab a particular vnode from the free list, increment its
  * reference count and lock it. The vnode lock bit is set if the
  * vnode is being eliminated in vgone. The process is awakened
  * when the transition is completed, and an error returned to
  * indicate that the vnode is no longer usable (possibly having
  * been changed to a new file system type).
  *
  * flags: LK_INTERLOCK means the caller already holds vp->v_interlock;
  * otherwise it is taken here.  If any LK_TYPE_MASK bits are set the
  * vnode is also locked with vn_lock() using those flags.
  *
  * Returns 0 on success, ENOENT if the vnode was being cleaned out by
  * another process (after sleeping until that finished), or the error
  * from vn_lock(), in which case the use count taken here is given back.
  */
 int
 vget(vp, flags, p)
 	register struct vnode *vp;
 	int flags;
 	struct proc *p;
 {
 	int error;
 
 	/*
 	 * If the vnode is in the process of being cleaned out for
 	 * another use, we wait for the cleaning to finish and then
 	 * return failure. Cleaning is determined by checking that
 	 * the VXLOCK flag is set.
 	 */
 	if ((flags & LK_INTERLOCK) == 0)
 		mtx_lock(&vp->v_interlock);
 	if (vp->v_flag & VXLOCK) {
 		/*
 		 * When the cleaner is the current process, sleeping would
 		 * wait on ourselves; carry on instead.
 		 */
 		if (vp->v_vxproc == curproc) {
 			printf("VXLOCK interlock avoided\n");
 		} else {
 			vp->v_flag |= VXWANT;
 			/* PDROP: the interlock is not re-taken on wakeup. */
 			msleep((caddr_t)vp, &vp->v_interlock, PINOD | PDROP,
 			    "vget", 0);
 			return (ENOENT);
 		}
 	}
 
 	vp->v_usecount++;
 
 	if (VSHOULDBUSY(vp))
 		vbusy(vp);
 	if (flags & LK_TYPE_MASK) {
 		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
 			/*
 			 * must expand vrele here because we do not want
 			 * to call VOP_INACTIVE if the reference count
 			 * drops back to zero since it was never really
 			 * active. We must remove it from the free list
 			 * before sleeping so that multiple processes do
 			 * not try to recycle it.
 			 */
 			mtx_lock(&vp->v_interlock);
 			vp->v_usecount--;
 			if (VSHOULDFREE(vp))
 				vfree(vp);
 			mtx_unlock(&vp->v_interlock);
 		}
 		return (error);
 	}
 	mtx_unlock(&vp->v_interlock);
 	return (0);
 }
 
 /*
  * Increase the reference count of a vnode.
  *
  * v_usecount is incremented under the vnode interlock.  Unlike vget(),
  * no check for VXLOCK is made and the vnode is not locked.
  */
 void
 vref(struct vnode *vp)
 {
 	mtx_lock(&vp->v_interlock);
 	vp->v_usecount++;
 	mtx_unlock(&vp->v_interlock);
 }
 
 /*
  * Vnode put/release.
  * If count drops to zero, call inactive routine and return to freelist.
  *
  * The caller passes an unlocked vnode; use vput() to release a vnode
  * that is already locked.  Panics if the use count is not positive.
  */
 void
 vrele(vp)
 	struct vnode *vp;
 {
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vrele: null vp"));
 
 	mtx_lock(&vp->v_interlock);
 
 	KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close"));
 
 	/* Not the last reference: just drop the count. */
 	if (vp->v_usecount > 1) {
 
 		vp->v_usecount--;
 		mtx_unlock(&vp->v_interlock);
 
 		return;
 	}
 
 	if (vp->v_usecount == 1) {
 
 		vp->v_usecount--;
 		if (VSHOULDFREE(vp))
 			vfree(vp);
 		/*
 		 * VOP_INACTIVE must be called with the vnode locked.  A
 		 * vput() caller already holds that lock; here we have to
 		 * take it ourselves.  LK_INTERLOCK hands the interlock we
 		 * hold over to vn_lock().  If the lock cannot be obtained,
 		 * VOP_INACTIVE is skipped.
 		 */
 		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
 			VOP_INACTIVE(vp, p);
 		}
 
 	} else {
 #ifdef DIAGNOSTIC
 		vprint("vrele: negative ref count", vp);
 		mtx_unlock(&vp->v_interlock);
 #endif
 		panic("vrele: negative ref cnt");
 	}
 }
 
 /*
  * Release an already locked vnode.  This gives the same effect as
  * unlock+vrele(), but takes less time and avoids releasing and
  * re-acquiring the lock (as vrele() acquires the lock internally).
  *
  * Panics if the use count is not positive.
  */
 void
 vput(vp)
 	struct vnode *vp;
 {
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vput: null vp"));
 	mtx_lock(&vp->v_interlock);
 	KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close"));
 
 	/* Not the last reference: drop the count and unlock the vnode. */
 	if (vp->v_usecount > 1) {
 
 		vp->v_usecount--;
 		VOP_UNLOCK(vp, LK_INTERLOCK, p);
 		return;
 
 	}
 
 	if (vp->v_usecount == 1) {
 
 		vp->v_usecount--;
 		if (VSHOULDFREE(vp))
 			vfree(vp);
 		/*
 		 * The caller passed us a locked vnode, so VOP_INACTIVE can be
 		 * called directly; only the interlock has to be dropped
 		 * first.  There is no explicit unlock here: per the note in
 		 * vclean(), VOP_INACTIVE unlocks the vnode.
 		 */
 		mtx_unlock(&vp->v_interlock);
 		VOP_INACTIVE(vp, p);
 
 	} else {
 #ifdef DIAGNOSTIC
 		vprint("vput: negative ref count", vp);
 #endif
 		panic("vput: negative ref cnt");
 	}
 }
 
 /*
  * Somebody doesn't want the vnode recycled.
  *
  * Takes one hold on the vnode (v_holdcnt) and marks the vnode busy if
  * VSHOULDBUSY() says so.  Each vhold() is undone with vdrop().
  */
 void
 vhold(vp)
 	register struct vnode *vp;
 {
 	int ipl;
 
 	ipl = splbio();
 	vp->v_holdcnt += 1;
 	if (VSHOULDBUSY(vp))
 		vbusy(vp);
 	splx(ipl);
 }
 
 /*
  * Note that there is one less who cares about this vnode.  vdrop() is the
  * opposite of vhold().
  *
  * Panics if the vnode has no holds outstanding.  The vnode is handed to
  * vfree() if VSHOULDFREE() says so after the hold is dropped.
  */
 void
 vdrop(vp)
 	register struct vnode *vp;
 {
 	int ipl;
 
 	ipl = splbio();
 	if (vp->v_holdcnt <= 0)
 		panic("vdrop: holdcnt");
 	vp->v_holdcnt -= 1;
 	if (VSHOULDFREE(vp))
 		vfree(vp);
 	splx(ipl);
 }
 
 /*
  * Remove any vnodes in the vnode table belonging to mount point mp.
  *
  * If MNT_NOFORCE is specified, there should not be any active ones,
  * return error if any are found (nb: this is a user error, not a
  * system error). If MNT_FORCE is specified, detach any active vnodes
  * that are found.
  *
  * skipvp, if not NULL, is left alone.  The flags tested here are
  * SKIPSYSTEM (leave VSYSTEM vnodes alone), WRITECLOSE (only touch
  * regular files open for writing) and FORCECLOSE (forcibly close vnodes
  * that are still in use).
  *
  * Returns EBUSY if any in-use vnode was left behind, otherwise 0.
  */
 #ifdef DIAGNOSTIC
 static int busyprt = 0;		/* print out busy vnodes */
 SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
 #endif
 
 int
 vflush(mp, skipvp, flags)
 	struct mount *mp;
 	struct vnode *skipvp;
 	int flags;
 {
 	struct proc *p = curproc;	/* XXX */
 	struct vnode *vp, *nvp;
 	int busy = 0;
 
 	mtx_lock(&mntvnode_mtx);
 loop:
 	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
 		/*
 		 * Make sure this vnode wasn't reclaimed in getnewvnode().
 		 * Start over if it has (it won't be on the list anymore).
 		 */
 		if (vp->v_mount != mp)
 			goto loop;
 		nvp = LIST_NEXT(vp, v_mntvnodes);
 		/*
 		 * Skip over a selected vnode.
 		 */
 		if (vp == skipvp)
 			continue;
 
 		mtx_lock(&vp->v_interlock);
 		/*
 		 * Skip over a vnodes marked VSYSTEM.
 		 */
 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
 			mtx_unlock(&vp->v_interlock);
 			continue;
 		}
 		/*
 		 * If WRITECLOSE is set, only flush out regular file vnodes
 		 * open for writing.
 		 */
 		if ((flags & WRITECLOSE) &&
 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
 			mtx_unlock(&vp->v_interlock);
 			continue;
 		}
 
 		/*
 		 * With v_usecount == 0, all we need to do is clear out the
 		 * vnode data structures and we are done.
 		 *
 		 * vgonel() is entered with the vnode interlock held; the
 		 * mount vnode list mutex is dropped around the call.
 		 */
 		if (vp->v_usecount == 0) {
 			mtx_unlock(&mntvnode_mtx);
 			vgonel(vp, p);
 			mtx_lock(&mntvnode_mtx);
 			continue;
 		}
 
 		/*
 		 * If FORCECLOSE is set, forcibly close the vnode. For block
 		 * or character devices, revert to an anonymous device. For
 		 * all other files, just kill them.
 		 */
 		if (flags & FORCECLOSE) {
 			mtx_unlock(&mntvnode_mtx);
 			if (vp->v_type != VCHR) {
 				vgonel(vp, p);
 			} else {
 				vclean(vp, 0, p);
 				vp->v_op = spec_vnodeop_p;
 				insmntque(vp, (struct mount *) 0);
 			}
 			mtx_lock(&mntvnode_mtx);
 			continue;
 		}
 #ifdef DIAGNOSTIC
 		if (busyprt)
 			vprint("vflush: busy vnode", vp);
 #endif
 		/* Still in use and not forced: leave it and report EBUSY. */
 		mtx_unlock(&vp->v_interlock);
 		busy++;
 	}
 	mtx_unlock(&mntvnode_mtx);
 	if (busy)
 		return (EBUSY);
 	return (0);
 }
 
 /*
  * Disassociate the underlying file system from a vnode.
  *
  * The vnode is marked VXLOCK for the duration, its buffers are flushed
  * or tossed (when DOCLOSE is set in flags), the filesystem's reclaim
  * routine is called and the vnode is switched to the dead vnode
  * operations with tag VT_NON.  Anyone sleeping on the vnode with VXWANT
  * set is woken at the end.
  *
  * NOTE(review): the VOP_LOCK call below is passed LK_INTERLOCK and
  * vgonel() re-takes the interlock after calling this, so the vnode
  * interlock is presumably held on entry and released by the time this
  * returns -- confirm against the other callers.
  */
 static void
 vclean(vp, flags, p)
 	struct vnode *vp;
 	int flags;
 	struct proc *p;
 {
 	int active;
 
 	/*
 	 * Check to see if the vnode is in use. If so we have to reference it
 	 * before we clean it out so that its count cannot fall to zero and
 	 * generate a race against ourselves to recycle it.
 	 */
 	if ((active = vp->v_usecount))
 		vp->v_usecount++;
 
 	/*
 	 * Prevent the vnode from being recycled or brought into use while we
 	 * clean it out.
 	 */
 	if (vp->v_flag & VXLOCK)
 		panic("vclean: deadlock");
 	vp->v_flag |= VXLOCK;
 	vp->v_vxproc = curproc;
 	/*
 	 * Even if the count is zero, the VOP_INACTIVE routine may still
 	 * have the object locked while it cleans it out. The VOP_LOCK
 	 * ensures that the VOP_INACTIVE routine is done with its work.
 	 * For active vnodes, it ensures that no other activity can
 	 * occur while the underlying object is being cleaned out.
 	 */
 	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
 
 	/*
 	 * Clean out any buffers associated with the vnode.
 	 * If the flush fails, just toss the buffers.
 	 */
 	if (flags & DOCLOSE) {
 		if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
 			(void) vn_write_suspend_wait(vp, NULL, V_WAIT);
 		if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
 			vinvalbuf(vp, 0, NOCRED, p, 0, 0);
 	}
 
 	VOP_DESTROYVOBJECT(vp);
 
 	/*
 	 * If purging an active vnode, it must be closed and
 	 * deactivated before being reclaimed. Note that the
 	 * VOP_INACTIVE will unlock the vnode.
 	 */
 	if (active) {
 		if (flags & DOCLOSE)
 			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
 		VOP_INACTIVE(vp, p);
 	} else {
 		/*
 		 * Any other processes trying to obtain this lock must first
 		 * wait for VXLOCK to clear, then call the new lock operation.
 		 */
 		VOP_UNLOCK(vp, 0, p);
 	}
 	/*
 	 * Reclaim the vnode.
 	 */
 	if (VOP_RECLAIM(vp, p))
 		panic("vclean: cannot reclaim");
 
 	if (active) {
 		/*
 		 * Inline copy of vrele() since VOP_INACTIVE
 		 * has already been called.
 		 */
 		mtx_lock(&vp->v_interlock);
 		if (--vp->v_usecount <= 0) {
 #ifdef DIAGNOSTIC
 			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
 				vprint("vclean: bad ref count", vp);
 				panic("vclean: ref cnt");
 			}
 #endif
 			vfree(vp);
 		}
 		mtx_unlock(&vp->v_interlock);
 	}
 
 	/* Drop name cache entries and tear down the vnode's own lock. */
 	cache_purge(vp);
 	vp->v_vnlock = NULL;
 	lockdestroy(&vp->v_lock);
 
 	if (VSHOULDFREE(vp))
 		vfree(vp);
 	
 	/*
 	 * Done with purge, notify sleepers of the grim news.
 	 */
 	vp->v_op = dead_vnodeop_p;
 	vn_pollgone(vp);
 	vp->v_tag = VT_NON;
 	vp->v_flag &= ~VXLOCK;
 	vp->v_vxproc = NULL;
 	if (vp->v_flag & VXWANT) {
 		vp->v_flag &= ~VXWANT;
 		wakeup((caddr_t) vp);
 	}
 }
 
 /*
  * Eliminate all activity associated with the requested vnode
  * and with all vnodes aliased to the requested vnode.
  *
  * ap->a_vp is the vnode being revoked and ap->a_flags must include
  * REVOKEALL.  Every vnode on the alias list of the vnode's device is
  * passed to vgone() until the list is empty.  Always returns 0.
  *
  * The vnode interlock must not be held by the caller: it is taken here
  * to examine VXLOCK, and vgone() takes it again for each alias.
  */
 int
 vop_revoke(ap)
 	struct vop_revoke_args /* {
 		struct vnode *a_vp;
 		int a_flags;
 	} */ *ap;
 {
 	struct vnode *vp, *vq;
 	dev_t dev;
 
 	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));
 
 	vp = ap->a_vp;
 	/*
 	 * If a vgone (or vclean) is already in progress,
 	 * wait until it is done and return.
 	 *
 	 * The interlock has to be owned both to test and set the flags
 	 * without racing the cleaner's wakeup and because msleep() is
 	 * asked to release it (PDROP); it is not re-taken on wakeup.
 	 */
 	mtx_lock(&vp->v_interlock);
 	if (vp->v_flag & VXLOCK) {
 		vp->v_flag |= VXWANT;
 		msleep((caddr_t)vp, &vp->v_interlock, PINOD | PDROP,
 		    "vop_revokeall", 0);
 		return (0);
 	}
 	mtx_unlock(&vp->v_interlock);
 	dev = vp->v_rdev;
 	for (;;) {
 		/* vgone() unlinks vq from si_hlist, so the list shrinks. */
 		mtx_lock(&spechash_mtx);
 		vq = SLIST_FIRST(&dev->si_hlist);
 		mtx_unlock(&spechash_mtx);
 		if (!vq)
 			break;
 		vgone(vq);
 	}
 	return (0);
 }
 
 /*
  * Recycle an unused vnode to the front of the free list.
  * Release the passed interlock if the vnode will be recycled.
  */
 int
 vrecycle(vp, inter_lkp, p)
 	struct vnode *vp;
 	struct mtx *inter_lkp;
 	struct proc *p;
 {
 
 	mtx_lock(&vp->v_interlock);
 	if (vp->v_usecount == 0) {
 		if (inter_lkp) {
 			mtx_unlock(inter_lkp);
 		}
 		vgonel(vp, p);
 		return (1);
 	}
 	mtx_unlock(&vp->v_interlock);
 	return (0);
 }
 
 /*
  * Eliminate all activity associated with a vnode
  * in preparation for reuse.
  */
 void
 vgone(vp)
 	register struct vnode *vp;
 {
 	struct proc *p = curproc;	/* XXX */
 
 	mtx_lock(&vp->v_interlock);
 	vgonel(vp, p);
 }
 
 /*
  * vgone, with the vp interlock held.
  *
  * The interlock is released in all cases by the time this returns.  If
  * another vgone/vclean is already working on the vnode, this sleeps
  * until it finishes and returns without doing anything further.
  * Otherwise the vnode is cleaned, taken off its mount's vnode list and
  * its device alias list, possibly moved to the head of the free list,
  * and marked VBAD.
  */
 void
 vgonel(vp, p)
 	struct vnode *vp;
 	struct proc *p;
 {
 	int s;
 
 	/*
 	 * If a vgone (or vclean) is already in progress,
 	 * wait until it is done and return.
 	 */
 	if (vp->v_flag & VXLOCK) {
 		vp->v_flag |= VXWANT;
 		msleep((caddr_t)vp, &vp->v_interlock, PINOD | PDROP,
 		    "vgone", 0);
 		return;
 	}
 
 	/*
 	 * Clean out the filesystem specific data.
 	 */
 	vclean(vp, DOCLOSE, p);
 	mtx_lock(&vp->v_interlock);
 
 	/*
 	 * Delete from old mount point vnode list, if on one.
 	 */
 	if (vp->v_mount != NULL)
 		insmntque(vp, (struct mount *)0);
 	/*
 	 * If special device, remove it from special device alias list
 	 * if it is on one.
 	 */
 	if (vp->v_type == VCHR && vp->v_rdev != NULL && vp->v_rdev != NODEV) {
 		mtx_lock(&spechash_mtx);
 		SLIST_REMOVE(&vp->v_rdev->si_hlist, vp, vnode, v_specnext);
 		freedev(vp->v_rdev);
 		mtx_unlock(&spechash_mtx);
 		vp->v_rdev = NULL;
 	}
 
 	/*
 	 * If it is on the freelist and not already at the head,
 	 * move it to the head of the list. The test of the
 	 * VDOOMED flag and the reference count of zero is because
 	 * it will be removed from the free list by getnewvnode,
 	 * but will not have its reference count incremented until
 	 * after calling vgone. If the reference count were
 	 * incremented first, vgone would (incorrectly) try to
 	 * close the previous instance of the underlying object.
 	 */
 	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
 		s = splbio();
 		mtx_lock(&vnode_free_list_mtx);
 		/* freevnodes is only bumped for vnodes new to the list. */
 		if (vp->v_flag & VFREE)
 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 		else
 			freevnodes++;
 		vp->v_flag |= VFREE;
 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 		mtx_unlock(&vnode_free_list_mtx);
 		splx(s);
 	}
 
 	vp->v_type = VBAD;
 	mtx_unlock(&vp->v_interlock);
 }
 
 /*
  * Lookup a vnode by device number.
  *
  * Searches the alias list of dev for the first vnode whose type equals
  * "type".  Returns 1 and stores the vnode in *vpp when one is found;
  * returns 0 and leaves *vpp untouched otherwise.  No reference is taken
  * on the vnode.
  */
 int
 vfinddev(dev, type, vpp)
 	dev_t dev;
 	enum vtype type;
 	struct vnode **vpp;
 {
 	struct vnode *alias;
 	int found;
 
 	found = 0;
 	mtx_lock(&spechash_mtx);
 	SLIST_FOREACH(alias, &dev->si_hlist, v_specnext) {
 		if (alias->v_type != type)
 			continue;
 		*vpp = alias;
 		found = 1;
 		break;
 	}
 	mtx_unlock(&spechash_mtx);
 	return (found);
 }
 
 /*
  * Calculate the total number of references to a special device.
  */
 int
 vcount(vp)
 	struct vnode *vp;
 {
 	struct vnode *vq;
 	int count;
 
 	count = 0;
 	mtx_lock(&spechash_mtx);
 	SLIST_FOREACH(vq, &vp->v_rdev->si_hlist, v_specnext)
 		count += vq->v_usecount;
 	mtx_unlock(&spechash_mtx);
 	return (count);
 }
 
 /*
  * Same as above, but using the dev_t as argument
  */
 int
 count_dev(dev)
 	dev_t dev;
 {
 	struct vnode *vp;
 
 	vp = SLIST_FIRST(&dev->si_hlist);
 	if (vp == NULL)
 		return (0);
 	return(vcount(vp));
 }
 
 /*
  * Print out a description of a vnode.
  */
 static char *typename[] =
 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
 
 void
 vprint(label, vp)
 	char *label;
 	struct vnode *vp;
 {
 	char buf[96];
 
 	if (label != NULL)
 		printf("%s: %p: ", label, (void *)vp);
 	else
 		printf("%p: ", (void *)vp);
 	printf("type %s, usecount %d, writecount %d, refcount %d,",
 	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
 	    vp->v_holdcnt);
 	buf[0] = '\0';
 	if (vp->v_flag & VROOT)
 		strcat(buf, "|VROOT");
 	if (vp->v_flag & VTEXT)
 		strcat(buf, "|VTEXT");
 	if (vp->v_flag & VSYSTEM)
 		strcat(buf, "|VSYSTEM");
 	if (vp->v_flag & VXLOCK)
 		strcat(buf, "|VXLOCK");
 	if (vp->v_flag & VXWANT)
 		strcat(buf, "|VXWANT");
 	if (vp->v_flag & VBWAIT)
 		strcat(buf, "|VBWAIT");
 	if (vp->v_flag & VDOOMED)
 		strcat(buf, "|VDOOMED");
 	if (vp->v_flag & VFREE)
 		strcat(buf, "|VFREE");
 	if (vp->v_flag & VOBJBUF)
 		strcat(buf, "|VOBJBUF");
 	if (buf[0] != '\0')
 		printf(" flags (%s)", &buf[1]);
 	if (vp->v_data == NULL) {
 		printf("\n");
 	} else {
 		printf("\n\t");
 		VOP_PRINT(vp);
 	}
 }
 
 #ifdef DDB
 #include <ddb/ddb.h>
 /*
  * List all of the locked vnodes in the system.
  * Called when debugging the kernel.
  *
  * Walks the mount list and, for each mount that can be busied without
  * waiting, prints every vnode on the mount for which VOP_ISLOCKED
  * reports a lock.  Mounts that cannot be busied are skipped.
  */
 DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
 {
 	struct proc *p = curproc;	/* XXX */
 	struct mount *mp, *nmp;
 	struct vnode *vp;
 
 	printf("Locked vnodes\n");
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		/*
 		 * NOTE(review): mountlist_mtx is re-taken below only after
 		 * a successful vfs_busy(), so vfs_busy() presumably drops
 		 * it on success and keeps it held on failure -- confirm.
 		 */
 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 			if (VOP_ISLOCKED(vp, NULL))
 				vprint((char *)0, vp);
 		}
 		/* Fetch the successor before letting go of this mount. */
 		mtx_lock(&mountlist_mtx);
 		nmp = TAILQ_NEXT(mp, mnt_list);
 		vfs_unbusy(mp, p);
 	}
 	mtx_unlock(&mountlist_mtx);
 }
 #endif
 
 /*
  * Top level filesystem related information gathering.
  *
  * Handler for the vfs.generic sysctl node.  The name vector is rebuilt
  * to start one element before arg1, so name[1] is the first component
  * the caller supplied below this node and namelen counts that extra
  * leading element.  name[0] itself is only referenced by the disabled
  * "notyet" code.
  *
  * Returns the result of SYSCTL_OUT for VFS_MAXTYPENUM and VFS_CONF,
  * ENOTDIR for a name of the wrong length, and EOPNOTSUPP for an unknown
  * request or an unknown filesystem type number.
  */
 static int	sysctl_ovfs_conf __P((SYSCTL_HANDLER_ARGS));
 
 static int
 vfs_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	int *name = (int *)arg1 - 1;	/* XXX */
 	u_int namelen = arg2 + 1;	/* XXX */
 	struct vfsconf *vfsp;
 
 #if 1 || defined(COMPAT_PRELITE2)
 	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
 	if (namelen == 1)
 		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
 #endif
 
 	/* XXX the below code does not compile; vfs_sysctl does not exist. */
 #ifdef notyet
 	/* all sysctl names at this level are at least name and field */
 	if (namelen < 2)
 		return (ENOTDIR);		/* overloaded */
 	if (name[0] != VFS_GENERIC) {
 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 			if (vfsp->vfc_typenum == name[0])
 				break;
 		if (vfsp == NULL)
 			return (EOPNOTSUPP);
 		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
 		    oldp, oldlenp, newp, newlen, p));
 	}
 #endif
 	switch (name[1]) {
 	case VFS_MAXTYPENUM:
 		/* Copy out maxvfsconf. */
 		if (namelen != 2)
 			return (ENOTDIR);
 		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
 	case VFS_CONF:
 		/* Copy out the vfsconf entry whose type number is name[2]. */
 		if (namelen != 3)
 			return (ENOTDIR);	/* overloaded */
 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 			if (vfsp->vfc_typenum == name[2])
 				break;
 		if (vfsp == NULL)
 			return (EOPNOTSUPP);
 		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
 	}
 	return (EOPNOTSUPP);
 }
 
 SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
 	"Generic filesystem");
 
 #if 1 || defined(COMPAT_PRELITE2)
 
 /*
  * Old-style filesystem configuration export.
  *
  * Copies out one struct ovfsconf per configured filesystem, in vfsconf
  * list order.  Returns 0 on success or the first SYSCTL_OUT error.
  */
 static int
 sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct vfsconf *vfsp;
 	struct ovfsconf ovfs;
 
 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
 		/*
 		 * The whole structure is copied out, so clear it first:
 		 * otherwise padding and the bytes after the name's NUL
 		 * would carry stale kernel stack contents to the caller.
 		 */
 		bzero(&ovfs, sizeof ovfs);
 		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
 		strcpy(ovfs.vfc_name, vfsp->vfc_name);
 		ovfs.vfc_index = vfsp->vfc_typenum;
 		ovfs.vfc_refcount = vfsp->vfc_refcount;
 		ovfs.vfc_flags = vfsp->vfc_flags;
 		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
 		if (error)
 			return error;
 	}
 	return 0;
 }
 
 #endif /* 1 || COMPAT_PRELITE2 */
 
 #if COMPILING_LINT
 #define KINFO_VNODESLOP	10
 /*
  * Dump vnode list (via sysctl).
  * Copyout address of vnode followed by vnode.
  *
  * With no output buffer, only a size estimate is reported: room for
  * numvnodes + KINFO_VNODESLOP (pointer, vnode) pairs.  Mounts that
  * cannot be busied without waiting are skipped.  Returns 0 on success
  * or the first SYSCTL_OUT error.
  */
 /* ARGSUSED */
 static int
 sysctl_vnode(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p = curproc;	/* XXX */
 	struct mount *mp, *nmp;
 	struct vnode *nvp, *vp;
 	int error;
 
 #define VPTRSZ	sizeof (struct vnode *)
 #define VNODESZ	sizeof (struct vnode)
 
 	req->lock = 0;
 	if (!req->oldptr) /* Make an estimate */
 		return (SYSCTL_OUT(req, 0,
 			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
 
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 again:
 		mtx_lock(&mntvnode_mtx);
 		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
 		     vp != NULL;
 		     vp = nvp) {
 			/*
 			 * Check that the vp is still associated with
 			 * this filesystem.  RACE: could have been
 			 * recycled onto the same filesystem.
 			 */
 			if (vp->v_mount != mp) {
 				mtx_unlock(&mntvnode_mtx);
 				goto again;
 			}
 			nvp = LIST_NEXT(vp, v_mntvnodes);
 			/* The list mutex is not held across the copy-out. */
 			mtx_unlock(&mntvnode_mtx);
 			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
 			    (error = SYSCTL_OUT(req, vp, VNODESZ))) {
 				/*
 				 * Neither list mutex is held at this point,
 				 * but the mount is still busied; release it
 				 * so it is not left busy after we return.
 				 */
 				vfs_unbusy(mp, p);
 				return (error);
 			}
 			mtx_lock(&mntvnode_mtx);
 		}
 		mtx_unlock(&mntvnode_mtx);
 		/* Fetch the successor before letting go of this mount. */
 		mtx_lock(&mountlist_mtx);
 		nmp = TAILQ_NEXT(mp, mnt_list);
 		vfs_unbusy(mp, p);
 	}
 	mtx_unlock(&mountlist_mtx);
 
 	return (0);
 }
 
 /*
  * XXX
  * Exporting the vnode list on large systems causes them to crash.
  * Exporting the vnode list on medium systems causes sysctl to coredump.
  */
 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
 	0, 0, sysctl_vnode, "S,vnode", "");
 #endif
 
 /*
  * Check to see if a filesystem is mounted on a block device.
  *
  * Returns EBUSY when the device behind vp has a mount point recorded in
  * si_mountpoint, and 0 otherwise.
  */
 int
 vfs_mountedon(vp)
 	struct vnode *vp;
 {
 
 	return ((vp->v_rdev->si_mountpoint != NULL) ? EBUSY : 0);
 }
 
 /*
  * Unmount all filesystems. The list is traversed in reverse order
  * of mounting to avoid dependencies.
  */
 void
 vfs_unmountall()
 {
 	struct mount *mp;
 	struct proc *p;
 	int error;
 
 	if (curproc != NULL)
 		p = curproc;
 	else
 		p = initproc;	/* XXX XXX should this be proc0? */
 	/*
 	 * Since this only runs when rebooting, it is not interlocked.
 	 */
 	while(!TAILQ_EMPTY(&mountlist)) {
 		mp = TAILQ_LAST(&mountlist, mntlist);
 		error = dounmount(mp, MNT_FORCE, p);
 		if (error) {
 			TAILQ_REMOVE(&mountlist, mp, mnt_list);
 			printf("unmount of %s failed (",
 			    mp->mnt_stat.f_mntonname);
 			if (error == EBUSY)
 				printf("BUSY)\n");
 			else
 				printf("%d)\n", error);
 		} else {
 			/* The unmount has removed mp from the mountlist */
 		}
 	}
 }
 
 /*
  * Build hash lists of net addresses and hang them off the mount point.
  * Called by ufs_mount() to set up the lists of export addresses.
  *
  * With ex_addrlen == 0 the request sets the default export of the mount
  * and fails with EPERM if one is already set.  Otherwise the address
  * (and optional mask) are copied in from user space and entered into
  * the per-address-family radix tree of nep, which is created on demand.
  *
  * Returns 0 on success, EPERM if the entry already exists, EINVAL if
  * the address family is out of range, ENOBUFS if no radix tree could be
  * set up for the family, or the error from copyin().
  */
 static int
 vfs_hang_addrlist(mp, nep, argp)
 	struct mount *mp;
 	struct netexport *nep;
 	struct export_args *argp;
 {
 	register struct netcred *np;
 	register struct radix_node_head *rnh;
 	register int i;
 	struct radix_node *rn;
 	struct sockaddr *saddr, *smask = 0;
 	struct domain *dom;
 	int error;
 
 	if (argp->ex_addrlen == 0) {
 		if (mp->mnt_flag & MNT_DEFEXPORTED)
 			return (EPERM);
 		np = &nep->ne_defexported;
 		np->netc_exflags = argp->ex_flags;
 		bzero(&np->netc_anon, sizeof(np->netc_anon));
 		np->netc_anon.cr_uid = argp->ex_anon.cr_uid;
 		np->netc_anon.cr_ngroups = argp->ex_anon.cr_ngroups;
 		bcopy(argp->ex_anon.cr_groups, np->netc_anon.cr_groups,
 		    sizeof(np->netc_anon.cr_groups));
 		np->netc_anon.cr_ref = 1;
 		mp->mnt_flag |= MNT_DEFEXPORTED;
 		return (0);
 	}
 	/* The address and mask are stored directly behind the netcred. */
 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
 	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK | M_ZERO);
 	saddr = (struct sockaddr *) (np + 1);
 	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
 		goto out;
 	/* Never trust a length that claims more than was copied in. */
 	if (saddr->sa_len > argp->ex_addrlen)
 		saddr->sa_len = argp->ex_addrlen;
 	if (argp->ex_masklen) {
 		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
 		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
 		if (error)
 			goto out;
 		if (smask->sa_len > argp->ex_masklen)
 			smask->sa_len = argp->ex_masklen;
 	}
 	/*
 	 * The family comes straight from user space and is used as an
 	 * index into ne_rtable[] below, so reject values outside the table.
 	 */
 	i = saddr->sa_family;
 	if (i < 0 ||
 	    (size_t)i >= sizeof(nep->ne_rtable) / sizeof(nep->ne_rtable[0])) {
 		error = EINVAL;
 		goto out;
 	}
 	if ((rnh = nep->ne_rtable[i]) == 0) {
 		/*
 		 * Seems silly to initialize every AF when most are not used,
 		 * do so on demand here
 		 */
 		for (dom = domains; dom; dom = dom->dom_next)
 			if (dom->dom_family == i && dom->dom_rtattach) {
 				dom->dom_rtattach((void **) &nep->ne_rtable[i],
 				    dom->dom_rtoffset);
 				break;
 			}
 		if ((rnh = nep->ne_rtable[i]) == 0) {
 			error = ENOBUFS;
 			goto out;
 		}
 	}
 	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
 	    np->netc_rnodes);
 	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
 		error = EPERM;
 		goto out;
 	}
 	np->netc_exflags = argp->ex_flags;
 	bzero(&np->netc_anon, sizeof(np->netc_anon));
 	np->netc_anon.cr_uid = argp->ex_anon.cr_uid;
 	np->netc_anon.cr_ngroups = argp->ex_anon.cr_ngroups;
 	bcopy(argp->ex_anon.cr_groups, np->netc_anon.cr_groups,
 	    sizeof(np->netc_anon.cr_groups));
 	np->netc_anon.cr_ref = 1;
 	return (0);
 out:
 	free(np, M_NETADDR);
 	return (error);
 }
 
 /*
  * Tree-walk callback used by vfs_free_addrlist(): remove one entry from
  * the radix tree whose head is passed in w, then free the entry.
  */
 /* ARGSUSED */
 static int
 vfs_free_netcred(rn, w)
 	struct radix_node *rn;
 	void *w;
 {
 	register struct radix_node_head *head;
 
 	head = (struct radix_node_head *) w;
 	(*head->rnh_deladdr) (rn->rn_key, rn->rn_mask, head);
 	free((caddr_t) rn, M_NETADDR);
 	return (0);
 }
 
 /*
  * Free the net address hash lists that are hanging off the mount points.
  */
 static void
 vfs_free_addrlist(nep)
 	struct netexport *nep;
 {
 	register int i;
 	register struct radix_node_head *rnh;
 
 	for (i = 0; i <= AF_MAX; i++)
 		if ((rnh = nep->ne_rtable[i])) {
 			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
 			    (caddr_t) rnh);
 			free((caddr_t) rnh, M_RTABLE);
 			nep->ne_rtable[i] = 0;
 		}
 }
 
 /*
  * High level function to manipulate export options on a mount point
  * and the passed in netexport.
  * Struct export_args *argp is the variable used to twiddle options,
  * the structure is described in sys/mount.h
  */
 int
 vfs_export(mp, nep, argp)
 	struct mount *mp;
 	struct netexport *nep;
 	struct export_args *argp;
 {
 	int error;
 
 	if (argp->ex_flags & MNT_DELEXPORT) {
 		if (mp->mnt_flag & MNT_EXPUBLIC) {
 			vfs_setpublicfs(NULL, NULL, NULL);
 			mp->mnt_flag &= ~MNT_EXPUBLIC;
 		}
 		vfs_free_addrlist(nep);
 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 	}
 	if (argp->ex_flags & MNT_EXPORTED) {
 		if (argp->ex_flags & MNT_EXPUBLIC) {
 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
 				return (error);
 			mp->mnt_flag |= MNT_EXPUBLIC;
 		}
 		if ((error = vfs_hang_addrlist(mp, nep, argp)))
 			return (error);
 		mp->mnt_flag |= MNT_EXPORTED;
 	}
 	return (0);
 }
 
 /*
  * Set the publicly exported filesystem (WebNFS). Currently, only
  * one public filesystem is possible in the spec (RFC 2054 and 2055)
  */
 int
 vfs_setpublicfs(mp, nep, argp)
 	struct mount *mp;
 	struct netexport *nep;
 	struct export_args *argp;
 {
 	int error;
 	struct vnode *rvp;
 	char *cp;
 
 	/*
 	 * mp == NULL -> invalidate the current info, the FS is
 	 * no longer exported. May be called from either vfs_export
 	 * or unmount, so check if it hasn't already been done.
 	 */
 	if (mp == NULL) {
 		if (nfs_pub.np_valid) {
 			nfs_pub.np_valid = 0;
 			if (nfs_pub.np_index != NULL) {
 				FREE(nfs_pub.np_index, M_TEMP);
 				nfs_pub.np_index = NULL;
 			}
 		}
 		return (0);
 	}
 
 	/*
 	 * Only one allowed at a time.
 	 */
 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
 		return (EBUSY);
 
 	/*
 	 * Get real filehandle for root of exported FS.
 	 */
 	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
 
 	if ((error = VFS_ROOT(mp, &rvp)))
 		return (error);
 
 	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
 		return (error);
 
 	vput(rvp);
 
 	/*
 	 * If an indexfile was specified, pull it in.
 	 */
 	if (argp->ex_indexfile != NULL) {
 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
 		    M_WAITOK);
 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
 		    MAXNAMLEN, (size_t *)0);
 		if (!error) {
 			/*
 			 * Check for illegal filenames.
 			 */
 			for (cp = nfs_pub.np_index; *cp; cp++) {
 				if (*cp == '/') {
 					error = EINVAL;
 					break;
 				}
 			}
 		}
 		if (error) {
 			FREE(nfs_pub.np_index, M_TEMP);
 			return (error);
 		}
 	}
 
 	nfs_pub.np_mount = mp;
 	nfs_pub.np_valid = 1;
 	return (0);
 }
 
 /*
  * Used by the filesystems to determine if a given network address
  * (passed in 'nam') is present in thier exports list, returns a pointer
  * to struct netcred so that the filesystem can examine it for
  * access rights (read/write/etc).
  */
 struct netcred *
 vfs_export_lookup(mp, nep, nam)
 	register struct mount *mp;
 	struct netexport *nep;
 	struct sockaddr *nam;
 {
 	register struct netcred *np;
 	register struct radix_node_head *rnh;
 	struct sockaddr *saddr;
 
 	np = NULL;
 	if (mp->mnt_flag & MNT_EXPORTED) {
 		/*
 		 * Lookup in the export list first.
 		 */
 		if (nam != NULL) {
 			saddr = nam;
 			rnh = nep->ne_rtable[saddr->sa_family];
 			if (rnh != NULL) {
 				np = (struct netcred *)
 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
 							      rnh);
 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
 					np = NULL;
 			}
 		}
 		/*
 		 * If no address match, use the default if it exists.
 		 */
 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
 			np = &nep->ne_defexported;
 	}
 	return (np);
 }
 
 /*
  * Perform msync on all vnodes under a mount point.
  * The mount point must be locked.
  *
  * mp    - mount point whose vnode list (mnt_vnodelist) is scanned.
  * flags - MNT_WAIT requests synchronous page cleaning (OBJPC_SYNC);
  *         any other value cleans with OBJPC_NOSYNC and additionally
  *         skips vnodes that are locked or whose object is not marked
  *         OBJ_MIGHTBEDIRTY.
  *
  * The scan is repeated while a pass cleaned something, for at most
  * five passes in total.
  */
 void
 vfs_msync(struct mount *mp, int flags) {
 	struct vnode *vp, *nvp;
 	struct vm_object *obj;
 	int anyio, tries;
 
 	tries = 5;
 loop:
 	anyio = 0;
 	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
 
 		/* Fetch the successor up front; vp may go away below. */
 		nvp = LIST_NEXT(vp, v_mntvnodes);
 
 		/*
 		 * The vnode no longer belongs to this mount point, so the
 		 * list position is stale: start over from the head.
 		 * NOTE(review): this restart does not consume a "try".
 		 */
 		if (vp->v_mount != mp) {
 			goto loop;
 		}
 
 		if (vp->v_flag & VXLOCK)	/* XXX: what if MNT_WAIT? */
 			continue;
 
 		/* Cheap unlocked pre-checks for the non-waiting case. */
 		if (flags != MNT_WAIT) {
 			if (VOP_GETVOBJECT(vp, &obj) != 0 ||
 			    (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
 				continue;
 			if (VOP_ISLOCKED(vp, NULL))
 				continue;
 		}
 
 		/*
 		 * Re-check under the vnode interlock.  vget() is passed
 		 * LK_INTERLOCK, so the interlock is handed over to it on
 		 * that path; only the else branch unlocks it explicitly.
 		 */
 		mtx_lock(&vp->v_interlock);
 		if (VOP_GETVOBJECT(vp, &obj) == 0 &&
 		    (obj->flags & OBJ_MIGHTBEDIRTY)) {
 			if (!vget(vp,
 				LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
 				/* Look the object up again now that vp is locked. */
 				if (VOP_GETVOBJECT(vp, &obj) == 0) {
 					vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
 					anyio = 1;
 				}
 				vput(vp);
 			}
 		} else {
 			mtx_unlock(&vp->v_interlock);
 		}
 	}
 	/* Something was cleaned this pass; rescan while tries remain. */
 	if (anyio && (--tries > 0))
 		goto loop;
 }
 
 /*
  * Create the VM object needed for VMIO and mmap support.  This
  * is done for all VREG files in the system.  Some filesystems might
  * afford the additional metadata buffering capability of the
  * VMIO code by making the device node be VMIO mode also.
  *
  * vp must be locked when vfs_object_create is called.
  */
 int
 vfs_object_create(vp, p, cred)
 	struct vnode *vp;
 	struct proc *p;
 	struct ucred *cred;
 {
 	int error;
 
 	/* Hand off to the vnode operation; it takes (cred, p) in that order. */
 	error = VOP_CREATEVOBJECT(vp, cred, p);
 	return (error);
 }
 
 /*
  * Mark a vnode as free, putting it up for recycling.
  */
 void
 vfree(vp)
 	struct vnode *vp;
 {
 	int ipl;
 
 	ipl = splbio();
 	mtx_lock(&vnode_free_list_mtx);
 	KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free"));
 	/* VAGE vnodes are queued at the head, all others at the tail. */
 	if ((vp->v_flag & VAGE) == 0) {
 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 	} else {
 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 	}
 	freevnodes++;
 	mtx_unlock(&vnode_free_list_mtx);
 	/* The vnode is now on the free list and no longer aged. */
 	vp->v_flag = (vp->v_flag & ~VAGE) | VFREE;
 	splx(ipl);
 }
 
 /* 
  * Opposite of vfree() - mark a vnode as in use.
  */
 void
 vbusy(vp)
 	struct vnode *vp;
 {
 	int ipl;
 
 	ipl = splbio();
 	mtx_lock(&vnode_free_list_mtx);
 	KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free"));
 	/* Pull the vnode off the free list and fix up the count. */
 	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 	freevnodes--;
 	mtx_unlock(&vnode_free_list_mtx);
 	vp->v_flag &= ~VFREE;
 	vp->v_flag &= ~VAGE;
 	splx(ipl);
 }
 
 /*
  * Record a process's interest in events which might happen to
  * a vnode.  Because poll uses the historic select-style interface
  * internally, this routine serves as both the ``check for any
  * pending events'' and the ``record my interest in future events''
  * functions.  (These are done together, while the lock is held,
  * to avoid race conditions.)
  */
 int
 vn_pollrecord(vp, p, events)
 	struct vnode *vp;
 	struct proc *p;
 	short events;
 {
 	int ready;
 
 	ready = 0;
 	mtx_lock(&vp->v_pollinfo.vpi_lock);
 	if ((vp->v_pollinfo.vpi_revents & events) != 0) {
 		/*
 		 * Report only the pending events that were asked for and
 		 * consume them.  Events we are not interested in stay
 		 * recorded for the other process which presumably had
 		 * requested them (otherwise they would never have been
 		 * recorded).
 		 */
 		events &= vp->v_pollinfo.vpi_revents;
 		vp->v_pollinfo.vpi_revents &= ~events;
 		ready = events;
 	} else {
 		/* Nothing pending: note the interest for a later wakeup. */
 		vp->v_pollinfo.vpi_events |= events;
 		selrecord(p, &vp->v_pollinfo.vpi_selinfo);
 	}
 	mtx_unlock(&vp->v_pollinfo.vpi_lock);
 	return (ready);
 }
 
 /*
  * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
  * it is possible for us to miss an event due to race conditions, but
  * that condition is expected to be rare, so for the moment it is the
  * preferred interface.
  */
 void
 vn_pollevent(vp, events)
 	struct vnode *vp;
 	short events;
 {
 	mtx_lock(&vp->v_pollinfo.vpi_lock);
 	if ((vp->v_pollinfo.vpi_events & events) == 0) {
 		/* None of these events has been asked for. */
 		mtx_unlock(&vp->v_pollinfo.vpi_lock);
 		return;
 	}
 	/*
 	 * All of vpi_events is cleared, not just the posted bits, so
 	 * that selwakeup() is not called twice when two events are
 	 * posted before the polling process(es) is awakened.  This
 	 * also limits us to at most one selwakeup() if the polling
 	 * process is no longer interested.  The price is that only
 	 * one event can be noticed at a time.  (Perhaps only the
 	 * event bits being noted should be cleared?) XXX
 	 */
 	vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
 	vp->v_pollinfo.vpi_revents |= events;
 	selwakeup(&vp->v_pollinfo.vpi_selinfo);
 	mtx_unlock(&vp->v_pollinfo.vpi_lock);
 }
 
 /*
  * Post kernel event note 'b' on the klist embedded in the vnode's poll
  * information.  The vp argument is parenthesized so that any pointer
  * expression, not just a plain identifier, expands correctly.
  */
 #define VN_KNOTE(vp, b) \
 	KNOTE((struct klist *)&(vp)->v_pollinfo.vpi_selinfo.si_note, (b))
 
 /*
  * Wake up anyone polling on vp because it is being revoked.
  * This depends on dead_poll() returning POLLHUP for correct
  * behavior.
  */
 void
 vn_pollgone(vp)
 	struct vnode *vp;
 {
 	mtx_lock(&vp->v_pollinfo.vpi_lock);
 	/* Tell kevent listeners about the revoke first. */
 	VN_KNOTE(vp, NOTE_REVOKE);
 	/* Then wake anybody who recorded interest through poll/select. */
 	if (vp->v_pollinfo.vpi_events != 0) {
 		vp->v_pollinfo.vpi_events = 0;
 		selwakeup(&vp->v_pollinfo.vpi_selinfo);
 	}
 	mtx_unlock(&vp->v_pollinfo.vpi_lock);
 }
 
 
 
 /*
  * Routine to create and manage a filesystem syncer vnode.
  *
  * The syncer vnode has its own small operations vector.  Operations
  * that need no real work are macros casting a generic routine to the
  * expected signature; the others are implemented further down.
  */
 /* close: mapped onto nullop */
 #define sync_close ((int (*) __P((struct  vop_close_args *)))nullop)
 static int	sync_fsync __P((struct  vop_fsync_args *));
 static int	sync_inactive __P((struct  vop_inactive_args *));
 static int	sync_reclaim  __P((struct  vop_reclaim_args *));
 /* lock, unlock and islocked: mapped onto the vop_no* routines */
 #define sync_lock ((int (*) __P((struct  vop_lock_args *)))vop_nolock)
 #define sync_unlock ((int (*) __P((struct  vop_unlock_args *)))vop_nounlock)
 static int	sync_print __P((struct vop_print_args *));
 #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
 
 /* Operations vector pointer, referenced through sync_vnodeop_opv_desc. */
 static vop_t **sync_vnodeop_p;
 /* Any operation not listed here falls through to vop_eopnotsupp. */
 static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
 	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
 	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
 	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
 	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
 	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
 	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
 	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
 	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
 	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
 	{ NULL, NULL }
 };
 /* Ties the vector pointer to its entry table for VNODEOP_SET(). */
 static struct vnodeopv_desc sync_vnodeop_opv_desc =
 	{ &sync_vnodeop_p, sync_vnodeop_entries };
 
 VNODEOP_SET(sync_vnodeop_opv_desc);
 
 /*
  * Create a new filesystem syncer vnode for the specified mount point.
  */
 int
 vfs_allocate_syncvnode(mp)
 	struct mount *mp;
 {
 	struct vnode *vp;
 	static long start, incr, next;
 	int error;
 
 	/* Allocate a new vnode */
 	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
 		mp->mnt_syncer = NULL;
 		return (error);
 	}
 	vp->v_type = VNON;
 	/*
 	 * Place the vnode onto the syncer worklist. We attempt to
 	 * scatter them about on the list so that they will go off
 	 * at evenly distributed times even if all the filesystems
 	 * are mounted at once.
 	 */
 	next += incr;
 	if (next == 0 || next > syncer_maxdelay) {
 		start /= 2;
 		incr /= 2;
 		if (start == 0) {
 			start = syncer_maxdelay / 2;
 			incr = syncer_maxdelay;
 		}
 		next = start;
 	}
 	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
 	mp->mnt_syncer = vp;
 	return (0);
 }
 
 /*
  * Do a lazy sync of the filesystem.
  */
 /*
  * fsync operation of the syncer vnode.  Only MNT_LAZY requests do any
  * work: the syncer vnode is requeued and the whole mount point it
  * belongs to is flushed lazily.  Always returns 0; a mount point that
  * cannot be busied or written right now is silently skipped.
  */
 static int
 sync_fsync(ap)
 	struct vop_fsync_args /* {
 		struct vnode *a_vp;
 		struct ucred *a_cred;
 		int a_waitfor;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *syncvp = ap->a_vp;
 	struct mount *mp = syncvp->v_mount;
 	struct proc *p = ap->a_p;
 	int asyncflag;
 
 	/*
 	 * We only need to do something if this is a lazy evaluation.
 	 */
 	if (ap->a_waitfor != MNT_LAZY)
 		return (0);
 
 	/*
 	 * Move ourselves to the back of the sync list.
 	 */
 	vn_syncer_add_to_worklist(syncvp, syncdelay);
 
 	/*
 	 * Walk the list of vnodes pushing all that are dirty and
 	 * not already on the sync list.
 	 */
 	mtx_lock(&mountlist_mtx);
 	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_mtx, p) != 0) {
 		/* vfs_busy() keeps the interlock held on failure. */
 		mtx_unlock(&mountlist_mtx);
 		return (0);
 	}
 	/* Writes cannot start right now; do not wait, just give up. */
 	if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) {
 		vfs_unbusy(mp, p);
 		return (0);
 	}
 	/* Clear MNT_ASYNC for the duration of the sync, then restore it. */
 	asyncflag = mp->mnt_flag & MNT_ASYNC;
 	mp->mnt_flag &= ~MNT_ASYNC;
 	vfs_msync(mp, MNT_NOWAIT);
 	VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
 	if (asyncflag)
 		mp->mnt_flag |= MNT_ASYNC;
 	vn_finished_write(mp);
 	vfs_unbusy(mp, p);
 	return (0);
 }
 
 /*
  * The syncer vnode is no longer referenced.
  */
 static int
 sync_inactive(ap)
 	struct vop_inactive_args /* {
 		struct vnode *a_vp;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *syncvp;
 
 	/* With no references left the vnode is simply disposed of. */
 	syncvp = ap->a_vp;
 	vgone(syncvp);
 	return (0);
 }
 
 /*
  * The syncer vnode is no longer needed and is being decommissioned.
  *
  * Modifications to the worklist must be protected at splbio().
  */
 static int
 sync_reclaim(ap)
 	struct vop_reclaim_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	struct vnode *syncvp;
 	int ipl;
 
 	syncvp = ap->a_vp;
 	ipl = splbio();
 	/* The mount point must stop pointing at this vnode. */
 	syncvp->v_mount->mnt_syncer = NULL;
 	/* Unhook it from the syncer worklist if it is still queued. */
 	if ((syncvp->v_flag & VONWORKLST) != 0) {
 		LIST_REMOVE(syncvp, v_synclist);
 		syncvp->v_flag &= ~VONWORKLST;
 	}
 	splx(ipl);
 
 	return (0);
 }
 
 /*
  * Print out a syncer vnode.
  */
 static int
 sync_print(ap)
 	struct vop_print_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 
 	printf("syncer vnode");
 	/* Append the lock state when the vnode has a lock attached. */
 	if (ap->a_vp->v_vnlock != NULL)
 		lockmgr_printinfo(ap->a_vp->v_vnlock);
 	printf("\n");
 	return (0);
 }
 
 /*
  * extract the dev_t from a VCHR
  */
 dev_t
 vn_todev(vp)
 	struct vnode *vp;
 {
 	/* Only character-device vnodes carry a device to hand back. */
 	if (vp->v_type == VCHR)
 		return (vp->v_rdev);
 	return (NODEV);
 }
 
 /*
  * Check if vnode represents a disk device
  */
 int
 vn_isdisk(vp, errp)
 	struct vnode *vp;
 	int *errp;
 {
 	struct cdevsw *cdevsw;
 
 	if (vp->v_type != VCHR) {
 		if (errp != NULL)
 			*errp = ENOTBLK;
 		return (0);
 	}
 	if (vp->v_rdev == NULL) {
 		if (errp != NULL)
 			*errp = ENXIO;
 		return (0);
 	}
 	cdevsw = devsw(vp->v_rdev);
 	if (cdevsw == NULL) {
 		if (errp != NULL)
 			*errp = ENXIO;
 		return (0);
 	}
 	if (!(cdevsw->d_flags & D_DISK)) {
 		if (errp != NULL)
 			*errp = ENOTBLK;
 		return (0);
 	}
 	if (errp != NULL)
 		*errp = 0;
 	return (1);
 }
 
 /*
  * Free data allocated by namei(); see namei(9) for details.
  *
  * ndp   - lookup state to clean up.
  * flags - NDF_NO_* bits; each bit that is set suppresses the
  *         corresponding cleanup step below.
  *
  * Which steps apply is also governed by the lookup flags stored in
  * ndp->ni_cnd.cn_flags (HASBUF, LOCKPARENT, WANTPARENT, LOCKLEAF,
  * SAVESTART).
  */
 void
 NDFREE(ndp, flags)
      struct nameidata *ndp;
      const uint flags;
 {
 	/* Path name buffer; HASBUF is cleared once it has been freed. */
 	if (!(flags & NDF_NO_FREE_PNBUF) &&
 	    (ndp->ni_cnd.cn_flags & HASBUF)) {
 		zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
 		ndp->ni_cnd.cn_flags &= ~HASBUF;
 	}
 	/*
 	 * Unlock the parent directory, unless parent and leaf are the
 	 * same vnode (the leaf unlock below deals with that case).
 	 */
 	if (!(flags & NDF_NO_DVP_UNLOCK) &&
 	    (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
 	    ndp->ni_dvp != ndp->ni_vp)
 		VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_proc);
 	/* Drop the reference on the parent directory. */
 	if (!(flags & NDF_NO_DVP_RELE) &&
 	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
 		vrele(ndp->ni_dvp);
 		ndp->ni_dvp = NULL;
 	}
 	/* Unlock the leaf vnode, if there is one. */
 	if (!(flags & NDF_NO_VP_UNLOCK) &&
 	    (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
 		VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_proc);
 	/* Drop the reference on the leaf vnode. */
 	if (!(flags & NDF_NO_VP_RELE) &&
 	    ndp->ni_vp) {
 		vrele(ndp->ni_vp);
 		ndp->ni_vp = NULL;
 	}
 	/* Drop the reference on the saved start directory. */
 	if (!(flags & NDF_NO_STARTDIR_RELE) &&
 	    (ndp->ni_cnd.cn_flags & SAVESTART)) {
 		vrele(ndp->ni_startdir);
 		ndp->ni_startdir = NULL;
 	}
 }
 
 /*
  * Common file system object access control check routine.  Accepts a
  * vnode's type, "mode", uid and gid, requested access mode, credentials,
  * and optional call-by-reference privused argument allowing vaccess()
  * to indicate to the caller whether privilege was used to satisfy the
  * request.  Returns 0 on success, or an errno on failure.
  *
  * type      - vnode type of the object (not examined by this routine).
  * file_mode - permission bits of the object (S_I[RWX]{USR,GRP,OTH}).
  * file_uid  - owner of the object.
  * file_gid  - group of the object.
  * acc_mode  - requested access: any combination of VREAD, VWRITE,
  *             VEXEC and VADMIN.
  * cred      - credentials of the requester.
  * privused  - if not NULL, set to 1 when the request was granted only
  *             through privilege, and to 0 otherwise.
  *
  * On denial the result is EPERM when VADMIN was requested and EACCES
  * in all other cases.
  */
 int
 vaccess(type, file_mode, file_uid, file_gid, acc_mode, cred, privused)
 	enum vtype type;
 	mode_t file_mode;
 	uid_t file_uid;
 	gid_t file_gid;
 	mode_t acc_mode;
 	struct ucred *cred;
 	int *privused;
 {
 	mode_t dac_granted;
 #ifdef CAPABILITIES
 	mode_t cap_granted;
 #endif
 
 	/*
 	 * Look for a normal, non-privileged way to access the file/directory
 	 * as requested.  If it exists, go with that.
 	 */
 
 	if (privused != NULL)
 		*privused = 0;
 
 	dac_granted = 0;
 
 	/*
 	 * Check the owner.  Only the owner class is consulted when the
 	 * uid matches; group and other bits are never tried as a fallback.
 	 */
 	if (cred->cr_uid == file_uid) {
 		/* Ownership alone grants VADMIN. */
 		dac_granted |= VADMIN;
 		if (file_mode & S_IXUSR)
 			dac_granted |= VEXEC;
 		if (file_mode & S_IRUSR)
 			dac_granted |= VREAD;
 		if (file_mode & S_IWUSR)
 			dac_granted |= VWRITE;
 
 		if ((acc_mode & dac_granted) == acc_mode)
 			return (0);
 
 		goto privcheck;
 	}
 
 	/* Otherwise, check the groups (first match) */
 	if (groupmember(file_gid, cred)) {
 		if (file_mode & S_IXGRP)
 			dac_granted |= VEXEC;
 		if (file_mode & S_IRGRP)
 			dac_granted |= VREAD;
 		if (file_mode & S_IWGRP)
 			dac_granted |= VWRITE;
 
 		if ((acc_mode & dac_granted) == acc_mode)
 			return (0);
 
 		goto privcheck;
 	}
 
 	/* Otherwise, check everyone else. */
 	if (file_mode & S_IXOTH)
 		dac_granted |= VEXEC;
 	if (file_mode & S_IROTH)
 		dac_granted |= VREAD;
 	if (file_mode & S_IWOTH)
 		dac_granted |= VWRITE;
 	if ((acc_mode & dac_granted) == acc_mode)
 		return (0);
 
 privcheck:
 	/* The mode bits did not suffice; see whether privilege does. */
 	if (!suser_xxx(cred, NULL, PRISON_ROOT)) {
 		/* XXX audit: privilege used */
 		if (privused != NULL)
 			*privused = 1;
 		return (0);
 	}
 
 #ifdef CAPABILITIES
 	/*
 	 * Build a capability mask to determine if the set of capabilities
 	 * satisfies the requirements when combined with the granted mask
 	 * from above.
 	 * For each capability, if the capability is required, bitwise
 	 * or the request type onto the cap_granted mask.
 	 */
 	cap_granted = 0;
 	if ((acc_mode & VEXEC) && ((dac_granted & VEXEC) == 0) &&
 	    !cap_check_xxx(cred, NULL, CAP_DAC_EXECUTE, PRISON_ROOT))
 	    cap_granted |= VEXEC;
 
 	if ((acc_mode & VREAD) && ((dac_granted & VREAD) == 0) &&
 	    !cap_check_xxx(cred, NULL, CAP_DAC_READ_SEARCH, PRISON_ROOT))
 		cap_granted |= VREAD;
 
 	if ((acc_mode & VWRITE) && ((dac_granted & VWRITE) == 0) &&
 	    !cap_check_xxx(cred, NULL, CAP_DAC_WRITE, PRISON_ROOT))
 		cap_granted |= VWRITE;
 
 	if ((acc_mode & VADMIN) && ((dac_granted & VADMIN) == 0) &&
 	    !cap_check_xxx(cred, NULL, CAP_FOWNER, PRISON_ROOT))
 		cap_granted |= VADMIN;
 
 	/* Granted if mode bits and capabilities together cover the request. */
 	if ((acc_mode & (cap_granted | dac_granted)) == acc_mode) {
 		/* XXX audit: privilege used */
 		if (privused != NULL)
 			*privused = 1;
 		return (0);
 	}
 #endif
 
 	/* Denied: administrative requests fail with EPERM, others EACCES. */
 	return ((acc_mode & VADMIN) ? EPERM : EACCES);
 }
Index: head/sys/kern/vfs_subr.c
===================================================================
--- head/sys/kern/vfs_subr.c	(revision 75579)
+++ head/sys/kern/vfs_subr.c	(revision 75580)
@@ -1,3150 +1,3150 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
  * $FreeBSD$
  */
 
 /*
  * External virtual filesystem routines
  */
 #include "opt_ddb.h"
 #include "opt_ffs.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/conf.h>
 #include <sys/dirent.h>
 #include <sys/domain.h>
 #include <sys/eventhandler.h>
 #include <sys/event.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/ktr.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/reboot.h>
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 
 #include <machine/limits.h>
 
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 #include <vm/vm_zone.h>
 
 static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");
 
 static void	addalias __P((struct vnode *vp, dev_t nvp_rdev));
 static void	insmntque __P((struct vnode *vp, struct mount *mp));
 static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
 
 /*
  * Number of vnodes in existence.  Increased whenever getnewvnode()
  * allocates a new vnode, never decreased.
  */
 static unsigned long	numvnodes;
 SYSCTL_LONG(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
 
 /*
  * Conversion tables for conversion from vnode types to inode formats
  * and back.
  *
  * iftovt_tab has 16 entries yielding a vnode type; presumably it is
  * indexed by the file-type nibble of an inode mode (TODO confirm
  * against the IFTOVT() user).  Slots without a matching type hold
  * VNON and the last slot holds VBAD.
  *
  * vttoif_tab has 9 entries yielding S_IF* format bits; presumably it
  * is indexed by enum vtype (TODO confirm against the VTTOIF() user).
  */
 enum vtype iftovt_tab[16] = {
 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
 };
 int vttoif_tab[9] = {
 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
 	S_IFSOCK, S_IFIFO, S_IFMT,
 };
 
 /*
  * List of vnodes that are ready for recycling.
  */
 static TAILQ_HEAD(freelst, vnode) vnode_free_list;
 
 /*
  * Minimum number of free vnodes.  If there are fewer than this many free
  * vnodes, getnewvnode() will return a newly allocated vnode.
  */
 static u_long wantfreevnodes = 25;
 SYSCTL_LONG(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
 /* Number of vnodes in the free list. */
 static u_long freevnodes = 0;
 SYSCTL_LONG(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
 
 /*
  * Various variables used for debugging the new implementation of
  * reassignbuf().
  * XXX these are probably of (very) limited utility now.
  */
 static int reassignbufcalls;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, "");
 static int reassignbufloops;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW, &reassignbufloops, 0, "");
 static int reassignbufsortgood;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW, &reassignbufsortgood, 0, "");
 static int reassignbufsortbad;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad, 0, "");
 /* Set to 0 for old insertion-sort based reassignbuf, 1 for modern method. */
 static int reassignbufmethod = 1;
 SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");
 
 #ifdef ENABLE_VFS_IOOPT
 /* See NOTES for a description of this setting. */
 int vfs_ioopt = 0;
 SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, "");
 #endif
 
 /* List of mounted filesystems. */
 struct mntlist mountlist = TAILQ_HEAD_INITIALIZER(mountlist);
 
 /* For any iteration/modification of mountlist */
 struct mtx mountlist_mtx;
 
 /* For any iteration/modification of mnt_vnodelist */
 struct mtx mntvnode_mtx;
 
 /*
  * Cache for the mount type id assigned to NFS.  This is used for
  * special checks in nfs/nfs_nqlease.c and vm/vnode_pager.c.
  */
 int	nfs_mount_type = -1;
 
 /* To keep more than one thread at a time from running vfs_getnewfsid */
 static struct mtx mntid_mtx;
 
 /* For any iteration/modification of vnode_free_list */
 static struct mtx vnode_free_list_mtx;
 
 /*
  * For any iteration/modification of dev->si_hlist (linked through
  * v_specnext)
  */
 static struct mtx spechash_mtx;
 
 /* Publicly exported FS */
 struct nfs_public nfs_pub;
 
 /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */
 static vm_zone_t vnode_zone;
 
 /* Set to 1 to print out reclaim of active vnodes */
 int	prtactive = 0;
 
 /*
  * The workitem queue.
  * 
  * It is useful to delay writes of file data and filesystem metadata
  * for tens of seconds so that quickly created and deleted files need
  * not waste disk bandwidth being created and removed. To realize this,
  * we append vnodes to a "workitem" queue. When running with a soft
  * updates implementation, most pending metadata dependencies should
  * not wait for more than a few seconds. Thus, writes to mounted block
  * devices are delayed only about half the time that file data is delayed.
  * Similarly, directory updates are more critical, so are only delayed
  * about a third the time that file data is delayed. Thus, there are
  * SYNCER_MAXDELAY queues that are processed round-robin at a rate of
  * one each second (driven off the filesystem syncer process). The
  * syncer_delayno variable indicates the next queue that is to be processed.
  * Items that need to be processed soon are placed in this queue:
  *
  *	syncer_workitem_pending[syncer_delayno]
  *
  * A delay of fifteen seconds is done by placing the request fifteen
  * entries later in the queue:
  *
  *	syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask]
  *
  */
 /* Index of the next worklist queue to be processed. */
 static int syncer_delayno = 0;
 /* Hash mask filled in by hashinit() in vntblinit(). */
 static long syncer_mask; 
 LIST_HEAD(synclist, vnode);
 /* Array of worklist queues, allocated by hashinit() in vntblinit(). */
 static struct synclist *syncer_workitem_pending;
 
 #define SYNCER_MAXDELAY		32
 /* Reset to syncer_mask + 1 in vntblinit() once the table exists. */
 static int syncer_maxdelay = SYNCER_MAXDELAY;	/* maximum delay time */
 time_t syncdelay = 30;		/* max time to delay syncing data */
 /*
  * NOTE(review): the three delays below are time_t but are exported
  * with SYSCTL_INT; confirm that time_t has the size of int on every
  * supported platform.
  */
 time_t filedelay = 30;		/* time to delay syncing files */
 SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, "");
 time_t dirdelay = 29;		/* time to delay syncing directories */
 SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, "");
 time_t metadelay = 28;		/* time to delay syncing metadata */
 SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, "");
 static int rushjob;		/* number of slots to run ASAP */
 static int stat_rush_requests;	/* number of times I/O speeded up */
 SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, "");
 
 /*
  * Number of vnodes we want to exist at any one time.  This is mostly used
  * to size hash tables in vnode-related code.  It is normally not used in
  * getnewvnode(), as wantfreevnodes is normally nonzero.
  *
  * XXX desiredvnodes is historical cruft and should not exist.
  */
 int desiredvnodes;
 SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, 
     &desiredvnodes, 0, "Maximum number of vnodes");
 
 static void	vfs_free_addrlist __P((struct netexport *nep));
 static int	vfs_free_netcred __P((struct radix_node *rn, void *w));
 static int	vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep,
 				       struct export_args *argp));
 
 /*
  * Initialize the vnode management data structures.
  */
 static void
 vntblinit(void *dummy __unused)
 {
 
 	/* Derive the vnode target from the process limit and page count. */
 	desiredvnodes = maxproc + cnt.v_page_count / 4;
 
 	/* Mutexes guarding the mount list, mount IDs and device aliases. */
 	mtx_init(&mountlist_mtx, "mountlist", MTX_DEF);
 	mtx_init(&mntvnode_mtx, "mntvnode", MTX_DEF);
 	mtx_init(&mntid_mtx, "mntid", MTX_DEF);
 	mtx_init(&spechash_mtx, "spechash", MTX_DEF);
 
 	/* Vnode free list, its mutex, and the zone vnodes come from. */
 	mtx_init(&vnode_free_list_mtx, "vnode_free_list", MTX_DEF);
 	TAILQ_INIT(&vnode_free_list);
 	vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5);
 
 	/*
 	 * Initialize the filesystem syncer.  hashinit() reports the mask
 	 * of the table it built; the delay limit is adjusted to match.
 	 */
 	syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE,
 	    &syncer_mask);
 	syncer_maxdelay = syncer_mask + 1;
 }
 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL)
 
 
 /*
  * Mark a mount point as busy. Used to synchronize access and to delay
  * unmounting. Interlock is not released on failure.
  */
 int
 vfs_busy(mp, flags, interlkp, p)
 	struct mount *mp;
 	int flags;
 	struct mtx *interlkp;
 	struct proc *p;
 {
 	int lkflags;
 
 	if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
 		if (flags & LK_NOWAIT)
 			return (ENOENT);
 		mp->mnt_kern_flag |= MNTK_MWAIT;
 		/*
 		 * Since all busy locks are shared except the exclusive
 		 * lock granted when unmounting, the only place that a
 		 * wakeup needs to be done is at the release of the
 		 * exclusive lock at the end of dounmount.
 		 */
 		msleep((caddr_t)mp, interlkp, PVFS, "vfs_busy", 0);
 		return (ENOENT);
 	}
 	lkflags = LK_SHARED | LK_NOPAUSE;
 	if (interlkp)
 		lkflags |= LK_INTERLOCK;
 	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
 		panic("vfs_busy: unexpected lock failure");
 	return (0);
 }
 
 /*
  * Free a busy filesystem.
  */
 void
 vfs_unbusy(mp, p)
 	struct mount *mp;
 	struct proc *p;
 {
 
 	/* Give back the busy lock on the mount point. */
 	(void)lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
 }
 
 /*
  * Lookup a filesystem type, and if found allocate and initialize
  * a mount structure for it.
  *
  * Devname is usually updated by mount(8) after booting.
  */
 int
 vfs_rootmountalloc(fstypename, devname, mpp)
 	char *fstypename;
 	char *devname;
 	struct mount **mpp;
 {
 	struct proc *p = curproc;	/* XXX */
 	struct vfsconf *vfsp;
 	struct mount *mp;
 
 	if (fstypename == NULL)
 		return (ENODEV);
 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 		if (!strcmp(vfsp->vfc_name, fstypename))
 			break;
 	if (vfsp == NULL)
 		return (ENODEV);
 	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO);
 	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE);
 	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
 	LIST_INIT(&mp->mnt_vnodelist);
 	mp->mnt_vfc = vfsp;
 	mp->mnt_op = vfsp->vfc_vfsops;
 	mp->mnt_flag = MNT_RDONLY;
 	mp->mnt_vnodecovered = NULLVP;
 	vfsp->vfc_refcount++;
 	mp->mnt_iosize_max = DFLTPHYS;
 	mp->mnt_stat.f_type = vfsp->vfc_typenum;
 	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
 	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
 	mp->mnt_stat.f_mntonname[0] = '/';
 	mp->mnt_stat.f_mntonname[1] = 0;
 	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
 	*mpp = mp;
 	return (0);
 }
 
 /*
  * Find an appropriate filesystem to use for the root.  A preselected
  * mountroot routine is used when one is set; otherwise every known
  * filesystem that provides a mountroot routine is tried in list order
  * until one succeeds.  Returns ENODEV when none works.
  *
  * Compiled out (notdef).
  */
 #ifdef notdef	/* XXX JH */
 int
 lite2_vfs_mountroot()
 {
 	extern int (*lite2_mountroot) __P((void));
 	struct vfsconf *conf;
 	int error;
 
 	if (lite2_mountroot != NULL)
 		return ((*lite2_mountroot)());
 
 	for (conf = vfsconf; conf != NULL; conf = conf->vfc_next) {
 		if (conf->vfc_mountroot != NULL) {
 			error = (*conf->vfc_mountroot)();
 			if (error == 0)
 				return (0);
 			printf("%s_mountroot failed: %d\n", conf->vfc_name,
 			    error);
 		}
 	}
 	return (ENODEV);
 }
 #endif
 
 /*
  * Look up a mount point by filesystem identifier.
  *
  * Scans the mount list under mountlist_mtx and returns the first mount
  * whose f_fsid matches both words of *fsid, or NULL when there is none.
  */
 struct mount *
 vfs_getvfs(fsid)
 	fsid_t *fsid;
 {
 	struct mount *cand;
 
 	mtx_lock(&mountlist_mtx);
 	TAILQ_FOREACH(cand, &mountlist, mnt_list) {
 		if (cand->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
 		    cand->mnt_stat.f_fsid.val[1] == fsid->val[1])
 			break;
 	}
 	/* cand is NULL here when the loop ran off the end of the list. */
 	mtx_unlock(&mountlist_mtx);
 	return (cand);
 }
 
 /*
  * Get a new unique fsid.  Try to make its val[0] unique, since this value
  * will be used to create fake device numbers for stat().  Also try (but
  * not so hard) to make its val[0] unique mod 2^16, since some emulators
  * only support 16-bit device numbers.  We end up with unique val[0]'s for
  * the first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8
  * calls.
  *
  * Keep in mind that several mounts may be running in parallel.  Starting
  * the search one past where the previous search terminated is both a
  * micro-optimization and a defense against returning the same fsid to
  * different mounts.
  *
  * The chosen fsid is stored in mp->mnt_stat.f_fsid; the whole search
  * runs under mntid_mtx.
  */
 void
 vfs_getnewfsid(mp)
 	struct mount *mp;
 {
 	static u_int16_t mntid_base;
 	fsid_t candidate;
 	int typenum, typebits;
 
 	mtx_lock(&mntid_mtx);
 	typenum = mp->mnt_vfc->vfc_typenum;
 	candidate.val[1] = typenum;
 	typebits = (typenum & 0xFF) << 24;
 
 	/* Keep generating candidates until one is not already mounted. */
 	do {
 		candidate.val[0] = makeudev(255, typebits |
 		    ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF));
 		mntid_base++;
 	} while (vfs_getvfs(&candidate) != NULL);
 
 	mp->mnt_stat.f_fsid.val[0] = candidate.val[0];
 	mp->mnt_stat.f_fsid.val[1] = candidate.val[1];
 	mtx_unlock(&mntid_mtx);
 }
 
 /*
  * Knob to control the precision of file timestamps, exported read-write
  * as the vfs.timestamp_precision sysctl and consumed by vfs_timestamp():
  *
  *   0 = seconds only; nanoseconds zeroed.
  *   1 = seconds and nanoseconds, accurate within 1/HZ.
  *   2 = seconds and nanoseconds, truncated to microseconds.
  * >=3 = seconds and nanoseconds, maximum precision.
  */
 enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };
 
 /* Defaults to seconds-only timestamps. */
 static int timestamp_precision = TSP_SEC;
 SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
     &timestamp_precision, 0, "");
 
 /*
  * Get a current timestamp into *tsp, using the clock source selected by
  * the timestamp_precision knob.  Any value outside the named settings is
  * treated like TSP_NSEC.
  */
 void
 vfs_timestamp(tsp)
 	struct timespec *tsp;
 {
 	struct timeval tv;
 	int precision;
 
 	/* Read the knob once so a concurrent sysctl cannot split the choice. */
 	precision = timestamp_precision;
 
 	if (precision == TSP_SEC) {
 		tsp->tv_sec = time_second;
 		tsp->tv_nsec = 0;
 	} else if (precision == TSP_HZ) {
 		getnanotime(tsp);
 	} else if (precision == TSP_USEC) {
 		microtime(&tv);
 		TIMEVAL_TO_TIMESPEC(&tv, tsp);
 	} else {
 		/* TSP_NSEC and anything larger. */
 		nanotime(tsp);
 	}
 }
 
 /*
  * Reset a vattr to "nothing specified": the type becomes VNON,
  * va_vaflags is cleared, and every other attribute is set to VNOVAL.
  */
 void
 vattr_null(vap)
 	register struct vattr *vap;
 {
 
 	vap->va_type = VNON;
 	vap->va_vaflags = 0;
 
 	/* Identity and ownership. */
 	vap->va_mode = VNOVAL;
 	vap->va_nlink = VNOVAL;
 	vap->va_uid = VNOVAL;
 	vap->va_gid = VNOVAL;
 	vap->va_fsid = VNOVAL;
 	vap->va_fileid = VNOVAL;
 	vap->va_rdev = VNOVAL;
 	vap->va_flags = VNOVAL;
 	vap->va_gen = VNOVAL;
 
 	/* Sizes. */
 	vap->va_size = VNOVAL;
 	vap->va_bytes = VNOVAL;
 	vap->va_blocksize = VNOVAL;
 
 	/* Timestamps. */
 	vap->va_atime.tv_sec = VNOVAL;
 	vap->va_atime.tv_nsec = VNOVAL;
 	vap->va_mtime.tv_sec = VNOVAL;
 	vap->va_mtime.tv_nsec = VNOVAL;
 	vap->va_ctime.tv_sec = VNOVAL;
 	vap->va_ctime.tv_nsec = VNOVAL;
 }
 
 /*
  * Routines having to do with the management of the vnode table.
  */
 
 /*
  * Return the next vnode from the free list.
  *
  * Either recycles a vnode taken from vnode_free_list or allocates a new
  * one from vnode_zone, then sets it up with the given tag and operations
  * vector and queues it on mount point mp.  The vnode is returned through
  * *vpp with v_type VNON, v_usecount 1 and v_data cleared.  Always
  * returns 0.
  */
 int
 getnewvnode(tag, mp, vops, vpp)
 	enum vtagtype tag;
 	struct mount *mp;
 	vop_t **vops;
 	struct vnode **vpp;
 {
 	int s, count;
 	struct proc *p = curproc;	/* XXX */
 	struct vnode *vp = NULL;
 	struct mount *vnmp;
 	vm_object_t object;
 
 	/*
 	 * We take the least recently used vnode from the freelist
 	 * if we can get it and it has no cached pages, and no
 	 * namecache entries are relative to it.
 	 * Otherwise we allocate a new vnode
 	 */
 
 	s = splbio();
 	mtx_lock(&vnode_free_list_mtx);
 
 	/*
 	 * Decide whether to recycle at all.  In the first two cases vp is
 	 * left NULL so that the allocation path below is taken.
 	 */
 	if (wantfreevnodes && freevnodes < wantfreevnodes) {
 		vp = NULL;
 	} else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
 		/* 
 		 * XXX: this is only here to be backwards compatible
 		 */
 		vp = NULL;
 	} else for (count = 0; count < freevnodes; count++) {
 		/*
 		 * Examine at most freevnodes candidates; each rejected one
 		 * is rotated to the tail of the free list.
 		 */
 		vp = TAILQ_FIRST(&vnode_free_list);
 		if (vp == NULL || vp->v_usecount)
 			panic("getnewvnode: free vnode isn't");
 		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 		/*
 		 * Don't recycle if active in the namecache or
 		 * if it still has cached pages or we cannot get
 		 * its interlock.
 		 */
 		if (LIST_FIRST(&vp->v_cache_src) != NULL ||
 		    (VOP_GETVOBJECT(vp, &object) == 0 &&
 		     (object->resident_page_count || object->ref_count)) ||
 		    !mtx_trylock(&vp->v_interlock)) {
 			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 			vp = NULL;
 			continue;
 		}
 		/*
 		 * Skip over it if its filesystem is being suspended.
 		 */
 		if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
 			break;
 		mtx_unlock(&vp->v_interlock);
 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 		vp = NULL;
 	}
 	if (vp) {
 		/*
 		 * Recycle path: vp is off the free list, its interlock is
 		 * held and vn_start_write() succeeded for vnmp.
 		 */
 		vp->v_flag |= VDOOMED;
 		vp->v_flag &= ~VFREE;
 		freevnodes--;
 		mtx_unlock(&vnode_free_list_mtx);
 		cache_purge(vp);
 		vp->v_lease = NULL;
 		/*
 		 * NOTE(review): vgonel() is entered with the interlock held
 		 * and presumably drops it, since only the VBAD branch
 		 * unlocks explicitly -- confirm against vgonel().
 		 */
 		if (vp->v_type != VBAD) {
 			vgonel(vp, p);
 		} else {
 			mtx_unlock(&vp->v_interlock);
 		}
 		vn_finished_write(vnmp);
 
 #ifdef INVARIANTS
 		{
 			/* This s shadows the outer one on purpose-built scope. */
 			int s;
 
 			if (vp->v_data)
 				panic("cleaned vnode isn't");
 			s = splbio();
 			if (vp->v_numoutput)
 				panic("Clean vnode has pending I/O's");
 			splx(s);
 			if (vp->v_writecount != 0)
 				panic("Non-zero write count");
 		}
 #endif
 		/* Wipe the per-use state left over from the previous owner. */
 		vp->v_flag = 0;
 		vp->v_lastw = 0;
 		vp->v_lasta = 0;
 		vp->v_cstart = 0;
 		vp->v_clen = 0;
 		vp->v_socket = 0;
 	} else {
 		/* Allocation path: build a zeroed vnode from the zone. */
 		mtx_unlock(&vnode_free_list_mtx);
 		vp = (struct vnode *) zalloc(vnode_zone);
 		bzero((char *) vp, sizeof *vp);
 		mtx_init(&vp->v_interlock, "vnode interlock", MTX_DEF);
 		vp->v_dd = vp;
 		mtx_init(&vp->v_pollinfo.vpi_lock, "vnode pollinfo", MTX_DEF);
 		cache_purge(vp);
 		LIST_INIT(&vp->v_cache_src);
 		TAILQ_INIT(&vp->v_cache_dst);
 		numvnodes++;
 	}
 
 	/* Initialization common to recycled and freshly allocated vnodes. */
 	TAILQ_INIT(&vp->v_cleanblkhd);
 	TAILQ_INIT(&vp->v_dirtyblkhd);
 	vp->v_type = VNON;
 	vp->v_tag = tag;
 	vp->v_op = vops;
 	lockinit(&vp->v_lock, PVFS, "vnlock", 0, LK_NOPAUSE);
 	insmntque(vp, mp);
 	*vpp = vp;
 	vp->v_usecount = 1;
 	vp->v_data = 0;
 	splx(s);
 
 	vfs_object_create(vp, p, p->p_ucred);
 	return (0);
 }
 
 /*
  * Move a vnode from one mount queue to another.
  *
  * The vnode is unlinked from the vnode list of its current mount (if it
  * has one), v_mount is set to mp, and the vnode is linked at the head of
  * mp's vnode list unless mp is NULL.  Done under mntvnode_mtx.
  */
 static void
 insmntque(vp, mp)
 	register struct vnode *vp;
 	register struct mount *mp;
 {
 
 	mtx_lock(&mntvnode_mtx);
 
 	/* Leave the old mount point's list, if on one. */
 	if (vp->v_mount != NULL)
 		LIST_REMOVE(vp, v_mntvnodes);
 
 	/* Join the new mount point's list, if there is a new mount. */
 	vp->v_mount = mp;
 	if (mp != NULL) {
 		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
 	}
 
 	mtx_unlock(&mntvnode_mtx);
 }
 
 /*
  * Update outstanding I/O count and do wakeup if requested.
  */
 void
 vwakeup(bp)
 	register struct buf *bp;
 {
 	register struct vnode *vp;
 
 	bp->b_flags &= ~B_WRITEINPROG;
 	if ((vp = bp->b_vp)) {
 		vp->v_numoutput--;
 		if (vp->v_numoutput < 0)
 			panic("vwakeup: neg numoutput");
 		if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) {
 			vp->v_flag &= ~VBWAIT;
 			wakeup((caddr_t) &vp->v_numoutput);
 		}
 	}
 }
 
 /*
  * Flush out and invalidate all buffers associated with a vnode.
  * Called with the underlying object locked.
  *
  * With V_SAVE in flags, pending output is waited for and dirty buffers
  * are written (VOP_FSYNC) before anything is discarded; without it the
  * buffers are simply invalidated.  slpflag and slptimeo are passed to
  * the sleeps taken while waiting.
  *
  * Returns 0 on success, or the error from tsleep(), VOP_FSYNC() or
  * BUF_TIMELOCK().  Panics if buffers remain on the vnode afterwards.
  */
 int
 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
 	register struct vnode *vp;
 	int flags;
 	struct ucred *cred;
 	struct proc *p;
 	int slpflag, slptimeo;
 {
 	register struct buf *bp;
 	struct buf *nbp, *blist;
 	int s, error;
 	vm_object_t object;
 
 	if (flags & V_SAVE) {
 		s = splbio();
 		/* Let writes already in flight drain first. */
 		while (vp->v_numoutput) {
 			vp->v_flag |= VBWAIT;
 			error = tsleep((caddr_t)&vp->v_numoutput,
 			    slpflag | (PRIBIO + 1), "vinvlbuf", slptimeo);
 			if (error) {
 				splx(s);
 				return (error);
 			}
 		}
 		/* Push out whatever is still dirty, synchronously. */
 		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
 			splx(s);
 			if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) != 0)
 				return (error);
 			s = splbio();
 			if (vp->v_numoutput > 0 ||
 			    !TAILQ_EMPTY(&vp->v_dirtyblkhd))
 				panic("vinvalbuf: dirty bufs");
 		}
 		splx(s);
   	}
 	s = splbio();
 	/*
 	 * Repeat until both buffer lists are empty.  Each pass walks the
 	 * clean list if it has entries, otherwise the dirty list; a "break"
 	 * out of the inner loop restarts the scan from the list heads.
 	 */
 	for (;;) {
 		blist = TAILQ_FIRST(&vp->v_cleanblkhd);
 		if (!blist)
 			blist = TAILQ_FIRST(&vp->v_dirtyblkhd);
 		if (!blist)
 			break;
 
 		for (bp = blist; bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
 				/*
 				 * Buffer is busy: sleep for it.  ENOLCK
 				 * means rescan; any other result, zero
 				 * included, is returned to the caller.
 				 */
 				error = BUF_TIMELOCK(bp,
 				    LK_EXCLUSIVE | LK_SLEEPFAIL,
 				    "vinvalbuf", slpflag, slptimeo);
 				if (error == ENOLCK)
 					break;
 				splx(s);
 				return (error);
 			}
 			/*
 			 * XXX Since there are no node locks for NFS, I
 			 * believe there is a slight chance that a delayed
 			 * write will occur while sleeping just above, so
 			 * check for it.  Note that vfs_bio_awrite expects
-			 * buffers to reside on a queue, while VOP_BWRITE and
+			 * buffers to reside on a queue, while BUF_WRITE and
 			 * brelse do not.
 			 */
 			if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
 				(flags & V_SAVE)) {
 
 				if (bp->b_vp == vp) {
 					if (bp->b_flags & B_CLUSTEROK) {
 						BUF_UNLOCK(bp);
 						vfs_bio_awrite(bp);
 					} else {
 						bremfree(bp);
 						bp->b_flags |= B_ASYNC;
 						BUF_WRITE(bp);
 					}
 				} else {
 					bremfree(bp);
 					(void) BUF_WRITE(bp);
 				}
 				/* The lists may have changed; rescan. */
 				break;
 			}
 			/* Discard the buffer without writing it. */
 			bremfree(bp);
 			bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
 			bp->b_flags &= ~B_ASYNC;
 			brelse(bp);
 		}
 	}
 
 	/* Wait for any writes started above to complete. */
 	while (vp->v_numoutput > 0) {
 		vp->v_flag |= VBWAIT;
 		tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0);
 	}
 
 	splx(s);
 
 	/*
 	 * Destroy the copy in the VM cache, too.
 	 */
 	mtx_lock(&vp->v_interlock);
 	if (VOP_GETVOBJECT(vp, &object) == 0) {
 		vm_object_page_remove(object, 0, 0,
 			(flags & V_SAVE) ? TRUE : FALSE);
 	}
 	mtx_unlock(&vp->v_interlock);
 
 	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) || !TAILQ_EMPTY(&vp->v_cleanblkhd))
 		panic("vinvalbuf: flush failed");
 	return (0);
 }
 
 /*
  * Truncate a file's buffer and pages to a specified length.  This
  * is in lieu of the old vinvalbuf mechanism, which performed unneeded
  * sync activity.
  *
  * Buffers on vp whose logical block number is at or beyond the first
  * block past `length' (in units of blksize) are invalidated and
  * released.  When length is non-zero, delayed-write buffers with a
  * negative logical block number are written out.  The routine then
  * waits for outstanding output and resizes the pager object.
  * Always returns 0.
  */
 int
 vtruncbuf(vp, cred, p, length, blksize)
 	register struct vnode *vp;
 	struct ucred *cred;
 	struct proc *p;
 	off_t length;
 	int blksize;
 {
 	register struct buf *bp;
 	struct buf *nbp;
 	int s, anyfreed;
 	int trunclbn;
 
 	/*
 	 * Round up to the *next* lbn.
 	 *
 	 * NOTE(review): trunclbn is an int while length is an off_t;
 	 * confirm the quotient always fits for supported file sizes.
 	 */
 	trunclbn = (length + blksize - 1) / blksize;
 
 	s = splbio();
 restart:
 	/* Keep sweeping both lists until a full pass frees nothing. */
 	anyfreed = 1;
 	for (;anyfreed;) {
 		anyfreed = 0;
 		for (bp = TAILQ_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if (bp->b_lblkno >= trunclbn) {
 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
 					/* Busy: wait for it, then rescan. */
 					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
 					goto restart;
 				} else {
 					bremfree(bp);
 					bp->b_flags |= (B_INVAL | B_RELBUF);
 					bp->b_flags &= ~B_ASYNC;
 					brelse(bp);
 					anyfreed = 1;
 				}
 				/*
 				 * If the saved successor is no longer a
 				 * clean buffer of this vnode, the list
 				 * changed under us; start over.
 				 */
 				if (nbp &&
 				    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
 				    (nbp->b_vp != vp) ||
 				    (nbp->b_flags & B_DELWRI))) {
 					goto restart;
 				}
 			}
 		}
 
 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if (bp->b_lblkno >= trunclbn) {
 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
 					/* Busy: wait for it, then rescan. */
 					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
 					goto restart;
 				} else {
 					bremfree(bp);
 					bp->b_flags |= (B_INVAL | B_RELBUF);
 					bp->b_flags &= ~B_ASYNC;
 					brelse(bp);
 					anyfreed = 1;
 				}
 				/*
 				 * Same check for the dirty list: the saved
 				 * successor must still be a dirty,
 				 * delayed-write buffer of this vnode.
 				 */
 				if (nbp &&
 				    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
 				    (nbp->b_vp != vp) ||
 				    (nbp->b_flags & B_DELWRI) == 0)) {
 					goto restart;
 				}
 			}
 		}
 	}
 
 	if (length > 0) {
 restartsync:
 		/*
 		 * Write out delayed-write buffers with negative logical
 		 * block numbers.  Each write restarts this scan; a busy
 		 * buffer restarts the whole routine.
 		 */
 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if ((bp->b_flags & B_DELWRI) && (bp->b_lblkno < 0)) {
 				if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
 					BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL);
 					goto restart;
 				} else {
 					bremfree(bp);
 					if (bp->b_vp == vp) {
 						bp->b_flags |= B_ASYNC;
 					} else {
 						bp->b_flags &= ~B_ASYNC;
 					}
 					BUF_WRITE(bp);
 				}
 				goto restartsync;
 			}
 
 		}
 	}
 
 	/* Wait for all output on the vnode to finish. */
 	while (vp->v_numoutput > 0) {
 		vp->v_flag |= VBWAIT;
 		tsleep(&vp->v_numoutput, PVM, "vbtrunc", 0);
 	}
 
 	splx(s);
 
 	vnode_pager_setsize(vp, length);
 
 	return (0);
 }
 
 /*
  * Associate a buffer with a vnode.
  *
  * The buffer must not already have a vnode.  A hold is taken on vp, the
  * buffer records vp and its device, and the buffer is appended to the
  * vnode's clean buffer list.
  */
 void
 bgetvp(vp, bp)
 	register struct vnode *vp;
 	register struct buf *bp;
 {
 	int ipl;
 
 	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
 
 	vhold(vp);
 	bp->b_vp = vp;
 	bp->b_dev = vn_todev(vp);
 
 	/* Queue on the clean list and mark the buffer accordingly. */
 	ipl = splbio();
 	bp->b_xflags = (bp->b_xflags | BX_VNCLEAN) & ~BX_VNDIRTY;
 	TAILQ_INSERT_TAIL(&vp->v_cleanblkhd, bp, b_vnbufs);
 	splx(ipl);
 }
 
 /*
  * Disassociate a buffer from a vnode.
  *
  * The buffer is unlinked from whichever of the vnode's buffer lists it
  * is on.  If that leaves the vnode with no dirty buffers while it is on
  * the syncer worklist, the vnode is taken off the worklist.  Finally the
  * buffer's vnode pointer is cleared and the hold on the vnode dropped.
  */
 void
 brelvp(bp)
 	register struct buf *bp;
 {
 	struct buflists *head;
 	struct vnode *vp;
 	int ipl;
 
 	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));
 
 	vp = bp->b_vp;
 	ipl = splbio();
 
 	/* Delete from old vnode list, if on one. */
 	if ((bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) != 0) {
 		head = (bp->b_xflags & BX_VNDIRTY) != 0 ?
 		    &vp->v_dirtyblkhd : &vp->v_cleanblkhd;
 		TAILQ_REMOVE(head, bp, b_vnbufs);
 		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
 	}
 
 	/* Nothing dirty left: the syncer no longer needs to visit vp. */
 	if ((vp->v_flag & VONWORKLST) != 0 &&
 	    TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
 		vp->v_flag &= ~VONWORKLST;
 		LIST_REMOVE(vp, v_synclist);
 	}
 	splx(ipl);
 
 	bp->b_vp = (struct vnode *) 0;
 	vdrop(vp);
 }
 
 /*
  * Add an item to the syncer work queue.
  *
  * The vnode is placed `delay' slots ahead of the syncer's current slot,
  * with delay clamped to syncer_maxdelay - 2.  A vnode that is already
  * queued is first removed from its present slot.
  */
 static void
 vn_syncer_add_to_worklist(struct vnode *vp, int delay)
 {
 	int ipl, slot, limit;
 
 	ipl = splbio();
 
 	if ((vp->v_flag & VONWORKLST) != 0) {
 		LIST_REMOVE(vp, v_synclist);
 	}
 
 	limit = syncer_maxdelay - 2;
 	if (delay > limit)
 		delay = limit;
 	slot = (syncer_delayno + delay) & syncer_mask;
 
 	LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist);
 	vp->v_flag |= VONWORKLST;
 
 	splx(ipl);
 }
 
 /*
  * Syncer kernel process: updateproc receives its proc pointer, and the
  * kproc descriptor below is handed to kproc_start() at
  * SI_SUB_KTHREAD_UPDATE to run sched_sync() under the name "syncer".
  */
 struct  proc *updateproc;
 static void sched_sync __P((void));
 static struct kproc_desc up_kp = {
 	"syncer",
 	sched_sync,
 	&updateproc
 };
 SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
 
 /*
  * System filesystem synchronizer daemon.
  *
  * Runs forever with Giant held.  Each iteration takes the worklist slot
  * at syncer_delayno, advances syncer_delayno (wrapping at
  * syncer_maxdelay), and fsyncs every vnode found on that slot.  Unless a
  * rush has been requested it then sleeps on lbolt when the pass finished
  * within the same second it started.
  */
 void 
 sched_sync(void)
 {
 	struct synclist *slp;
 	struct vnode *vp;
 	struct mount *mp;
 	long starttime;
 	int s;
 	struct proc *p = updateproc;
 
 	mtx_lock(&Giant);
 
 	EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, p,
 	    SHUTDOWN_PRI_LAST);   
 
 	for (;;) {
 		kthread_suspend_check(p);
 
 		starttime = time_second;
 
 		/*
 		 * Push files whose dirty time has expired.  Be careful
 		 * of interrupt race on slp queue.
 		 */
 		s = splbio();
 		slp = &syncer_workitem_pending[syncer_delayno];
 		syncer_delayno += 1;
 		if (syncer_delayno == syncer_maxdelay)
 			syncer_delayno = 0;
 		splx(s);
 
 		while ((vp = LIST_FIRST(slp)) != NULL) {
 			/*
 			 * Only sync vnodes that are currently unlocked and
 			 * whose filesystem accepts a write right now.
 			 */
 			if (VOP_ISLOCKED(vp, NULL) == 0 &&
 			    vn_start_write(vp, &mp, V_NOWAIT) == 0) {
 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 				(void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p);
 				VOP_UNLOCK(vp, 0, p);
 				vn_finished_write(mp);
 			}
 			s = splbio();
 			/*
 			 * Still at the head of this slot: the vnode was
 			 * either skipped above or is still dirty after the
 			 * fsync, so move it to a later slot to guarantee
 			 * the loop makes progress.
 			 */
 			if (LIST_FIRST(slp) == vp) {
 				/*
 				 * Note: v_tag VT_VFS vps can remain on the
 				 * worklist too with no dirty blocks, but 
 				 * since sync_fsync() moves it to a different 
 				 * slot we are safe.
 				 */
 				if (TAILQ_EMPTY(&vp->v_dirtyblkhd) &&
 				    !vn_isdisk(vp, NULL))
 					panic("sched_sync: fsync failed vp %p tag %d", vp, vp->v_tag);
 				/*
 				 * Put us back on the worklist.  The worklist
 				 * routine will remove us from our current
 				 * position and then add us back in at a later
 				 * position.
 				 */
 				vn_syncer_add_to_worklist(vp, syncdelay);
 			}
 			splx(s);
 		}
 
 		/*
 		 * Do soft update processing.
 		 */
 #ifdef SOFTUPDATES
 		softdep_process_worklist(NULL);
 #endif
 
 		/*
 		 * The variable rushjob allows the kernel to speed up the
 		 * processing of the filesystem syncer process. A rushjob
 		 * value of N tells the filesystem syncer to process the next
 		 * N seconds worth of work on its queue ASAP. Currently rushjob
 		 * is used by the soft update code to speed up the filesystem
 		 * syncer process when the incore state is getting so far
 		 * ahead of the disk that the kernel memory pool is being
 		 * threatened with exhaustion.
 		 */
 		if (rushjob > 0) {
 			rushjob -= 1;
 			continue;
 		}
 		/*
 		 * If it has taken us less than a second to process the
 		 * current work, then wait. Otherwise start right over
 		 * again. We can still lose time if any single round
 		 * takes more than two seconds, but it does not really
 		 * matter as we are just trying to generally pace the
 		 * filesystem activity.
 		 */
 		if (time_second == starttime)
 			tsleep(&lbolt, PPAUSE, "syncer", 0);
 	}
 }
 
 /*
  * Request the syncer daemon to speed up its work.
  * We never push it to speed up more than half of its
  * normal turn time, otherwise it could take over the cpu.
  *
  * The syncer is made runnable if it is currently sleeping on lbolt.
  * Returns 1 when another rush unit was queued, 0 when rushjob has
  * already reached syncdelay / 2.
  */
 int
 speedup_syncer()
 {
 
 	/* Cut the syncer's pacing sleep short, if it is in one. */
 	mtx_lock_spin(&sched_lock);
 	if (updateproc->p_wchan == &lbolt)
 		setrunnable(updateproc);
 	mtx_unlock_spin(&sched_lock);
 
 	if (rushjob >= syncdelay / 2)
 		return(0);
 
 	rushjob += 1;
 	stat_rush_requests += 1;
 	return (1);
 }
 
 /*
  * Associate a p-buffer with a vnode.
  *
  * Also sets B_PAGING flag to indicate that vnode is not fully associated
  * with the buffer.  i.e. the bp has not been linked into the vnode or
  * ref-counted.  The buffer must not already have a vnode.
  */
 void
 pbgetvp(vp, bp)
 	register struct vnode *vp;
 	register struct buf *bp;
 {
 
 	KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
 
 	bp->b_flags |= B_PAGING;
 	bp->b_vp = vp;
 	bp->b_dev = vn_todev(vp);
 }
 
 /*
  * Disassociate a p-buffer from a vnode: clear the buffer's vnode pointer
  * and its B_PAGING flag.  Panics if the buffer appears to be linked on a
  * vnode buffer list, which a p-buffer never should be.
  */
 void
 pbrelvp(bp)
 	register struct buf *bp;
 {
 
 	KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
 
 	/* XXX REMOVE ME */
 	if (TAILQ_NEXT(bp, b_vnbufs) != NULL)
 		panic("relpbuf(): b_vp was probably reassignbuf()d %p %x",
 		    bp, (int)bp->b_flags);
 
 	bp->b_flags &= ~B_PAGING;
 	bp->b_vp = (struct vnode *) 0;
 }
 
 /*
  * Change the vnode a pager buffer is associated with.
  *
  * Only the buffer's vnode pointer is replaced; the buffer must carry
  * B_PAGING (asserted).
  */
 void
 pbreassignbuf(bp, newvp)
 	struct buf *bp;
 	struct vnode *newvp;
 {
 
 	KASSERT(bp->b_flags & B_PAGING,
 	    ("pbreassignbuf() on non phys bp %p", bp));
 	bp->b_vp = newvp;
 }
 
 /*
  * Reassign a buffer from one vnode to another.
  * Used to assign file specific control information
  * (indirect blocks) to the vnode to which they belong.
  */
 void
 reassignbuf(bp, newvp)
 	register struct buf *bp;
 	register struct vnode *newvp;
 {
 	struct buflists *listheadp;
 	int delay;
 	int s;
 
 	if (newvp == NULL) {
 		printf("reassignbuf: NULL");
 		return;
 	}
 	++reassignbufcalls;
 
 	/*
 	 * B_PAGING flagged buffers cannot be reassigned because their vp
 	 * is not fully linked in.
 	 */
 	if (bp->b_flags & B_PAGING)
 		panic("cannot reassign paging buffer");
 
 	s = splbio();
 	/*
 	 * Delete from old vnode list, if on one.
 	 */
 	if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
 		if (bp->b_xflags & BX_VNDIRTY)
 			listheadp = &bp->b_vp->v_dirtyblkhd;
 		else 
 			listheadp = &bp->b_vp->v_cleanblkhd;
 		TAILQ_REMOVE(listheadp, bp, b_vnbufs);
 		bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
 		if (bp->b_vp != newvp) {
 			vdrop(bp->b_vp);
 			bp->b_vp = NULL;	/* for clarification */
 		}
 	}
 	/*
 	 * If dirty, put on list of dirty buffers; otherwise insert onto list
 	 * of clean buffers.
 	 */
 	if (bp->b_flags & B_DELWRI) {
 		struct buf *tbp;
 
 		listheadp = &newvp->v_dirtyblkhd;
 		if ((newvp->v_flag & VONWORKLST) == 0) {
 			switch (newvp->v_type) {
 			case VDIR:
 				delay = dirdelay;
 				break;
 			case VCHR:
 				if (newvp->v_rdev->si_mountpoint != NULL) {
 					delay = metadelay;
 					break;
 				}
 				/* fall through */
 			default:
 				delay = filedelay;
 			}
 			vn_syncer_add_to_worklist(newvp, delay);
 		}
 		bp->b_xflags |= BX_VNDIRTY;
 		tbp = TAILQ_FIRST(listheadp);
 		if (tbp == NULL ||
 		    bp->b_lblkno == 0 ||
 		    (bp->b_lblkno > 0 && tbp->b_lblkno < 0) ||
 		    (bp->b_lblkno > 0 && bp->b_lblkno < tbp->b_lblkno)) {
 			TAILQ_INSERT_HEAD(listheadp, bp, b_vnbufs);
 			++reassignbufsortgood;
 		} else if (bp->b_lblkno < 0) {
 			TAILQ_INSERT_TAIL(listheadp, bp, b_vnbufs);
 			++reassignbufsortgood;
 		} else if (reassignbufmethod == 1) {
 			/*
 			 * New sorting algorithm, only handle sequential case,
 			 * otherwise append to end (but before metadata)
 			 */
 			if ((tbp = gbincore(newvp, bp->b_lblkno - 1)) != NULL &&
 			    (tbp->b_xflags & BX_VNDIRTY)) {
 				/*
 				 * Found the best place to insert the buffer
 				 */
 				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 				++reassignbufsortgood;
 			} else {
 				/*
 				 * Missed, append to end, but before meta-data.
 				 * We know that the head buffer in the list is
 				 * not meta-data due to prior conditionals.
 				 *
 				 * Indirect effects:  NFS second stage write
 				 * tends to wind up here, giving maximum 
 				 * distance between the unstable write and the
 				 * commit rpc.
 				 */
 				tbp = TAILQ_LAST(listheadp, buflists);
 				while (tbp && tbp->b_lblkno < 0)
 					tbp = TAILQ_PREV(tbp, buflists, b_vnbufs);
 				TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 				++reassignbufsortbad;
 			}
 		} else {
 			/*
 			 * Old sorting algorithm, scan queue and insert
 			 */
 			struct buf *ttbp;
 			while ((ttbp = TAILQ_NEXT(tbp, b_vnbufs)) &&
 			    (ttbp->b_lblkno < bp->b_lblkno)) {
 				++reassignbufloops;
 				tbp = ttbp;
 			}
 			TAILQ_INSERT_AFTER(listheadp, tbp, bp, b_vnbufs);
 		}
 	} else {
 		bp->b_xflags |= BX_VNCLEAN;
 		TAILQ_INSERT_TAIL(&newvp->v_cleanblkhd, bp, b_vnbufs);
 		if ((newvp->v_flag & VONWORKLST) &&
 		    TAILQ_EMPTY(&newvp->v_dirtyblkhd)) {
 			newvp->v_flag &= ~VONWORKLST;
 			LIST_REMOVE(newvp, v_synclist);
 		}
 	}
 	if (bp->b_vp != newvp) {
 		bp->b_vp = newvp;
 		vhold(bp->b_vp);
 	}
 	splx(s);
 }
 
 /*
  * Create a vnode for a device.
  * Used for mounting the root file system.
  */
 int
 bdevvp(dev, vpp)
 	dev_t dev;
 	struct vnode **vpp;
 {
 	register struct vnode *vp;
 	struct vnode *nvp;
 	int error;
 
 	if (dev == NODEV) {
 		*vpp = NULLVP;
 		return (ENXIO);
 	}
 	if (vfinddev(dev, VCHR, vpp))
 		return (0);
 	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
 	if (error) {
 		*vpp = NULLVP;
 		return (error);
 	}
 	vp = nvp;
 	vp->v_type = VCHR;
 	addalias(vp, dev);
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * Add vnode to the alias list hung off the dev_t.
  *
  * The reason for this gunk is that multiple vnodes can reference
  * the same physical device, so checking vp->v_usecount to see
  * how many users there are is inadequate; the v_usecount for
  * the vnodes need to be accumulated.  vcount() does that.
  *
  * Returns the vnode the caller should use from here on: nvp itself, or
  * an already existing vnode for the same device that has taken over
  * nvp's filesystem-specific state (in which case nvp has been released
  * and destroyed).  VBLK vnodes are returned untouched; any type other
  * than VBLK or VCHR panics.
  */
 struct vnode *
 addaliasu(nvp, nvp_rdev)
 	struct vnode *nvp;
 	udev_t nvp_rdev;
 {
 	struct vnode *ovp;
 	vop_t **ops;
 	dev_t dev;
 
 	if (nvp->v_type == VBLK)
 		return (nvp);
 	if (nvp->v_type != VCHR)
 		panic("addaliasu on non-special vnode");
 	dev = udev2dev(nvp_rdev, 0);
 	/*
 	 * Check to see if we have a bdevvp vnode with no associated
 	 * filesystem. If so, we want to associate the filesystem of
 	 * the new newly instigated vnode with the bdevvp vnode and
 	 * discard the newly created vnode rather than leaving the
 	 * bdevvp vnode lying around with no associated filesystem.
 	 */
 	if (vfinddev(dev, nvp->v_type, &ovp) == 0 || ovp->v_data != NULL) {
 		/* No reusable vnode: simply alias the new one. */
 		addalias(nvp, dev);
 		return (nvp);
 	}
 	/*
 	 * Discard unneeded vnode, but save its node specific data.
 	 * Note that if there is a lock, it is carried over in the
 	 * node specific data to the replacement vnode.
 	 */
 	vref(ovp);
 	ovp->v_data = nvp->v_data;
 	ovp->v_tag = nvp->v_tag;
 	nvp->v_data = NULL;
 	/* Give ovp a lock configured like nvp's. */
 	lockinit(&ovp->v_lock, PVFS, nvp->v_lock.lk_wmesg,
 	    nvp->v_lock.lk_timo, nvp->v_lock.lk_flags & LK_EXTFLG_MASK);
 	if (nvp->v_vnlock)
 		ovp->v_vnlock = &ovp->v_lock;
 	/* Swap the operations vectors between the two vnodes. */
 	ops = ovp->v_op;
 	ovp->v_op = nvp->v_op;
 	/* If nvp is locked, move that lock over to ovp. */
 	if (VOP_ISLOCKED(nvp, curproc)) {
 		VOP_UNLOCK(nvp, 0, curproc);
 		vn_lock(ovp, LK_EXCLUSIVE | LK_RETRY, curproc);
 	}
 	nvp->v_op = ops;
 	insmntque(ovp, nvp->v_mount);
 	vrele(nvp);
 	vgone(nvp);
 	return (ovp);
 }
 
 /*
  * Local helper that does the same as addaliasu(), but for a dev_t
  * instead of a udev_t: records dev in nvp->v_rdev and links nvp onto the
  * device's alias list under spechash_mtx.  nvp must be a VCHR vnode.
  */
 static void
 addalias(nvp, dev)
 	struct vnode *nvp;
 	dev_t dev;
 {
 
 	KASSERT(nvp->v_type == VCHR, ("addalias on non-special vnode"));
 	nvp->v_rdev = dev;
 	mtx_lock(&spechash_mtx);
 	SLIST_INSERT_HEAD(&dev->si_hlist, nvp, v_specnext);
 	mtx_unlock(&spechash_mtx);
 }
 
 /*
  * Grab a particular vnode from the free list, increment its
  * reference count and lock it. The vnode lock bit is set if the
  * vnode is being eliminated in vgone. The process is awakened
  * when the transition is completed, and an error returned to
  * indicate that the vnode is no longer usable (possibly having
  * been changed to a new file system type).
  *
  * If LK_INTERLOCK is set in flags the caller already holds the vnode
  * interlock; otherwise it is acquired here.  The interlock is not held
  * on return.  The vnode lock is only taken when flags contains a lock
  * type (LK_TYPE_MASK).
  *
  * Returns 0 on success, ENOENT if the vnode was being cleaned, or the
  * error from vn_lock() (in which case the reference is dropped again).
  */
 int
 vget(vp, flags, p)
 	register struct vnode *vp;
 	int flags;
 	struct proc *p;
 {
 	int error;
 
 	/*
 	 * If the vnode is in the process of being cleaned out for
 	 * another use, we wait for the cleaning to finish and then
 	 * return failure. Cleaning is determined by checking that
 	 * the VXLOCK flag is set.
 	 */
 	if ((flags & LK_INTERLOCK) == 0)
 		mtx_lock(&vp->v_interlock);
 	if (vp->v_flag & VXLOCK) {
 		if (vp->v_vxproc == curproc) {
 			/* We are the cleaner ourselves; do not self-deadlock. */
 			printf("VXLOCK interlock avoided\n");
 		} else {
 			vp->v_flag |= VXWANT;
 			/* PDROP: the interlock is not reacquired on wakeup. */
 			msleep((caddr_t)vp, &vp->v_interlock, PINOD | PDROP,
 			    "vget", 0);
 			return (ENOENT);
 		}
 	}
 
 	vp->v_usecount++;
 
 	if (VSHOULDBUSY(vp))
 		vbusy(vp);
 	if (flags & LK_TYPE_MASK) {
 		/* vn_lock() is handed the interlock via LK_INTERLOCK. */
 		if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) {
 			/*
 			 * must expand vrele here because we do not want
 			 * to call VOP_INACTIVE if the reference count
 			 * drops back to zero since it was never really
 			 * active. We must remove it from the free list
 			 * before sleeping so that multiple processes do
 			 * not try to recycle it.
 			 */
 			mtx_lock(&vp->v_interlock);
 			vp->v_usecount--;
 			if (VSHOULDFREE(vp))
 				vfree(vp);
 			mtx_unlock(&vp->v_interlock);
 		}
 		return (error);
 	}
 	mtx_unlock(&vp->v_interlock);
 	return (0);
 }
 
 /*
  * Increase the reference count of a vnode.
  * v_usecount is bumped under the vnode interlock; the vnode lock itself
  * is not touched.
  */
 void
 vref(struct vnode *vp)
 {
 	mtx_lock(&vp->v_interlock);
 	vp->v_usecount++;
 	mtx_unlock(&vp->v_interlock);
 }
 
 /*
  * Vnode put/release.
  * If count drops to zero, call inactive routine and return to freelist.
  */
 void
 vrele(vp)
 	struct vnode *vp;
 {
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vrele: null vp"));
 
 	mtx_lock(&vp->v_interlock);
 
 	KASSERT(vp->v_writecount < vp->v_usecount, ("vrele: missed vn_close"));
 
 	if (vp->v_usecount > 1) {
 
 		vp->v_usecount--;
 		mtx_unlock(&vp->v_interlock);
 
 		return;
 	}
 
 	if (vp->v_usecount == 1) {
 
 		vp->v_usecount--;
 		if (VSHOULDFREE(vp))
 			vfree(vp);
 	/*
 	 * If we are doing a vput, the node is already locked, and we must
 	 * call VOP_INACTIVE with the node locked.  So, in the case of
 	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
 	 */
 		if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) {
 			VOP_INACTIVE(vp, p);
 		}
 
 	} else {
 #ifdef DIAGNOSTIC
 		vprint("vrele: negative ref count", vp);
 		mtx_unlock(&vp->v_interlock);
 #endif
 		panic("vrele: negative ref cnt");
 	}
 }
 
 /* 
  * Release an already locked vnode.  This give the same effects as
  * unlock+vrele(), but takes less time and avoids releasing and
  * re-aquiring the lock (as vrele() aquires the lock internally.)
  */
 void
 vput(vp)
 	struct vnode *vp;
 {
 	struct proc *p = curproc;	/* XXX */
 
 	KASSERT(vp != NULL, ("vput: null vp"));
 	mtx_lock(&vp->v_interlock);
 	KASSERT(vp->v_writecount < vp->v_usecount, ("vput: missed vn_close"));
 
 	if (vp->v_usecount > 1) {
 
 		vp->v_usecount--;
 		VOP_UNLOCK(vp, LK_INTERLOCK, p);
 		return;
 
 	}
 
 	if (vp->v_usecount == 1) {
 
 		vp->v_usecount--;
 		if (VSHOULDFREE(vp))
 			vfree(vp);
 	/*
 	 * If we are doing a vput, the node is already locked, and we must
 	 * call VOP_INACTIVE with the node locked.  So, in the case of
 	 * vrele, we explicitly lock the vnode before calling VOP_INACTIVE.
 	 */
 		mtx_unlock(&vp->v_interlock);
 		VOP_INACTIVE(vp, p);
 
 	} else {
 #ifdef DIAGNOSTIC
 		vprint("vput: negative ref count", vp);
 #endif
 		panic("vput: negative ref cnt");
 	}
 }
 
 /*
  * Somebody doesn't want the vnode recycled.
  * Bumps v_holdcnt and, if VSHOULDBUSY() now says so, calls vbusy() on
  * the vnode.  vdrop() is the opposite operation.
  */
 void
 vhold(vp)
 	register struct vnode *vp;
 {
 	int s;
 
   	s = splbio();
 	vp->v_holdcnt++;
 	if (VSHOULDBUSY(vp))
 		vbusy(vp);
 	splx(s);
 }
 
 /*
  * Note that there is one less who cares about this vnode.  vdrop() is the
  * opposite of vhold().
  *
  * Panics if the hold count is not positive on entry.  After the
  * decrement the vnode is handed to vfree() if VSHOULDFREE() says so.
  */
 void
 vdrop(vp)
 	register struct vnode *vp;
 {
 	int s;
 
 	s = splbio();
 	if (vp->v_holdcnt <= 0)
 		panic("vdrop: holdcnt");
 	vp->v_holdcnt--;
 	if (VSHOULDFREE(vp))
 		vfree(vp);
 	splx(s);
 }
 
 /*
  * Remove any vnodes in the vnode table belonging to mount point mp.
  *
  * If MNT_NOFORCE is specified, there should not be any active ones,
  * return error if any are found (nb: this is a user error, not a
  * system error). If MNT_FORCE is specified, detach any active vnodes
  * that are found.
  *
  * skipvp, if non-NULL, is left alone.  The flags handled here are
  * SKIPSYSTEM (leave VSYSTEM vnodes), WRITECLOSE (only touch regular
  * files open for writing) and FORCECLOSE (detach vnodes that are still
  * in use).  Returns EBUSY if any in-use vnode was left behind, else 0.
  */
 #ifdef DIAGNOSTIC
 static int busyprt = 0;		/* print out busy vnodes */
 SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, "");
 #endif
 
 int
 vflush(mp, skipvp, flags)
 	struct mount *mp;
 	struct vnode *skipvp;
 	int flags;
 {
 	struct proc *p = curproc;	/* XXX */
 	struct vnode *vp, *nvp;
 	int busy = 0;
 
 	mtx_lock(&mntvnode_mtx);
 loop:
 	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp; vp = nvp) {
 		/*
 		 * Make sure this vnode wasn't reclaimed in getnewvnode().
 		 * Start over if it has (it won't be on the list anymore).
 		 */
 		if (vp->v_mount != mp)
 			goto loop;
 		/* Fetch the successor now; vp may leave the list below. */
 		nvp = LIST_NEXT(vp, v_mntvnodes);
 		/*
 		 * Skip over a selected vnode.
 		 */
 		if (vp == skipvp)
 			continue;
 
 		mtx_lock(&vp->v_interlock);
 		/*
 		 * Skip over a vnodes marked VSYSTEM.
 		 */
 		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
 			mtx_unlock(&vp->v_interlock);
 			continue;
 		}
 		/*
 		 * If WRITECLOSE is set, only flush out regular file vnodes
 		 * open for writing.
 		 */
 		if ((flags & WRITECLOSE) &&
 		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
 			mtx_unlock(&vp->v_interlock);
 			continue;
 		}
 
 		/*
 		 * With v_usecount == 0, all we need to do is clear out the
 		 * vnode data structures and we are done.
 		 *
 		 * NOTE(review): the interlock is still held when vgonel()
 		 * is called and is not released here; presumably vgonel()
 		 * drops it -- confirm.
 		 */
 		if (vp->v_usecount == 0) {
 			mtx_unlock(&mntvnode_mtx);
 			vgonel(vp, p);
 			mtx_lock(&mntvnode_mtx);
 			continue;
 		}
 
 		/*
 		 * If FORCECLOSE is set, forcibly close the vnode. For block
 		 * or character devices, revert to an anonymous device. For
 		 * all other files, just kill them.
 		 */
 		if (flags & FORCECLOSE) {
 			mtx_unlock(&mntvnode_mtx);
 			if (vp->v_type != VCHR) {
 				vgonel(vp, p);
 			} else {
 				vclean(vp, 0, p);
 				vp->v_op = spec_vnodeop_p;
 				insmntque(vp, (struct mount *) 0);
 			}
 			mtx_lock(&mntvnode_mtx);
 			continue;
 		}
 #ifdef DIAGNOSTIC
 		if (busyprt)
 			vprint("vflush: busy vnode", vp);
 #endif
 		/* In use and not forced: count it and move on. */
 		mtx_unlock(&vp->v_interlock);
 		busy++;
 	}
 	mtx_unlock(&mntvnode_mtx);
 	if (busy)
 		return (EBUSY);
 	return (0);
 }
 
 /*
  * Disassociate the underlying file system from a vnode.
  *
  * vp is the vnode to clean, flags may contain DOCLOSE (flush buffers
  * and close an active vnode), and p is the process passed through to
  * the VOP calls.
  *
  * LK_INTERLOCK is handed to VOP_LOCK() below, so the caller is
  * presumably expected to hold vp's interlock on entry -- confirm
  * against callers outside this file.  Panics if VXLOCK is already set
  * or if VOP_RECLAIM() fails.  On return the vnode uses
  * dead_vnodeop_p, has tag VT_NON, and VXLOCK is clear.
  */
 static void
 vclean(vp, flags, p)
 	struct vnode *vp;
 	int flags;
 	struct proc *p;
 {
 	int active;	/* v_usecount sampled on entry; non-zero if in use */
 
 	/*
 	 * Check to see if the vnode is in use. If so we have to reference it
 	 * before we clean it out so that its count cannot fall to zero and
 	 * generate a race against ourselves to recycle it.
 	 */
 	if ((active = vp->v_usecount))
 		vp->v_usecount++;
 
 	/*
 	 * Prevent the vnode from being recycled or brought into use while we
 	 * clean it out.
 	 */
 	if (vp->v_flag & VXLOCK)
 		panic("vclean: deadlock");
 	vp->v_flag |= VXLOCK;
 	vp->v_vxproc = curproc;
 	/*
 	 * Even if the count is zero, the VOP_INACTIVE routine may still
 	 * have the object locked while it cleans it out. The VOP_LOCK
 	 * ensures that the VOP_INACTIVE routine is done with its work.
 	 * For active vnodes, it ensures that no other activity can
 	 * occur while the underlying object is being cleaned out.
 	 */
 	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
 
 	/*
 	 * Clean out any buffers associated with the vnode.
 	 * If the flush fails, just toss the buffers.
 	 */
 	if (flags & DOCLOSE) {
 		if (TAILQ_FIRST(&vp->v_dirtyblkhd) != NULL)
 			(void) vn_write_suspend_wait(vp, NULL, V_WAIT);
 		if (vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0) != 0)
 			vinvalbuf(vp, 0, NOCRED, p, 0, 0);
 	}
 
 	VOP_DESTROYVOBJECT(vp);
 
 	/*
 	 * If purging an active vnode, it must be closed and
 	 * deactivated before being reclaimed. Note that the
 	 * VOP_INACTIVE will unlock the vnode.
 	 */
 	if (active) {
 		if (flags & DOCLOSE)
 			VOP_CLOSE(vp, FNONBLOCK, NOCRED, p);
 		VOP_INACTIVE(vp, p);
 	} else {
 		/*
 		 * Any other processes trying to obtain this lock must first
 		 * wait for VXLOCK to clear, then call the new lock operation.
 		 */
 		VOP_UNLOCK(vp, 0, p);
 	}
 	/*
 	 * Reclaim the vnode.
 	 */
 	if (VOP_RECLAIM(vp, p))
 		panic("vclean: cannot reclaim");
 
 	if (active) {
 		/*
 		 * Inline copy of vrele() since VOP_INACTIVE
 		 * has already been called.
 		 *
 		 * This drops the extra reference taken at the top of the
 		 * function.
 		 */
 		mtx_lock(&vp->v_interlock);
 		if (--vp->v_usecount <= 0) {
 #ifdef DIAGNOSTIC
 			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
 				vprint("vclean: bad ref count", vp);
 				panic("vclean: ref cnt");
 			}
 #endif
 			vfree(vp);
 		}
 		mtx_unlock(&vp->v_interlock);
 	}
 
 	/* Drop name-cache entries and tear down the vnode lock. */
 	cache_purge(vp);
 	vp->v_vnlock = NULL;
 	lockdestroy(&vp->v_lock);
 
 	if (VSHOULDFREE(vp))
 		vfree(vp);
 	
 	/*
 	 * Done with purge, notify sleepers of the grim news.
 	 */
 	vp->v_op = dead_vnodeop_p;
 	vn_pollgone(vp);
 	vp->v_tag = VT_NON;
 	vp->v_flag &= ~VXLOCK;
 	vp->v_vxproc = NULL;
 	/* Wake anybody who set VXWANT and went to sleep on vp. */
 	if (vp->v_flag & VXWANT) {
 		vp->v_flag &= ~VXWANT;
 		wakeup((caddr_t) vp);
 	}
 }
 
 /*
  * Eliminate all activity associated with the requested vnode
  * and with all vnodes aliased to the requested vnode.
  *
  * ap->a_vp is the vnode being revoked; ap->a_flags must contain
  * REVOKEALL (asserted).  Always returns 0.
  *
  * If a vgone()/vclean() is already running on the vnode (VXLOCK set),
  * this sleeps until woken and returns without doing anything else.
  * Otherwise every vnode found on the device's alias list is passed to
  * vgone() until the list is empty.
  */
 int
 vop_revoke(ap)
 	struct vop_revoke_args /* {
 		struct vnode *a_vp;
 		int a_flags;
 	} */ *ap;
 {
 	struct vnode *vp, *vq;
 	dev_t dev;
 
 	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));
 
 	vp = ap->a_vp;
 	/*
 	 * If a vgone (or vclean) is already in progress,
 	 * wait until it is done and return.
 	 *
 	 * The interlock is taken first: msleep() is asked to drop it
 	 * (PDROP), so it has to be owned here, and holding it also keeps
 	 * the VXLOCK test and the VXWANT update consistent with each other,
 	 * as vgonel() does for the same handshake.
 	 */
 	mtx_lock(&vp->v_interlock);
 	if (vp->v_flag & VXLOCK) {
 		vp->v_flag |= VXWANT;
 		msleep((caddr_t)vp, &vp->v_interlock, PINOD | PDROP,
 		    "vop_revokeall", 0);
 		return (0);
 	}
 	mtx_unlock(&vp->v_interlock);
 	dev = vp->v_rdev;
 	for (;;) {
 		/* Only the list head is read under spechash_mtx. */
 		mtx_lock(&spechash_mtx);
 		vq = SLIST_FIRST(&dev->si_hlist);
 		mtx_unlock(&spechash_mtx);
 		if (!vq)
 			break;
 		vgone(vq);
 	}
 	return (0);
 }
 
 /*
  * Recycle an unused vnode to the front of the free list.
  * Release the passed interlock if the vnode will be recycled.
  */
 int
 vrecycle(vp, inter_lkp, p)
 	struct vnode *vp;
 	struct mtx *inter_lkp;
 	struct proc *p;
 {
 
 	mtx_lock(&vp->v_interlock);
 	if (vp->v_usecount == 0) {
 		if (inter_lkp) {
 			mtx_unlock(inter_lkp);
 		}
 		vgonel(vp, p);
 		return (1);
 	}
 	mtx_unlock(&vp->v_interlock);
 	return (0);
 }
 
 /*
  * Eliminate all activity associated with a vnode
  * in preparation for reuse.
  */
 void
 vgone(vp)
 	register struct vnode *vp;
 {
 	struct proc *p = curproc;	/* XXX */
 
 	mtx_lock(&vp->v_interlock);
 	vgonel(vp, p);
 }
 
 /*
  * vgone, with the vp interlock held.
  *
  * vp is the vnode to retire and p the process handed on to vclean().
  * The interlock is no longer held on return: it is either dropped by
  * msleep() (PDROP) in the "already in progress" case, or released at
  * the end of the function.  On the normal path the vnode ends up with
  * type VBAD.
  */
 void
 vgonel(vp, p)
 	struct vnode *vp;
 	struct proc *p;
 {
 	int s;
 
 	/*
 	 * If a vgone (or vclean) is already in progress,
 	 * wait until it is done and return.
 	 */
 	if (vp->v_flag & VXLOCK) {
 		vp->v_flag |= VXWANT;
 		msleep((caddr_t)vp, &vp->v_interlock, PINOD | PDROP,
 		    "vgone", 0);
 		return;
 	}
 
 	/*
 	 * Clean out the filesystem specific data.
 	 *
 	 * The interlock is re-acquired right after vclean() returns.
 	 */
 	vclean(vp, DOCLOSE, p);
 	mtx_lock(&vp->v_interlock);
 
 	/*
 	 * Delete from old mount point vnode list, if on one.
 	 */
 	if (vp->v_mount != NULL)
 		insmntque(vp, (struct mount *)0);
 	/*
 	 * If special device, remove it from special device alias list
 	 * if it is on one.
 	 */
 	if (vp->v_type == VCHR && vp->v_rdev != NULL && vp->v_rdev != NODEV) {
 		mtx_lock(&spechash_mtx);
 		SLIST_REMOVE(&vp->v_rdev->si_hlist, vp, vnode, v_specnext);
 		freedev(vp->v_rdev);
 		mtx_unlock(&spechash_mtx);
 		vp->v_rdev = NULL;
 	}
 
 	/*
 	 * If it is on the freelist and not already at the head,
 	 * move it to the head of the list. The test of the
 	 * VDOOMED flag and the reference count of zero is because
 	 * it will be removed from the free list by getnewvnode,
 	 * but will not have its reference count incremented until
 	 * after calling vgone. If the reference count were
 	 * incremented first, vgone would (incorrectly) try to
 	 * close the previous instance of the underlying object.
 	 */
 	if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) {
 		s = splbio();
 		mtx_lock(&vnode_free_list_mtx);
 		/*
 		 * Already on the list: unlink so it can be re-inserted at
 		 * the head.  Not on the list: it is about to be added, so
 		 * account for it in freevnodes.
 		 */
 		if (vp->v_flag & VFREE)
 			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 		else
 			freevnodes++;
 		vp->v_flag |= VFREE;
 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 		mtx_unlock(&vnode_free_list_mtx);
 		splx(s);
 	}
 
 	vp->v_type = VBAD;
 	mtx_unlock(&vp->v_interlock);
 }
 
 /*
  * Look up a vnode by device.
  *
  * The device's alias list is searched, under spechash_mtx, for the
  * first vnode whose type equals `type'.  When one is found it is
  * stored through vpp and 1 is returned; otherwise *vpp is left
  * untouched and 0 is returned.
  */
 int
 vfinddev(dev, type, vpp)
 	dev_t dev;
 	enum vtype type;
 	struct vnode **vpp;
 {
 	struct vnode *cand;
 	int found;
 
 	found = 0;
 	mtx_lock(&spechash_mtx);
 	SLIST_FOREACH(cand, &dev->si_hlist, v_specnext) {
 		if (cand->v_type != type)
 			continue;
 		*vpp = cand;
 		found = 1;
 		break;
 	}
 	mtx_unlock(&spechash_mtx);
 	return (found);
 }
 
 /*
  * Calculate the total number of references to a special device.
  */
 int
 vcount(vp)
 	struct vnode *vp;
 {
 	struct vnode *vq;
 	int count;
 
 	count = 0;
 	mtx_lock(&spechash_mtx);
 	SLIST_FOREACH(vq, &vp->v_rdev->si_hlist, v_specnext)
 		count += vq->v_usecount;
 	mtx_unlock(&spechash_mtx);
 	return (count);
 }
 
 /*
  * Same as vcount(), but starting from a dev_t.  A device with no
  * vnodes on its alias list has a count of 0.
  */
 int
 count_dev(dev)
 	dev_t dev;
 {
 	struct vnode *first;
 
 	first = SLIST_FIRST(&dev->si_hlist);
 	return (first != NULL ? vcount(first) : 0);
 }
 
 /*
  * Print out a description of a vnode.
  */
 static char *typename[] =
 {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};
 
 void
 vprint(label, vp)
 	char *label;
 	struct vnode *vp;
 {
 	char buf[96];
 
 	if (label != NULL)
 		printf("%s: %p: ", label, (void *)vp);
 	else
 		printf("%p: ", (void *)vp);
 	printf("type %s, usecount %d, writecount %d, refcount %d,",
 	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
 	    vp->v_holdcnt);
 	buf[0] = '\0';
 	if (vp->v_flag & VROOT)
 		strcat(buf, "|VROOT");
 	if (vp->v_flag & VTEXT)
 		strcat(buf, "|VTEXT");
 	if (vp->v_flag & VSYSTEM)
 		strcat(buf, "|VSYSTEM");
 	if (vp->v_flag & VXLOCK)
 		strcat(buf, "|VXLOCK");
 	if (vp->v_flag & VXWANT)
 		strcat(buf, "|VXWANT");
 	if (vp->v_flag & VBWAIT)
 		strcat(buf, "|VBWAIT");
 	if (vp->v_flag & VDOOMED)
 		strcat(buf, "|VDOOMED");
 	if (vp->v_flag & VFREE)
 		strcat(buf, "|VFREE");
 	if (vp->v_flag & VOBJBUF)
 		strcat(buf, "|VOBJBUF");
 	if (buf[0] != '\0')
 		printf(" flags (%s)", &buf[1]);
 	if (vp->v_data == NULL) {
 		printf("\n");
 	} else {
 		printf("\n\t");
 		VOP_PRINT(vp);
 	}
 }
 
 #ifdef DDB
 #include <ddb/ddb.h>
 /*
  * List all of the locked vnodes in the system.
  * Called when debugging the kernel.
  *
  * Every mount on mountlist that can be busied without waiting is
  * walked, and each of its vnodes for which VOP_ISLOCKED() is non-zero
  * is printed with vprint().  Mounts that cannot be busied are skipped.
  */
 DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
 {
 	struct proc *p = curproc;	/* XXX */
 	struct mount *mp, *nmp;
 	struct vnode *vp;
 
 	printf("Locked vnodes\n");
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		/*
 		 * mountlist_mtx is passed to vfs_busy() as the interlock.
 		 * NOTE(review): the re-lock further down implies it is
 		 * released when vfs_busy() succeeds -- confirm.
 		 */
 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
 			if (VOP_ISLOCKED(vp, NULL))
 				vprint((char *)0, vp);
 		}
 		/* Re-take the list lock before reading the next entry. */
 		mtx_lock(&mountlist_mtx);
 		nmp = TAILQ_NEXT(mp, mnt_list);
 		vfs_unbusy(mp, p);
 	}
 	mtx_unlock(&mountlist_mtx);
 }
 #endif
 
 /*
  * Top level filesystem related information gathering.
  *
  * Handler for the vfs.generic sysctl node.  A request with a single
  * name component is passed to sysctl_ovfs_conf().  Otherwise the
  * first component selects:
  *	VFS_MAXTYPENUM	copy out maxvfsconf (exactly two components);
  *	VFS_CONF	copy out the vfsconf entry whose vfc_typenum
  *			equals the next component (exactly three).
  * Returns ENOTDIR for a wrong number of components and EOPNOTSUPP for
  * an unknown selector or type number.
  */
 static int	sysctl_ovfs_conf __P((SYSCTL_HANDLER_ARGS));
 
 static int
 vfs_sysctl(SYSCTL_HANDLER_ARGS)
 {
 	/*
 	 * Shift the view back by one element so that name[1] is arg1[0].
 	 * name[0] is only read by the disabled "notyet" code below.
 	 */
 	int *name = (int *)arg1 - 1;	/* XXX */
 	u_int namelen = arg2 + 1;	/* XXX */
 	struct vfsconf *vfsp;
 
 #if 1 || defined(COMPAT_PRELITE2)
 	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
 	if (namelen == 1)
 		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
 #endif
 
 	/* XXX the below code does not compile; vfs_sysctl does not exist. */
 #ifdef notyet
 	/* all sysctl names at this level are at least name and field */
 	if (namelen < 2)
 		return (ENOTDIR);		/* overloaded */
 	if (name[0] != VFS_GENERIC) {
 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 			if (vfsp->vfc_typenum == name[0])
 				break;
 		if (vfsp == NULL)
 			return (EOPNOTSUPP);
 		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
 		    oldp, oldlenp, newp, newlen, p));
 	}
 #endif
 	switch (name[1]) {
 	case VFS_MAXTYPENUM:
 		if (namelen != 2)
 			return (ENOTDIR);
 		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
 	case VFS_CONF:
 		if (namelen != 3)
 			return (ENOTDIR);	/* overloaded */
 		/* Linear search of the vfsconf list by type number. */
 		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
 			if (vfsp->vfc_typenum == name[2])
 				break;
 		if (vfsp == NULL)
 			return (EOPNOTSUPP);
 		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
 	}
 	return (EOPNOTSUPP);
 }
 
 SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
 	"Generic filesystem");
 
 #if 1 || defined(COMPAT_PRELITE2)
 
 /*
  * Old-style filesystem configuration export.
  *
  * For every entry on the vfsconf list a struct ovfsconf is filled in
  * from it and passed to SYSCTL_OUT().  Returns 0 once the whole list
  * has been written, or the first error SYSCTL_OUT() reports.
  */
 static int
 sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
 {
 	int error;
 	struct vfsconf *vfsp;
 	struct ovfsconf ovfs;
 
 	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
 		/*
 		 * Start every record from zero.  The assignments below do
 		 * not cover structure padding or the bytes of vfc_name that
 		 * follow the terminating NUL, and the whole structure is
 		 * copied out, so without this stale stack contents would
 		 * be exported.
 		 */
 		bzero(&ovfs, sizeof ovfs);
 		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
 		strcpy(ovfs.vfc_name, vfsp->vfc_name);
 		ovfs.vfc_index = vfsp->vfc_typenum;
 		ovfs.vfc_refcount = vfsp->vfc_refcount;
 		ovfs.vfc_flags = vfsp->vfc_flags;
 		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
 		if (error)
 			return error;
 	}
 	return 0;
 }
 
 #endif /* 1 || COMPAT_PRELITE2 */
 
 #if COMPILING_LINT
 #define KINFO_VNODESLOP	10
 /*
  * Dump vnode list (via sysctl).
  * Copyout address of vnode followed by vnode.
  *
  * When the request has no old-value buffer, only a size estimate is
  * produced: room for numvnodes + KINFO_VNODESLOP (pointer, vnode)
  * pairs.  Otherwise every mount that can be busied without waiting is
  * walked and each of its vnodes is written out.  Returns 0 or the
  * first error from SYSCTL_OUT().
  */
 /* ARGSUSED */
 static int
 sysctl_vnode(SYSCTL_HANDLER_ARGS)
 {
 	struct proc *p = curproc;	/* XXX */
 	struct mount *mp, *nmp;
 	struct vnode *nvp, *vp;
 	int error;
 
 #define VPTRSZ	sizeof (struct vnode *)
 #define VNODESZ	sizeof (struct vnode)
 
 	req->lock = 0;
 	if (!req->oldptr) /* Make an estimate */
 		return (SYSCTL_OUT(req, 0,
 			(numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ)));
 
 	mtx_lock(&mountlist_mtx);
 	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
 		if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, p)) {
 			nmp = TAILQ_NEXT(mp, mnt_list);
 			continue;
 		}
 again:
 		mtx_lock(&mntvnode_mtx);
 		for (vp = LIST_FIRST(&mp->mnt_vnodelist);
 		     vp != NULL;
 		     vp = nvp) {
 			/*
 			 * Check that the vp is still associated with
 			 * this filesystem.  RACE: could have been
 			 * recycled onto the same filesystem.
 			 */
 			if (vp->v_mount != mp) {
 				mtx_unlock(&mntvnode_mtx);
 				goto again;
 			}
 			nvp = LIST_NEXT(vp, v_mntvnodes);
 			/* The list mutex is not held across the copy-out. */
 			mtx_unlock(&mntvnode_mtx);
 			if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) ||
 			    (error = SYSCTL_OUT(req, vp, VNODESZ))) {
 				/*
 				 * Undo the vfs_busy() taken for this mount
 				 * before bailing out; otherwise the mount
 				 * point would stay busy.
 				 */
 				vfs_unbusy(mp, p);
 				return (error);
 			}
 			mtx_lock(&mntvnode_mtx);
 		}
 		mtx_unlock(&mntvnode_mtx);
 		mtx_lock(&mountlist_mtx);
 		nmp = TAILQ_NEXT(mp, mnt_list);
 		vfs_unbusy(mp, p);
 	}
 	mtx_unlock(&mountlist_mtx);
 
 	return (0);
 }
 
 /*
  * XXX
  * Exporting the vnode list on large systems causes them to crash.
  * Exporting the vnode list on medium systems causes sysctl to coredump.
  */
 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD,
 	0, 0, sysctl_vnode, "S,vnode", "");
 #endif
 
 /*
  * Check to see if a filesystem is mounted on a block device.
  *
  * Returns EBUSY when the device behind vp has a mount point recorded
  * in si_mountpoint, and 0 otherwise.
  */
 int
 vfs_mountedon(vp)
 	struct vnode *vp;
 {
 
 	return (vp->v_rdev->si_mountpoint != NULL ? EBUSY : 0);
 }
 
 /*
  * Unmount all filesystems. The list is traversed in reverse order
  * of mounting to avoid dependencies.
  */
 void
 vfs_unmountall()
 {
 	struct mount *mp;
 	struct proc *p;
 	int error;
 
 	if (curproc != NULL)
 		p = curproc;
 	else
 		p = initproc;	/* XXX XXX should this be proc0? */
 	/*
 	 * Since this only runs when rebooting, it is not interlocked.
 	 */
 	while(!TAILQ_EMPTY(&mountlist)) {
 		mp = TAILQ_LAST(&mountlist, mntlist);
 		error = dounmount(mp, MNT_FORCE, p);
 		if (error) {
 			TAILQ_REMOVE(&mountlist, mp, mnt_list);
 			printf("unmount of %s failed (",
 			    mp->mnt_stat.f_mntonname);
 			if (error == EBUSY)
 				printf("BUSY)\n");
 			else
 				printf("%d)\n", error);
 		} else {
 			/* The unmount has removed mp from the mountlist */
 		}
 	}
 }
 
 /*
  * Build hash lists of net addresses and hang them off the mount point.
  * Called by ufs_mount() to set up the lists of export addresses.
  *
  * mp is the mount point being exported, nep its export state and argp
  * the export request.
  *
  * With ex_addrlen == 0 the request describes the default export: the
  * flags and anonymous credential are stored in nep->ne_defexported
  * and MNT_DEFEXPORTED is set (EPERM if it was set already).
  *
  * Otherwise a netcred followed by the address and optional mask is
  * allocated, both are copied in, and the entry is added to the radix
  * tree for the address family, which is attached on first use.
  *
  * Returns 0 on success, or: a copyin() error; EINVAL for an address
  * family outside 0..AF_MAX; ENOBUFS when no radix tree can be set up
  * for the family; EPERM when the address is already present.  On any
  * failure the allocated netcred is freed.
  */
 static int
 vfs_hang_addrlist(mp, nep, argp)
 	struct mount *mp;
 	struct netexport *nep;
 	struct export_args *argp;
 {
 	register struct netcred *np;
 	register struct radix_node_head *rnh;
 	register int i;
 	struct radix_node *rn;
 	struct sockaddr *saddr, *smask = 0;
 	struct domain *dom;
 	int error;
 
 	if (argp->ex_addrlen == 0) {
 		if (mp->mnt_flag & MNT_DEFEXPORTED)
 			return (EPERM);
 		np = &nep->ne_defexported;
 		np->netc_exflags = argp->ex_flags;
 		bzero(&np->netc_anon, sizeof(np->netc_anon));
 		np->netc_anon.cr_uid = argp->ex_anon.cr_uid;
 		np->netc_anon.cr_ngroups = argp->ex_anon.cr_ngroups;
 		bcopy(argp->ex_anon.cr_groups, np->netc_anon.cr_groups,
 		    sizeof(np->netc_anon.cr_groups));
 		np->netc_anon.cr_ref = 1;
 		mp->mnt_flag |= MNT_DEFEXPORTED;
 		return (0);
 	}
 	/* One allocation holds the netcred, the address and the mask. */
 	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
 	np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK | M_ZERO);
 	saddr = (struct sockaddr *) (np + 1);
 	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
 		goto out;
 	/* Never trust a length larger than what was actually copied. */
 	if (saddr->sa_len > argp->ex_addrlen)
 		saddr->sa_len = argp->ex_addrlen;
 	if (argp->ex_masklen) {
 		smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen);
 		error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen);
 		if (error)
 			goto out;
 		if (smask->sa_len > argp->ex_masklen)
 			smask->sa_len = argp->ex_masklen;
 	}
 	/*
 	 * The family comes from the copied-in address and is used to
 	 * index ne_rtable[], which is walked elsewhere in this file for
 	 * indices 0..AF_MAX.  Reject anything outside that range instead
 	 * of indexing past the table.
 	 */
 	i = saddr->sa_family;
 	if (i < 0 || i > AF_MAX) {
 		error = EINVAL;
 		goto out;
 	}
 	if ((rnh = nep->ne_rtable[i]) == 0) {
 		/*
 		 * Seems silly to initialize every AF when most are not used,
 		 * do so on demand here
 		 */
 		for (dom = domains; dom; dom = dom->dom_next)
 			if (dom->dom_family == i && dom->dom_rtattach) {
 				dom->dom_rtattach((void **) &nep->ne_rtable[i],
 				    dom->dom_rtoffset);
 				break;
 			}
 		if ((rnh = nep->ne_rtable[i]) == 0) {
 			error = ENOBUFS;
 			goto out;
 		}
 	}
 	rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh,
 	    np->netc_rnodes);
 	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
 		error = EPERM;
 		goto out;
 	}
 	np->netc_exflags = argp->ex_flags;
 	bzero(&np->netc_anon, sizeof(np->netc_anon));
 	np->netc_anon.cr_uid = argp->ex_anon.cr_uid;
 	np->netc_anon.cr_ngroups = argp->ex_anon.cr_ngroups;
 	bcopy(argp->ex_anon.cr_groups, np->netc_anon.cr_groups,
 	    sizeof(np->netc_anon.cr_groups));
 	np->netc_anon.cr_ref = 1;
 	return (0);
 out:
 	free(np, M_NETADDR);
 	return (error);
 }
 
 /*
  * Tree-walk callback used by vfs_free_addrlist(): unlink one node from
  * the radix tree passed in `w' and free it.  Always returns 0.
  */
 /* ARGSUSED */
 static int
 vfs_free_netcred(rn, w)
 	struct radix_node *rn;
 	void *w;
 {
 	struct radix_node_head *head;
 
 	head = (struct radix_node_head *)w;
 	(*head->rnh_deladdr)(rn->rn_key, rn->rn_mask, head);
 	free((caddr_t)rn, M_NETADDR);
 	return (0);
 }
 
 /*
  * Free the net address hash lists that are hanging off the mount points.
  */
 static void
 vfs_free_addrlist(nep)
 	struct netexport *nep;
 {
 	register int i;
 	register struct radix_node_head *rnh;
 
 	for (i = 0; i <= AF_MAX; i++)
 		if ((rnh = nep->ne_rtable[i])) {
 			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
 			    (caddr_t) rnh);
 			free((caddr_t) rnh, M_RTABLE);
 			nep->ne_rtable[i] = 0;
 		}
 }
 
 /*
  * High level function to manipulate export options on a mount point
  * and the passed in netexport.
  * Struct export_args *argp is the variable used to twiddle options,
  * the structure is described in sys/mount.h
  */
 int
 vfs_export(mp, nep, argp)
 	struct mount *mp;
 	struct netexport *nep;
 	struct export_args *argp;
 {
 	int error;
 
 	if (argp->ex_flags & MNT_DELEXPORT) {
 		if (mp->mnt_flag & MNT_EXPUBLIC) {
 			vfs_setpublicfs(NULL, NULL, NULL);
 			mp->mnt_flag &= ~MNT_EXPUBLIC;
 		}
 		vfs_free_addrlist(nep);
 		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 	}
 	if (argp->ex_flags & MNT_EXPORTED) {
 		if (argp->ex_flags & MNT_EXPUBLIC) {
 			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
 				return (error);
 			mp->mnt_flag |= MNT_EXPUBLIC;
 		}
 		if ((error = vfs_hang_addrlist(mp, nep, argp)))
 			return (error);
 		mp->mnt_flag |= MNT_EXPORTED;
 	}
 	return (0);
 }
 
 /*
  * Set the publicly exported filesystem (WebNFS). Currently, only
  * one public filesystem is possible in the spec (RFC 2054 and 2055)
  */
 int
 vfs_setpublicfs(mp, nep, argp)
 	struct mount *mp;
 	struct netexport *nep;
 	struct export_args *argp;
 {
 	int error;
 	struct vnode *rvp;
 	char *cp;
 
 	/*
 	 * mp == NULL -> invalidate the current info, the FS is
 	 * no longer exported. May be called from either vfs_export
 	 * or unmount, so check if it hasn't already been done.
 	 */
 	if (mp == NULL) {
 		if (nfs_pub.np_valid) {
 			nfs_pub.np_valid = 0;
 			if (nfs_pub.np_index != NULL) {
 				FREE(nfs_pub.np_index, M_TEMP);
 				nfs_pub.np_index = NULL;
 			}
 		}
 		return (0);
 	}
 
 	/*
 	 * Only one allowed at a time.
 	 */
 	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
 		return (EBUSY);
 
 	/*
 	 * Get real filehandle for root of exported FS.
 	 */
 	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
 	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;
 
 	if ((error = VFS_ROOT(mp, &rvp)))
 		return (error);
 
 	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
 		return (error);
 
 	vput(rvp);
 
 	/*
 	 * If an indexfile was specified, pull it in.
 	 */
 	if (argp->ex_indexfile != NULL) {
 		MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP,
 		    M_WAITOK);
 		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
 		    MAXNAMLEN, (size_t *)0);
 		if (!error) {
 			/*
 			 * Check for illegal filenames.
 			 */
 			for (cp = nfs_pub.np_index; *cp; cp++) {
 				if (*cp == '/') {
 					error = EINVAL;
 					break;
 				}
 			}
 		}
 		if (error) {
 			FREE(nfs_pub.np_index, M_TEMP);
 			return (error);
 		}
 	}
 
 	nfs_pub.np_mount = mp;
 	nfs_pub.np_valid = 1;
 	return (0);
 }
 
 /*
  * Used by the filesystems to determine if a given network address
  * (passed in 'nam') is present in thier exports list, returns a pointer
  * to struct netcred so that the filesystem can examine it for
  * access rights (read/write/etc).
  */
 struct netcred *
 vfs_export_lookup(mp, nep, nam)
 	register struct mount *mp;
 	struct netexport *nep;
 	struct sockaddr *nam;
 {
 	register struct netcred *np;
 	register struct radix_node_head *rnh;
 	struct sockaddr *saddr;
 
 	np = NULL;
 	if (mp->mnt_flag & MNT_EXPORTED) {
 		/*
 		 * Lookup in the export list first.
 		 */
 		if (nam != NULL) {
 			saddr = nam;
 			rnh = nep->ne_rtable[saddr->sa_family];
 			if (rnh != NULL) {
 				np = (struct netcred *)
 					(*rnh->rnh_matchaddr)((caddr_t)saddr,
 							      rnh);
 				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
 					np = NULL;
 			}
 		}
 		/*
 		 * If no address match, use the default if it exists.
 		 */
 		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
 			np = &nep->ne_defexported;
 	}
 	return (np);
 }
 
 /*
  * perform msync on all vnodes under a mount point
  * the mount point must be locked.
  *
  * mp is the mount point whose vnode list is walked.  flags is
  * compared against MNT_WAIT: with MNT_WAIT pages are cleaned with
  * OBJPC_SYNC, otherwise with OBJPC_NOSYNC and vnodes that are locked
  * or whose object is not marked OBJ_MIGHTBEDIRTY are skipped early.
  *
  * The whole list is walked again, at most five passes in total, for
  * as long as a pass cleaned at least one object.
  */
 void
 vfs_msync(struct mount *mp, int flags) {
 	struct vnode *vp, *nvp;
 	struct vm_object *obj;
 	int anyio, tries;
 
 	tries = 5;
 loop:
 	anyio = 0;
 	for (vp = LIST_FIRST(&mp->mnt_vnodelist); vp != NULL; vp = nvp) {
 
 		nvp = LIST_NEXT(vp, v_mntvnodes);
 
 		/* The vnode moved to another mount: restart the walk. */
 		if (vp->v_mount != mp) {
 			goto loop;
 		}
 
 		if (vp->v_flag & VXLOCK)	/* XXX: what if MNT_WAIT? */
 			continue;
 
 		/*
 		 * Cheap checks made without the interlock; the object test
 		 * is repeated below once the interlock is held.
 		 */
 		if (flags != MNT_WAIT) {
 			if (VOP_GETVOBJECT(vp, &obj) != 0 ||
 			    (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
 				continue;
 			if (VOP_ISLOCKED(vp, NULL))
 				continue;
 		}
 
 		mtx_lock(&vp->v_interlock);
 		if (VOP_GETVOBJECT(vp, &obj) == 0 &&
 		    (obj->flags & OBJ_MIGHTBEDIRTY)) {
 			/*
 			 * LK_INTERLOCK is passed to vget(), so the interlock
 			 * is not released explicitly on this branch.
 			 */
 			if (!vget(vp,
 				LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
 				if (VOP_GETVOBJECT(vp, &obj) == 0) {
 					vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
 					anyio = 1;
 				}
 				vput(vp);
 			}
 		} else {
 			mtx_unlock(&vp->v_interlock);
 		}
 	}
 	if (anyio && (--tries > 0))
 		goto loop;
 }
 
 /*
  * Create the VM object needed for VMIO and mmap support.  This
  * is done for all VREG files in the system.  Some filesystems might
  * afford the additional metadata buffering capability of the
  * VMIO code by making the device node be VMIO mode also.
  *
  * vp must be locked when vfs_object_create is called.
  *
  * The work is delegated to VOP_CREATEVOBJECT(), whose result is
  * returned unchanged.
  */
 int
 vfs_object_create(vp, p, cred)
 	struct vnode *vp;
 	struct proc *p;
 	struct ucred *cred;
 {
 	int error;
 
 	error = VOP_CREATEVOBJECT(vp, cred, p);
 	return (error);
 }
 
 /*
  * Mark a vnode as free, putting it up for recycling.
  *
  * The vnode must not already carry VFREE (asserted).  A vnode flagged
  * VAGE goes to the head of the free list, any other vnode to the
  * tail.  VAGE is cleared and VFREE is set on the way out.
  */
 void
 vfree(vp)
 	struct vnode *vp;
 {
 	int ipl;
 
 	ipl = splbio();
 	mtx_lock(&vnode_free_list_mtx);
 	KASSERT((vp->v_flag & VFREE) == 0, ("vnode already free"));
 	if ((vp->v_flag & VAGE) == 0) {
 		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
 	} else {
 		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 	}
 	freevnodes += 1;
 	mtx_unlock(&vnode_free_list_mtx);
 	vp->v_flag &= ~VAGE;
 	vp->v_flag |= VFREE;
 	splx(ipl);
 }
 
 /*
  * Opposite of vfree() - mark a vnode as in use.
  *
  * The vnode must carry VFREE (asserted).  It is unlinked from the
  * free list, the free count is lowered, and both VFREE and VAGE are
  * cleared.
  */
 void
 vbusy(vp)
 	struct vnode *vp;
 {
 	int ipl;
 
 	ipl = splbio();
 	mtx_lock(&vnode_free_list_mtx);
 	KASSERT((vp->v_flag & VFREE) != 0, ("vnode not free"));
 	TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 	freevnodes -= 1;
 	mtx_unlock(&vnode_free_list_mtx);
 	vp->v_flag &= ~(VFREE | VAGE);
 	splx(ipl);
 }
 
 /*
  * Record a process's interest in events which might happen to
  * a vnode.  Because poll uses the historic select-style interface
  * internally, this routine serves as both the ``check for any
  * pending events'' and the ``record my interest in future events''
  * functions.  (These are done together, while the lock is held,
  * to avoid race conditions.)
  *
  * Returns the subset of `events' that was already pending (and
  * consumes it), or 0 after registering the interest.
  */
 int
 vn_pollrecord(vp, p, events)
 	struct vnode *vp;
 	struct proc *p;
 	short events;
 {
 	short ready;
 
 	mtx_lock(&vp->v_pollinfo.vpi_lock);
 	ready = vp->v_pollinfo.vpi_revents & events;
 	if (ready == 0) {
 		/* Nothing pending yet: remember who is waiting for what. */
 		vp->v_pollinfo.vpi_events |= events;
 		selrecord(p, &vp->v_pollinfo.vpi_selinfo);
 		mtx_unlock(&vp->v_pollinfo.vpi_lock);
 		return 0;
 	}
 
 	/*
 	 * Only the events asked for are consumed.  Whatever else is
 	 * pending stays available for the other process which
 	 * presumably had requested it (otherwise it would never
 	 * have been recorded).
 	 */
 	vp->v_pollinfo.vpi_revents &= ~ready;
 	mtx_unlock(&vp->v_pollinfo.vpi_lock);
 	return ready;
 }
 
 /*
  * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
  * it is possible for us to miss an event due to race conditions, but
  * that condition is expected to be rare, so for the moment it is the
  * preferred interface.
  *
  * Nothing happens unless somebody registered interest in at least one
  * of `events'.
  */
 void
 vn_pollevent(vp, events)
 	struct vnode *vp;
 	short events;
 {
 	mtx_lock(&vp->v_pollinfo.vpi_lock);
 	if ((vp->v_pollinfo.vpi_events & events) == 0) {
 		mtx_unlock(&vp->v_pollinfo.vpi_lock);
 		return;
 	}
 
 	/*
 	 * We clear vpi_events so that we don't
 	 * call selwakeup() twice if two events are
 	 * posted before the polling process(es) is
 	 * awakened.  This also ensures that we take at
 	 * most one selwakeup() if the polling process
 	 * is no longer interested.  However, it does
 	 * mean that only one event can be noticed at
 	 * a time.  (Perhaps we should only clear those
 	 * event bits which we note?) XXX
 	 */
 	vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
 	vp->v_pollinfo.vpi_revents |= events;
 	selwakeup(&vp->v_pollinfo.vpi_selinfo);
 	mtx_unlock(&vp->v_pollinfo.vpi_lock);
 }
 
 #define VN_KNOTE(vp, b) \
 	KNOTE((struct klist *)&vp->v_pollinfo.vpi_selinfo.si_note, (b))
 
 /*
  * The vnode is being revoked: tell everybody who is watching it.
  * NOTE_REVOKE is always posted; pollers are only woken when some
  * interest is recorded, and that record is cleared first.
  * This depends on dead_poll() returning POLLHUP for correct
  * behavior.
  */
 void
 vn_pollgone(vp)
 	struct vnode *vp;
 {
 	mtx_lock(&vp->v_pollinfo.vpi_lock);
 	VN_KNOTE(vp, NOTE_REVOKE);
 	if (vp->v_pollinfo.vpi_events != 0) {
 		vp->v_pollinfo.vpi_events = 0;
 		selwakeup(&vp->v_pollinfo.vpi_selinfo);
 	}
 	mtx_unlock(&vp->v_pollinfo.vpi_lock);
 }
 
 
 
 /*
  * Routine to create and manage a filesystem syncer vnode.
  *
  * The declarations below set up the vnode operations vector used by
  * syncer vnodes.  close, lock, unlock and islocked are not given
  * functions of their own: they are macros that cast nullop,
  * vop_nolock, vop_nounlock and vop_noislocked to the required
  * signature.  Any operation not listed in the table goes to
  * vop_eopnotsupp.
  */
 #define sync_close ((int (*) __P((struct  vop_close_args *)))nullop)
 static int	sync_fsync __P((struct  vop_fsync_args *));
 static int	sync_inactive __P((struct  vop_inactive_args *));
 static int	sync_reclaim  __P((struct  vop_reclaim_args *));
 #define sync_lock ((int (*) __P((struct  vop_lock_args *)))vop_nolock)
 #define sync_unlock ((int (*) __P((struct  vop_unlock_args *)))vop_nounlock)
 static int	sync_print __P((struct vop_print_args *));
 #define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
 
 /* Operations vector pointer, passed to getnewvnode() for syncer vnodes. */
 static vop_t **sync_vnodeop_p;
 /* Operation descriptor -> implementation pairs, terminated by NULLs. */
 static struct vnodeopv_entry_desc sync_vnodeop_entries[] = {
 	{ &vop_default_desc,	(vop_t *) vop_eopnotsupp },
 	{ &vop_close_desc,	(vop_t *) sync_close },		/* close */
 	{ &vop_fsync_desc,	(vop_t *) sync_fsync },		/* fsync */
 	{ &vop_inactive_desc,	(vop_t *) sync_inactive },	/* inactive */
 	{ &vop_reclaim_desc,	(vop_t *) sync_reclaim },	/* reclaim */
 	{ &vop_lock_desc,	(vop_t *) sync_lock },		/* lock */
 	{ &vop_unlock_desc,	(vop_t *) sync_unlock },	/* unlock */
 	{ &vop_print_desc,	(vop_t *) sync_print },		/* print */
 	{ &vop_islocked_desc,	(vop_t *) sync_islocked },	/* islocked */
 	{ NULL, NULL }
 };
 /* Ties the vector pointer to its table for VNODEOP_SET(). */
 static struct vnodeopv_desc sync_vnodeop_opv_desc =
 	{ &sync_vnodeop_p, sync_vnodeop_entries };
 
 VNODEOP_SET(sync_vnodeop_opv_desc);
 
 /*
  * Create a new filesystem syncer vnode for the specified mount point.
  */
 int
 vfs_allocate_syncvnode(mp)
 	struct mount *mp;
 {
 	struct vnode *vp;
 	static long start, incr, next;
 	int error;
 
 	/* Allocate a new vnode */
 	if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) {
 		mp->mnt_syncer = NULL;
 		return (error);
 	}
 	vp->v_type = VNON;
 	/*
 	 * Place the vnode onto the syncer worklist. We attempt to
 	 * scatter them about on the list so that they will go off
 	 * at evenly distributed times even if all the filesystems
 	 * are mounted at once.
 	 */
 	next += incr;
 	if (next == 0 || next > syncer_maxdelay) {
 		start /= 2;
 		incr /= 2;
 		if (start == 0) {
 			start = syncer_maxdelay / 2;
 			incr = syncer_maxdelay;
 		}
 		next = start;
 	}
 	vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0);
 	mp->mnt_syncer = vp;
 	return (0);
 }
 
 /*
  * Do a lazy sync of the filesystem.
  *
  * Only MNT_LAZY requests do any work.  The syncer vnode is requeued,
  * and then, provided the mount can be busied and a write can be
  * started without waiting, the filesystem is msync'ed and synced with
  * MNT_ASYNC temporarily cleared.  Always returns 0.
  */
 static int
 sync_fsync(ap)
 	struct vop_fsync_args /* {
 		struct vnode *a_vp;
 		struct ucred *a_cred;
 		int a_waitfor;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *syncvp;
 	struct mount *mp;
 	struct proc *p;
 	int was_async;
 
 	syncvp = ap->a_vp;
 	mp = syncvp->v_mount;
 	p = ap->a_p;
 
 	/*
 	 * We only need to do something if this is a lazy evaluation.
 	 */
 	if (ap->a_waitfor != MNT_LAZY)
 		return (0);
 
 	/*
 	 * Move ourselves to the back of the sync list.
 	 */
 	vn_syncer_add_to_worklist(syncvp, syncdelay);
 
 	/*
 	 * Walk the list of vnodes pushing all that are dirty and
 	 * not already on the sync list.
 	 */
 	mtx_lock(&mountlist_mtx);
 	if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_mtx, p) != 0) {
 		mtx_unlock(&mountlist_mtx);
 		return (0);
 	}
 	if (vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
 		/* Force synchronous behaviour for the duration of the sync. */
 		was_async = mp->mnt_flag & MNT_ASYNC;
 		mp->mnt_flag &= ~MNT_ASYNC;
 		vfs_msync(mp, MNT_NOWAIT);
 		VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p);
 		if (was_async != 0)
 			mp->mnt_flag |= MNT_ASYNC;
 		vn_finished_write(mp);
 	}
 	vfs_unbusy(mp, p);
 	return (0);
 }
 
 /*
  * The syncer vnode is no longer referenced: get rid of it with
  * vgone().  Always returns 0.
  */
 static int
 sync_inactive(ap)
 	struct vop_inactive_args /* {
 		struct vnode *a_vp;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *syncvp;
 
 	syncvp = ap->a_vp;
 	vgone(syncvp);
 	return (0);
 }
 
 /*
  * The syncer vnode is no longer needed and is being decommissioned.
  * The mount point forgets about it and, if it is still queued, it is
  * taken off the syncer worklist.  Always returns 0.
  *
  * Modifications to the worklist must be protected at splbio().
  */
 static int
 sync_reclaim(ap)
 	struct vop_reclaim_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	struct vnode *syncvp;
 	int ipl;
 
 	syncvp = ap->a_vp;
 
 	ipl = splbio();
 	syncvp->v_mount->mnt_syncer = NULL;
 	if ((syncvp->v_flag & VONWORKLST) != 0) {
 		LIST_REMOVE(syncvp, v_synclist);
 		syncvp->v_flag &= ~VONWORKLST;
 	}
 	splx(ipl);
 
 	return (0);
 }
 
 /*
  * Print out a syncer vnode: a fixed tag, followed by the lock state
  * when the vnode has a lock attached, and a newline.  Always
  * returns 0.
  */
 static int
 sync_print(ap)
 	struct vop_print_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	struct vnode *syncvp;
 
 	syncvp = ap->a_vp;
 	printf("syncer vnode");
 	if (syncvp->v_vnlock != NULL) {
 		lockmgr_printinfo(syncvp->v_vnlock);
 	}
 	printf("\n");
 	return (0);
 }
 
 /*
  * Extract the dev_t from a VCHR vnode.  Any other vnode type yields
  * NODEV.
  */
 dev_t
 vn_todev(vp)
 	struct vnode *vp;
 {
 	return (vp->v_type == VCHR ? vp->v_rdev : NODEV);
 }
 
 /*
  * Check if vnode represents a disk device.
  *
  * Returns 1 when vp is a VCHR vnode whose device switch entry has
  * D_DISK set, and 0 otherwise.  If errp is not NULL it receives 0 on
  * success, ENOTBLK when the vnode is not a character device or its
  * driver is not flagged as a disk, and ENXIO when the vnode has no
  * device or devsw() finds no switch entry for it.
  */
 int
 vn_isdisk(vp, errp)
 	struct vnode *vp;
 	int *errp;
 {
 	struct cdevsw *dsw;
 	int error;
 
 	/* The tests run in order; the first one that fails picks the errno. */
 	error = 0;
 	if (vp->v_type != VCHR)
 		error = ENOTBLK;
 	else if (vp->v_rdev == NULL)
 		error = ENXIO;
 	else if ((dsw = devsw(vp->v_rdev)) == NULL)
 		error = ENXIO;
 	else if ((dsw->d_flags & D_DISK) == 0)
 		error = ENOTBLK;
 
 	/* Report the outcome only if the caller asked for it. */
 	if (errp != NULL)
 		*errp = error;
 	return (error == 0);
 }
 
 /*
  * Free data allocated by namei(); see namei(9) for details.
  */
 void
 NDFREE(ndp, flags)
      struct nameidata *ndp;
      const uint flags;
 {
 	if (!(flags & NDF_NO_FREE_PNBUF) &&
 	    (ndp->ni_cnd.cn_flags & HASBUF)) {
 		zfree(namei_zone, ndp->ni_cnd.cn_pnbuf);
 		ndp->ni_cnd.cn_flags &= ~HASBUF;
 	}
 	if (!(flags & NDF_NO_DVP_UNLOCK) &&
 	    (ndp->ni_cnd.cn_flags & LOCKPARENT) &&
 	    ndp->ni_dvp != ndp->ni_vp)
 		VOP_UNLOCK(ndp->ni_dvp, 0, ndp->ni_cnd.cn_proc);
 	if (!(flags & NDF_NO_DVP_RELE) &&
 	    (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) {
 		vrele(ndp->ni_dvp);
 		ndp->ni_dvp = NULL;
 	}
 	if (!(flags & NDF_NO_VP_UNLOCK) &&
 	    (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp)
 		VOP_UNLOCK(ndp->ni_vp, 0, ndp->ni_cnd.cn_proc);
 	if (!(flags & NDF_NO_VP_RELE) &&
 	    ndp->ni_vp) {
 		vrele(ndp->ni_vp);
 		ndp->ni_vp = NULL;
 	}
 	if (!(flags & NDF_NO_STARTDIR_RELE) &&
 	    (ndp->ni_cnd.cn_flags & SAVESTART)) {
 		vrele(ndp->ni_startdir);
 		ndp->ni_startdir = NULL;
 	}
 }
 
 /*
  * Common file system object access control check routine.
  *
  * Arguments:
  *	type		vnode type of the object (not consulted here)
  *	file_mode	permission bits of the object
  *	file_uid	owner of the object
  *	file_gid	group of the object
  *	acc_mode	requested access (VEXEC, VREAD, VWRITE, VADMIN bits)
  *	cred		credentials of the requester
  *	privused	optional call-by-reference flag; set to 1 when
  *			privilege was needed to satisfy the request and
  *			to 0 otherwise
  *
  * Returns 0 when access is granted.  On denial returns EPERM if VADMIN
  * was part of the request and EACCES otherwise.
  */
 int
 vaccess(type, file_mode, file_uid, file_gid, acc_mode, cred, privused)
 	enum vtype type;
 	mode_t file_mode;
 	uid_t file_uid;
 	gid_t file_gid;
 	mode_t acc_mode;
 	struct ucred *cred;
 	int *privused;
 {
 	mode_t dac_granted;
 #ifdef CAPABILITIES
 	mode_t cap_granted;
 #endif
 
 	if (privused != NULL)
 		*privused = 0;
 
 	/*
 	 * First look for a normal, non-privileged way to satisfy the
 	 * request.  Exactly one permission class is consulted: the owner
 	 * bits if the uid matches, else the group bits on the first
 	 * group match, else the bits for everyone else.
 	 */
 	dac_granted = 0;
 	if (cred->cr_uid == file_uid) {
 		/* Owner; ownership also confers VADMIN. */
 		dac_granted |= VADMIN;
 		if ((file_mode & S_IXUSR) != 0)
 			dac_granted |= VEXEC;
 		if ((file_mode & S_IRUSR) != 0)
 			dac_granted |= VREAD;
 		if ((file_mode & S_IWUSR) != 0)
 			dac_granted |= VWRITE;
 	} else if (groupmember(file_gid, cred)) {
 		/* Group. */
 		if ((file_mode & S_IXGRP) != 0)
 			dac_granted |= VEXEC;
 		if ((file_mode & S_IRGRP) != 0)
 			dac_granted |= VREAD;
 		if ((file_mode & S_IWGRP) != 0)
 			dac_granted |= VWRITE;
 	} else {
 		/* Everyone else. */
 		if ((file_mode & S_IXOTH) != 0)
 			dac_granted |= VEXEC;
 		if ((file_mode & S_IROTH) != 0)
 			dac_granted |= VREAD;
 		if ((file_mode & S_IWOTH) != 0)
 			dac_granted |= VWRITE;
 	}
 	if ((acc_mode & dac_granted) == acc_mode)
 		return (0);
 
 	/*
 	 * The permission bits alone were not enough; fall back to
 	 * privilege.
 	 */
 	if (suser_xxx(cred, NULL, PRISON_ROOT) == 0) {
 		/* XXX audit: privilege used */
 		if (privused != NULL)
 			*privused = 1;
 		return (0);
 	}
 
 #ifdef CAPABILITIES
 	/*
 	 * Build a capability mask: for every requested access type that
 	 * the permission bits did not grant, add the type to cap_granted
 	 * if the matching capability check succeeds.  The request passes
 	 * when the two masks together cover it.
 	 */
 	cap_granted = 0;
 	if ((acc_mode & VEXEC) != 0 && (dac_granted & VEXEC) == 0 &&
 	    cap_check_xxx(cred, NULL, CAP_DAC_EXECUTE, PRISON_ROOT) == 0)
 		cap_granted |= VEXEC;
 
 	if ((acc_mode & VREAD) != 0 && (dac_granted & VREAD) == 0 &&
 	    cap_check_xxx(cred, NULL, CAP_DAC_READ_SEARCH, PRISON_ROOT) == 0)
 		cap_granted |= VREAD;
 
 	if ((acc_mode & VWRITE) != 0 && (dac_granted & VWRITE) == 0 &&
 	    cap_check_xxx(cred, NULL, CAP_DAC_WRITE, PRISON_ROOT) == 0)
 		cap_granted |= VWRITE;
 
 	if ((acc_mode & VADMIN) != 0 && (dac_granted & VADMIN) == 0 &&
 	    cap_check_xxx(cred, NULL, CAP_FOWNER, PRISON_ROOT) == 0)
 		cap_granted |= VADMIN;
 
 	if ((acc_mode & (cap_granted | dac_granted)) == acc_mode) {
 		/* XXX audit: privilege used */
 		if (privused != NULL)
 			*privused = 1;
 		return (0);
 	}
 #endif
 
 	/* Denied: an administrative request fails differently. */
 	if ((acc_mode & VADMIN) != 0)
 		return (EPERM);
 	return (EACCES);
 }
Index: head/sys/kern/vnode_if.src
===================================================================
--- head/sys/kern/vnode_if.src	(revision 75579)
+++ head/sys/kern/vnode_if.src	(revision 75580)
@@ -1,573 +1,565 @@
 #
 # Copyright (c) 1992, 1993
 #	The Regents of the University of California.  All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
 # are met:
 # 1. Redistributions of source code must retain the above copyright
 #    notice, this list of conditions and the following disclaimer.
 # 2. Redistributions in binary form must reproduce the above copyright
 #    notice, this list of conditions and the following disclaimer in the
 #    documentation and/or other materials provided with the distribution.
 # 3. All advertising materials mentioning features or use of this software
 #    must display the following acknowledgement:
 #	This product includes software developed by the University of
 #	California, Berkeley and its contributors.
 # 4. Neither the name of the University nor the names of its contributors
 #    may be used to endorse or promote products derived from this software
 #    without specific prior written permission.
 #
 # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 # ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 # SUCH DAMAGE.
 #
 #	@(#)vnode_if.src	8.12 (Berkeley) 5/14/95
 # $FreeBSD$
 #
 
 #
 # Above each of the vop descriptors is a specification of the locking
 # protocol used by each vop call.  The first column is the name of
 # the variable, the remaining three columns are in, out and error
 # respectively.  The "in" column defines the lock state on input,
 # the "out" column defines the state on successful return, and the
 # "error" column defines the locking state on error exit.
 #
 # The locking value can take the following values:
 # L: locked; not converted to type of lock.
 # A: any lock type.
 # S: locked with shared lock.
 # E: locked with exclusive lock for this process.
 # O: locked with exclusive lock for other process.
 # U: unlocked.
 # -: not applicable.  vnode does not yet (or no longer) exist.
 # =: the same on input and output, may be either L or U.
 # X: locked if not nil.
 #
 
 #
 #% islocked	vp	= = =
 #
 vop_islocked {
 	IN struct vnode *vp;
 	IN struct proc *p;
 };
 
 #
 #% lookup	dvp	L ? ?
 #% lookup	vpp	- L -
 #
 # XXX - the lookup locking protocol defies simple description and depends
 #	on the flags and operation fields in the (cnp) structure.  Note
 #	especially that *vpp may equal dvp and both may be locked.
 #
 vop_lookup {
 	IN struct vnode *dvp;
 	INOUT struct vnode **vpp;
 	IN struct componentname *cnp;
 };
 
 #
 #% cachedlookup	dvp	L ? ?
 #% cachedlookup	vpp	- L -
 #
 # This must be an exact copy of lookup.  See kern/vfs_cache.c for details.
 #
 vop_cachedlookup {
 	IN struct vnode *dvp;
 	INOUT struct vnode **vpp;
 	IN struct componentname *cnp;
 };
 
 #
 #% create	dvp	L L L
 #% create	vpp	- L -
 #
 vop_create {
 	IN struct vnode *dvp;
 	OUT struct vnode **vpp;
 	IN struct componentname *cnp;
 	IN struct vattr *vap;
 };
 
 #
 #% whiteout	dvp	L L L
 #
 vop_whiteout {
 	IN struct vnode *dvp;
 	IN struct componentname *cnp;
 	IN int flags;
 };
 
 #
 #% mknod	dvp	L L L
 #% mknod	vpp	- X -
 #
 vop_mknod {
 	IN struct vnode *dvp;
 	OUT struct vnode **vpp;
 	IN struct componentname *cnp;
 	IN struct vattr *vap;
 };
 
 #
 #% open		vp	L L L
 #
 vop_open {
 	IN struct vnode *vp;
 	IN int mode;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% close	vp	U U U
 #
 vop_close {
 	IN struct vnode *vp;
 	IN int fflag;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% access	vp	L L L
 #
 vop_access {
 	IN struct vnode *vp;
 	IN int mode;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% getattr	vp	= = =
 #
 vop_getattr {
 	IN struct vnode *vp;
 	OUT struct vattr *vap;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% setattr	vp	L L L
 #
 vop_setattr {
 	IN struct vnode *vp;
 	IN struct vattr *vap;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% read		vp	L L L
 #
 vop_read {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
 	IN int ioflag;
 	IN struct ucred *cred;
 };
 
 #
 #% write	vp	L L L
 #
 vop_write {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
 	IN int ioflag;
 	IN struct ucred *cred;
 };
 
 #
 #% lease	vp	= = =
 #
 vop_lease {
 	IN struct vnode *vp;
 	IN struct proc *p;
 	IN struct ucred *cred;
 	IN int flag;
 };
 
 #
 #% ioctl	vp	U U U
 #
 vop_ioctl {
 	IN struct vnode *vp;
 	IN u_long command;
 	IN caddr_t data;
 	IN int fflag;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% poll	vp	U U U
 #
 vop_poll {
 	IN struct vnode *vp;
 	IN int events;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% kqfilter	vp	U U U
 #
 vop_kqfilter {
 	IN struct vnode *vp;
 	IN struct knote *kn;
 };
 
 #
 #% revoke	vp	U U U
 #
 vop_revoke {
 	IN struct vnode *vp;
 	IN int flags;
 };
 
 #
 #% fsync	vp	L L L
 #
 vop_fsync {
 	IN struct vnode *vp;
 	IN struct ucred *cred;
 	IN int waitfor;
 	IN struct proc *p;
 };
 
 #
 #% remove	dvp	L L L
 #% remove	vp	L L L
 #
 vop_remove {
 	IN struct vnode *dvp;
 	IN struct vnode *vp;
 	IN struct componentname *cnp;
 };
 
 #
 #% link		tdvp	L L L
 #% link		vp	U U U
 #
 vop_link {
 	IN struct vnode *tdvp;
 	IN struct vnode *vp;
 	IN struct componentname *cnp;
 };
 
 #
 #% rename	fdvp	U U U
 #% rename	fvp	U U U
 #% rename	tdvp	L U U
 #% rename	tvp	X U U
 #
 vop_rename {
 	IN WILLRELE struct vnode *fdvp;
 	IN WILLRELE struct vnode *fvp;
 	IN struct componentname *fcnp;
 	IN WILLRELE struct vnode *tdvp;
 	IN WILLRELE struct vnode *tvp;
 	IN struct componentname *tcnp;
 };
 
 #
 #% mkdir	dvp	L L L
 #% mkdir	vpp	- L -
 #
 vop_mkdir {
 	IN struct vnode *dvp;
 	OUT struct vnode **vpp;
 	IN struct componentname *cnp;
 	IN struct vattr *vap;
 };
 
 #
 #% rmdir	dvp	L L L
 #% rmdir	vp	L L L
 #
 vop_rmdir {
 	IN struct vnode *dvp;
 	IN struct vnode *vp;
 	IN struct componentname *cnp;
 };
 
 #
 #% symlink	dvp	L L L
 #% symlink	vpp	- U -
 #
 vop_symlink {
 	IN struct vnode *dvp;
 	OUT struct vnode **vpp;
 	IN struct componentname *cnp;
 	IN struct vattr *vap;
 	IN char *target;
 };
 
 #
 #% readdir	vp	L L L
 #
 vop_readdir {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
 	IN struct ucred *cred;
 	INOUT int *eofflag;
 	OUT int *ncookies;
 	INOUT u_long **cookies;
 };
 
 #
 #% readlink	vp	L L L
 #
 vop_readlink {
 	IN struct vnode *vp;
 	INOUT struct uio *uio;
 	IN struct ucred *cred;
 };
 
 #
 #% inactive	vp	L U U
 #
 vop_inactive {
 	IN struct vnode *vp;
 	IN struct proc *p;
 };
 
 #
 #% reclaim	vp	U U U
 #
 vop_reclaim {
 	IN struct vnode *vp;
 	IN struct proc *p;
 };
 
 #
 #% lock		vp	? ? ?
 #
 vop_lock {
 	IN struct vnode *vp;
 	IN int flags;
 	IN struct proc *p;
 };
 
 #
 #% unlock	vp	L U L
 #
 vop_unlock {
 	IN struct vnode *vp;
 	IN int flags;
 	IN struct proc *p;
 };
 
 #
 #% bmap		vp	L L L
 #% bmap		vpp	- U -
 #
 vop_bmap {
 	IN struct vnode *vp;
 	IN daddr_t bn;
 	OUT struct vnode **vpp;
 	IN daddr_t *bnp;
 	OUT int *runp;
 	OUT int *runb;
 };
 
 #
 #% strategy	vp	L L L
 #
 vop_strategy {
 	IN struct vnode *vp;
 	IN struct buf *bp;
 };
 
 #
 #% getwritemount vp	= = =
 #
 vop_getwritemount {
 	IN struct vnode *vp;
 	OUT struct mount **mpp;
 };
 
 #
 #% print	vp	= = =
 #
 vop_print {
 	IN struct vnode *vp;
 };
 
 #
 #% pathconf	vp	L L L
 #
 vop_pathconf {
 	IN struct vnode *vp;
 	IN int name;
 	OUT register_t *retval;
 };
 
 #
 #% advlock	vp	U U U
 #
 vop_advlock {
 	IN struct vnode *vp;
 	IN caddr_t id;
 	IN int op;
 	IN struct flock *fl;
 	IN int flags;
 };
 
 #
 #% balloc	vp	L L L
 #
 vop_balloc {
 	IN struct vnode *vp;
 	IN off_t startoffset;
 	IN int size;
 	IN struct ucred *cred;
 	IN int flags;
 	OUT struct buf **bpp;
 };
 
 #
 #% reallocblks	vp	L L L
 #
 vop_reallocblks {
 	IN struct vnode *vp;
 	IN struct cluster_save *buflist;
 };
 
 #
 #% getpages	vp	L L L
 #
 vop_getpages {
 	IN struct vnode *vp;
 	IN vm_page_t *m;
 	IN int count;
 	IN int reqpage;
 	IN vm_ooffset_t offset;
 };
 
 #
 #% putpages	vp	L L L
 #
 vop_putpages {
 	IN struct vnode *vp;
 	IN vm_page_t *m;
 	IN int count;
 	IN int sync;
 	IN int *rtvals;
 	IN vm_ooffset_t offset;
 };
 
 #
 #% freeblks	vp	- - -
 #
 # This call is used by the filesystem to release blocks back to the
 # device driver.  This is useful if the driver has lengthy
 # erase handling or similar.
 #
 
 vop_freeblks {
 	IN struct vnode *vp;
 	IN daddr_t addr;
 	IN daddr_t length;
 };
 
 #
-#% bwrite	vp	L L L
-#
-vop_bwrite {
-	IN struct vnode *vp;
-	IN struct buf *bp;
-};
-
-#
 #% getacl	vp	L L L
 #
 vop_getacl {
 	IN struct vnode *vp;
 	IN acl_type_t type;
 	OUT struct acl *aclp;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% setacl	vp	L L L
 #
 vop_setacl {
 	IN struct vnode *vp;
 	IN acl_type_t type;
 	IN struct acl *aclp;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% aclcheck	vp	= = =
 #
 vop_aclcheck {
 	IN struct vnode *vp;
 	IN acl_type_t type;
 	IN struct acl *aclp;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% getextattr	vp	L L L
 #
 vop_getextattr {
 	IN struct vnode *vp;
 	IN int attrnamespace;
 	IN const char *name;
 	INOUT struct uio *uio;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% setextattr	vp	L L L
 #
 vop_setextattr {
 	IN struct vnode *vp;
 	IN int attrnamespace;
 	IN const char *name;
 	INOUT struct uio *uio;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% createvobject	vp	L L L
 #
 vop_createvobject {
 	IN struct vnode *vp;
 	IN struct ucred *cred;
 	IN struct proc *p;
 };
 
 #
 #% destroyvobject	vp	L L L
 #
 vop_destroyvobject {
 	IN struct vnode *vp;
 };
 
 #
 #% getvobject	vp	L L L
 #
 vop_getvobject {
 	IN struct vnode *vp;
 	OUT struct vm_object **objpp;
 };
Index: head/sys/nfs/nfs_bio.c
===================================================================
--- head/sys/nfs/nfs_bio.c	(revision 75579)
+++ head/sys/nfs/nfs_bio.c	(revision 75580)
@@ -1,1604 +1,1623 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
  * $FreeBSD$
  */
 
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/kernel.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
 #include <nfs/nfsmount.h>
 #include <nfs/nqnfs.h>
 #include <nfs/nfsnode.h>
 
+/*
+ * Just call nfs_writebp() with the force argument set to 1 and curproc
+ * as the process, and hand its return value back to the caller.
+ *
+ * NOTE: B_DONE may or may not be set in bp on call.
+ */
+static int
+nfs_bwrite(struct buf *bp)
+{
+	return (nfs_writebp(bp, 1, curproc));
+}
+
+struct buf_ops buf_ops_nfs = {	/* buffer operations vector for NFS */
+	"buf_ops_nfs",		/* presumably the vector's name -- confirm against struct buf_ops */
+	nfs_bwrite		/* write hook: forwards to nfs_writebp() with force set */
+};
+
+
 static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
 					struct proc *p));
 
 extern int nfs_numasync;
 extern int nfs_pbuf_freecnt;
 extern struct nfsstats nfsstats;
 
 /*
  * Vnode op for VM getpages.
  *
  * Maps the caller's pages into the kva of a borrowed pbuf, issues a
  * single nfs_readrpc() for a_count bytes starting at the offset of the
  * first page, and then marks each page valid according to how many
  * bytes were actually read.
  *
  * Returns VM_PAGER_ERROR when the vnode has no VM object or when the
  * read failed without transferring any data; otherwise returns 0.
  * Pages other than a_reqpage are freed, activated or deactivated here;
  * the requested page is left for the caller.
  */
 int
 nfs_getpages(ap)
 	struct vop_getpages_args /* {
 		struct vnode *a_vp;
 		vm_page_t *a_m;
 		int a_count;
 		int a_reqpage;
 		vm_ooffset_t a_offset;
 	} */ *ap;
 {
 	int i, error, nextoff, size, toff, count, npages;
 	struct uio uio;
 	struct iovec iov;
 	vm_offset_t kva;
 	struct buf *bp;
 	struct vnode *vp;
 	struct proc *p;
 	struct ucred *cred;
 	struct nfsmount *nmp;
 	vm_page_t *pages;
 
 	vp = ap->a_vp;
 	p = curproc;				/* XXX */
 	cred = curproc->p_ucred;		/* XXX */
 	nmp = VFSTONFS(vp->v_mount);
 	pages = ap->a_m;
 	count = ap->a_count;
 
 	if (vp->v_object == NULL) {
 		printf("nfs_getpages: called with non-merged cache vnode??\n");
 		return VM_PAGER_ERROR;
 	}
 
 	/* On an NFSv3 mount, fetch the fsinfo once if we do not have it yet. */
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
 		(void)nfs_fsinfo(nmp, vp, cred, p);
 
 	/* count is in bytes; npages is the matching number of pages. */
 	npages = btoc(count);
 
 	/*
 	 * If the requested page is partially valid, just return it and
 	 * allow the pager to zero-out the blanks.  Partially valid pages
 	 * can only occur at the file EOF.
 	 */
 
 	{
 		vm_page_t m = pages[ap->a_reqpage];
 
 		if (m->valid != 0) {
 			/* handled by vm_fault now	  */
 			/* vm_page_zero_invalid(m, TRUE); */
 			/* Nothing to read: drop every page but the requested one. */
 			for (i = 0; i < npages; ++i) {
 				if (i != ap->a_reqpage)
 					vnode_pager_freepage(pages[i]);
 			}
 			return(0);
 		}
 	}
 
 	/*
 	 * We use only the kva address for the buffer, but this is extremely
 	 * convenient and fast.
 	 */
 	bp = getpbuf(&nfs_pbuf_freecnt);
 
 	kva = (vm_offset_t) bp->b_data;
 	pmap_qenter(kva, pages, npages);
 
 	/* Describe one kernel-space read of count bytes into the mapping. */
 	iov.iov_base = (caddr_t) kva;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
 	uio.uio_resid = count;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_READ;
 	uio.uio_procp = p;
 
 	error = nfs_readrpc(vp, &uio, cred);
 	/* The mapping and the pbuf are released whether or not the read worked. */
 	pmap_qremove(kva, npages);
 
 	relpbuf(bp, &nfs_pbuf_freecnt);
 
 	/* The read failed and moved no data at all: give up on the request. */
 	if (error && (uio.uio_resid == count)) {
 		printf("nfs_getpages: error %d\n", error);
 		for (i = 0; i < npages; ++i) {
 			if (i != ap->a_reqpage)
 				vnode_pager_freepage(pages[i]);
 		}
 		return VM_PAGER_ERROR;
 	}
 
 	/*
 	 * Calculate the number of bytes read and validate only that number
 	 * of bytes.  Note that due to pending writes, size may be 0.  This
 	 * does not mean that the remaining data is invalid!
 	 */
 
 	size = count - uio.uio_resid;
 
 	/* [toff, nextoff) is the byte range of page i within the request. */
 	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
 		vm_page_t m;
 		nextoff = toff + PAGE_SIZE;
 		m = pages[i];
 
 		m->flags &= ~PG_ZERO;
 
 		if (nextoff <= size) {
 			/*
 			 * Read operation filled an entire page
 			 */
 			m->valid = VM_PAGE_BITS_ALL;
 			vm_page_undirty(m);
 		} else if (size > toff) {
 			/*
 			 * Read operation filled a partial page.
 			 */
 			m->valid = 0;
 			vm_page_set_validclean(m, 0, size - toff);
 			/* handled by vm_fault now	  */
 			/* vm_page_zero_invalid(m, TRUE); */
 		}
 		
 		if (i != ap->a_reqpage) {
 			/*
 			 * Whether or not to leave the page activated is up in
 			 * the air, but we should put the page on a page queue
 			 * somewhere (it already is in the object).  Result:
 			 * It appears that empirical results show that
 			 * deactivating pages is best.
 			 */
 
 			/*
 			 * Just in case someone was asking for this page we
 			 * now tell them that it is ok to use.
 			 */
 			if (!error) {
 				if (m->flags & PG_WANTED)
 					vm_page_activate(m);
 				else
 					vm_page_deactivate(m);
 				vm_page_wakeup(m);
 			} else {
 				/* Partial read with an error: do not keep the extra page. */
 				vnode_pager_freepage(m);
 			}
 		}
 	}
 	return 0;
 }
 
 /*
  * Vnode op for VM putpages.
  *
  * Maps the caller's pages into the kva of a borrowed pbuf and pushes
  * them to the server with a single nfs_writerpc(), starting at the
  * offset of the first page.  The byte count is clamped so the write
  * never extends the file past np->n_size.
  *
  * Every entry of a_rtvals starts out as VM_PAGER_AGAIN; on a successful
  * write the entries for the pages covered by the bytes written are set
  * to VM_PAGER_OK and those pages are undirtied.  Returns a_rtvals[0].
  */
 int
 nfs_putpages(ap)
 	struct vop_putpages_args /* {
 		struct vnode *a_vp;
 		vm_page_t *a_m;
 		int a_count;
 		int a_sync;
 		int *a_rtvals;
 		vm_ooffset_t a_offset;
 	} */ *ap;
 {
 	struct uio uio;
 	struct iovec iov;
 	vm_offset_t kva;
 	struct buf *bp;
 	int iomode, must_commit, i, error, npages, count;
 	off_t offset;
 	int *rtvals;
 	struct vnode *vp;
 	struct proc *p;
 	struct ucred *cred;
 	struct nfsmount *nmp;
 	struct nfsnode *np;
 	vm_page_t *pages;
 
 	vp = ap->a_vp;
 	np = VTONFS(vp);
 	p = curproc;				/* XXX */
 	cred = curproc->p_ucred;		/* XXX */
 	nmp = VFSTONFS(vp->v_mount);
 	pages = ap->a_m;
 	count = ap->a_count;
 	rtvals = ap->a_rtvals;
 	/* npages is taken from the unclamped count, so it covers every page passed in. */
 	npages = btoc(count);
 	offset = IDX_TO_OFF(pages[0]->pindex);
 
 	/* On an NFSv3 mount, fetch the fsinfo once if we do not have it yet. */
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
 		(void)nfs_fsinfo(nmp, vp, cred, p);
 
 	/* Default result for every page until the write proves otherwise. */
 	for (i = 0; i < npages; i++) {
 		rtvals[i] = VM_PAGER_AGAIN;
 	}
 
 	/*
 	 * When putting pages, do not extend file past EOF.
 	 */
 
 	if (offset + count > np->n_size) {
 		count = np->n_size - offset;
 		if (count < 0)
 			count = 0;
 	}
 
 	/*
 	 * We use only the kva address for the buffer, but this is extremely
 	 * convenient and fast.
 	 */
 	bp = getpbuf(&nfs_pbuf_freecnt);
 
 	kva = (vm_offset_t) bp->b_data;
 	pmap_qenter(kva, pages, npages);
 
 	/* Describe one kernel-space write of count bytes from the mapping. */
 	iov.iov_base = (caddr_t) kva;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = offset;
 	uio.uio_resid = count;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_WRITE;
 	uio.uio_procp = p;
 
 	/* Only a request with VM_PAGER_PUT_SYNC set asks for a FILESYNC write. */
 	if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0)
 	    iomode = NFSV3WRITE_UNSTABLE;
 	else
 	    iomode = NFSV3WRITE_FILESYNC;
 
 	error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
 
 	/* The mapping and the pbuf are released whether or not the write worked. */
 	pmap_qremove(kva, npages);
 	relpbuf(bp, &nfs_pbuf_freecnt);
 
 	if (!error) {
 		/* Bytes written, rounded up to whole pages. */
 		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
 		for (i = 0; i < nwritten; i++) {
 			rtvals[i] = VM_PAGER_OK;
 			vm_page_undirty(pages[i]);
 		}
 		if (must_commit)
 			nfs_clearcommit(vp->v_mount);
 	}
 	return rtvals[0];
 }
 
 /*
  * Vnode op for read using bio
  */
 int
 nfs_bioread(vp, uio, ioflag, cred)
 	register struct vnode *vp;
 	register struct uio *uio;
 	int ioflag;
 	struct ucred *cred;
 {
 	register struct nfsnode *np = VTONFS(vp);
 	register int biosize, i;
 	struct buf *bp = 0, *rabp;
 	struct vattr vattr;
 	struct proc *p;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	daddr_t lbn, rabn;
 	int bcount;
 	int seqcount;
 	int nra, error = 0, n = 0, on = 0;
 
 #ifdef DIAGNOSTIC
 	if (uio->uio_rw != UIO_READ)
 		panic("nfs_read mode");
 #endif
 	if (uio->uio_resid == 0)
 		return (0);
 	if (uio->uio_offset < 0)	/* XXX VDIR cookies can be negative */
 		return (EINVAL);
 	p = uio->uio_procp;
 
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
 		(void)nfs_fsinfo(nmp, vp, cred, p);
 	if (vp->v_type != VDIR &&
 	    (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
 		return (EFBIG);
 	biosize = vp->v_mount->mnt_stat.f_iosize;
 	seqcount = (int)((off_t)(ioflag >> 16) * biosize / BKVASIZE);
 	/*
 	 * For nfs, cache consistency can only be maintained approximately.
 	 * Although RFC1094 does not specify the criteria, the following is
 	 * believed to be compatible with the reference port.
 	 * For nqnfs, full cache consistency is maintained within the loop.
 	 * For nfs:
 	 * If the file's modify time on the server has changed since the
 	 * last read rpc or you have written to the file,
 	 * you may have lost data cache consistency with the
 	 * server, so flush all of the file's data out of the cache.
 	 * Then force a getattr rpc to ensure that you have up to date
 	 * attributes.
 	 * NB: This implies that cache data can be read when up to
 	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
 	 * attributes this could be forced by setting n_attrstamp to 0 before
 	 * the VOP_GETATTR() call.
 	 */
 	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
 		if (np->n_flag & NMODIFIED) {
 			if (vp->v_type != VREG) {
 				if (vp->v_type != VDIR)
 					panic("nfs: bioread, not dir");
 				nfs_invaldir(vp);
 				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 				if (error)
 					return (error);
 			}
 			np->n_attrstamp = 0;
 			error = VOP_GETATTR(vp, &vattr, cred, p);
 			if (error)
 				return (error);
 			np->n_mtime = vattr.va_mtime.tv_sec;
 		} else {
 			error = VOP_GETATTR(vp, &vattr, cred, p);
 			if (error)
 				return (error);
 			if (np->n_mtime != vattr.va_mtime.tv_sec) {
 				if (vp->v_type == VDIR)
 					nfs_invaldir(vp);
 				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 				if (error)
 					return (error);
 				np->n_mtime = vattr.va_mtime.tv_sec;
 			}
 		}
 	}
 	do {
 
 	    /*
 	     * Get a valid lease. If cached data is stale, flush it.
 	     */
 	    if (nmp->nm_flag & NFSMNT_NQNFS) {
 		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
 		    do {
 			error = nqnfs_getlease(vp, ND_READ, cred, p);
 		    } while (error == NQNFS_EXPIRED);
 		    if (error)
 			return (error);
 		    if (np->n_lrev != np->n_brev ||
 			(np->n_flag & NQNFSNONCACHE) ||
 			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
 			if (vp->v_type == VDIR)
 			    nfs_invaldir(vp);
 			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 			if (error)
 			    return (error);
 			np->n_brev = np->n_lrev;
 		    }
 		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
 		    nfs_invaldir(vp);
 		    error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 		    if (error)
 			return (error);
 		}
 	    }
 	    if (np->n_flag & NQNFSNONCACHE) {
 		switch (vp->v_type) {
 		case VREG:
 			return (nfs_readrpc(vp, uio, cred));
 		case VLNK:
 			return (nfs_readlinkrpc(vp, uio, cred));
 		case VDIR:
 			break;
 		default:
 			printf(" NQNFSNONCACHE: type %x unexpected\n",	
 				vp->v_type);
 		};
 	    }
 	    switch (vp->v_type) {
 	    case VREG:
 		nfsstats.biocache_reads++;
 		lbn = uio->uio_offset / biosize;
 		on = uio->uio_offset & (biosize - 1);
 
 		/*
 		 * Start the read ahead(s), as required.
 		 */
 		if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
 		    for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
 			(off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
 			rabn = lbn + 1 + nra;
 			if (!incore(vp, rabn)) {
 			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
 			    if (!rabp)
 				return (EINTR);
 			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
 				rabp->b_flags |= B_ASYNC;
 				rabp->b_iocmd = BIO_READ;
 				vfs_busy_pages(rabp, 0);
 				if (nfs_asyncio(rabp, cred, p)) {
 				    rabp->b_flags |= B_INVAL;
 				    rabp->b_ioflags |= BIO_ERROR;
 				    vfs_unbusy_pages(rabp);
 				    brelse(rabp);
 				    break;
 				}
 			    } else {
 				brelse(rabp);
 			    }
 			}
 		    }
 		}
 
 		/*
 		 * Obtain the buffer cache block.  Figure out the buffer size
 		 * when we are at EOF.  If we are modifying the size of the
 		 * buffer based on an EOF condition we need to hold 
 		 * nfs_rslock() through obtaining the buffer to prevent
 		 * a potential writer-appender from messing with n_size.
 		 * Otherwise we may accidentally truncate the buffer and
 		 * lose dirty data.
 		 *
 		 * Note that bcount is *not* DEV_BSIZE aligned.
 		 */
 
 again:
 		bcount = biosize;
 		if ((off_t)lbn * biosize >= np->n_size) {
 			bcount = 0;
 		} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
 			bcount = np->n_size - (off_t)lbn * biosize;
 		}
 		if (bcount != biosize) {
 			switch(nfs_rslock(np, p)) {
 			case ENOLCK:
 				goto again;
 				/* not reached */
 			case EINTR:
 			case ERESTART:
 				return(EINTR);
 				/* not reached */
 			default:
 				break;
 			}
 		}
 
 		bp = nfs_getcacheblk(vp, lbn, bcount, p);
 
 		if (bcount != biosize)
 			nfs_rsunlock(np, p);
 		if (!bp)
 			return (EINTR);
 
 		/*
 		 * If B_CACHE is not set, we must issue the read.  If this
 		 * fails, we return an error.
 		 */
 
 		if ((bp->b_flags & B_CACHE) == 0) {
 		    bp->b_iocmd = BIO_READ;
 		    vfs_busy_pages(bp, 0);
 		    error = nfs_doio(bp, cred, p);
 		    if (error) {
 			brelse(bp);
 			return (error);
 		    }
 		}
 
 		/*
 		 * on is the offset into the current bp.  Figure out how many
 		 * bytes we can copy out of the bp.  Note that bcount is
 		 * NOT DEV_BSIZE aligned.
 		 *
 		 * Then figure out how many bytes we can copy into the uio.
 		 */
 
 		n = 0;
 		if (on < bcount)
 			n = min((unsigned)(bcount - on), uio->uio_resid);
 		break;
 	    case VLNK:
 		nfsstats.biocache_readlinks++;
 		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
 		if (!bp)
 			return (EINTR);
 		if ((bp->b_flags & B_CACHE) == 0) {
 		    bp->b_iocmd = BIO_READ;
 		    vfs_busy_pages(bp, 0);
 		    error = nfs_doio(bp, cred, p);
 		    if (error) {
 			bp->b_ioflags |= BIO_ERROR;
 			brelse(bp);
 			return (error);
 		    }
 		}
 		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
 		on = 0;
 		break;
 	    case VDIR:
 		nfsstats.biocache_readdirs++;
 		if (np->n_direofoffset
 		    && uio->uio_offset >= np->n_direofoffset) {
 		    return (0);
 		}
 		lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ;
 		on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
 		bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
 		if (!bp)
 		    return (EINTR);
 		if ((bp->b_flags & B_CACHE) == 0) {
 		    bp->b_iocmd = BIO_READ;
 		    vfs_busy_pages(bp, 0);
 		    error = nfs_doio(bp, cred, p);
 		    if (error) {
 			    brelse(bp);
 		    }
 		    while (error == NFSERR_BAD_COOKIE) {
 			printf("got bad cookie vp %p bp %p\n", vp, bp);
 			nfs_invaldir(vp);
 			error = nfs_vinvalbuf(vp, 0, cred, p, 1);
 			/*
 			 * Yuck! The directory has been modified on the
 			 * server. The only way to get the block is by
 			 * reading from the beginning to get all the
 			 * offset cookies.
 			 *
 			 * Leave the last bp intact unless there is an error.
 			 * Loop back up to the while if the error is another
 			 * NFSERR_BAD_COOKIE (double yuch!).
 			 */
 			for (i = 0; i <= lbn && !error; i++) {
 			    if (np->n_direofoffset
 				&& (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
 				    return (0);
 			    bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
 			    if (!bp)
 				return (EINTR);
 			    if ((bp->b_flags & B_CACHE) == 0) {
 				    bp->b_iocmd = BIO_READ;
 				    vfs_busy_pages(bp, 0);
 				    error = nfs_doio(bp, cred, p);
 				    /*
 				     * no error + B_INVAL == directory EOF,
 				     * use the block.
 				     */
 				    if (error == 0 && (bp->b_flags & B_INVAL))
 					    break;
 			    }
 			    /*
 			     * An error will throw away the block and the
 			     * for loop will break out.  If no error and this
 			     * is not the block we want, we throw away the
 			     * block and go for the next one via the for loop.
 			     */
 			    if (error || i < lbn)
 				    brelse(bp);
 			}
 		    }
 		    /*
 		     * The above while is repeated if we hit another cookie
 		     * error.  If we hit an error and it wasn't a cookie error,
 		     * we give up.
 		     */
 		    if (error)
 			    return (error);
 		}
 
 		/*
 		 * If not eof and read aheads are enabled, start one.
 		 * (You need the current block first, so that you have the
 		 *  directory offset cookie of the next block.)
 		 */
 		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
 		    (bp->b_flags & B_INVAL) == 0 &&
 		    (np->n_direofoffset == 0 ||
 		    (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
 		    !(np->n_flag & NQNFSNONCACHE) &&
 		    !incore(vp, lbn + 1)) {
 			rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
 			if (rabp) {
 			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
 				rabp->b_flags |= B_ASYNC;
 				rabp->b_iocmd = BIO_READ;
 				vfs_busy_pages(rabp, 0);
 				if (nfs_asyncio(rabp, cred, p)) {
 				    rabp->b_flags |= B_INVAL;
 				    rabp->b_ioflags |= BIO_ERROR;
 				    vfs_unbusy_pages(rabp);
 				    brelse(rabp);
 				}
 			    } else {
 				brelse(rabp);
 			    }
 			}
 		}
 		/*
 		 * Unlike VREG files, whos buffer size ( bp->b_bcount ) is
 		 * chopped for the EOF condition, we cannot tell how large
 		 * NFS directories are going to be until we hit EOF.  So
 		 * an NFS directory buffer is *not* chopped to its EOF.  Now,
 		 * it just so happens that b_resid will effectively chop it
 		 * to EOF.  *BUT* this information is lost if the buffer goes
 		 * away and is reconstituted into a B_CACHE state ( due to
 		 * being VMIO ) later.  So we keep track of the directory eof
 		 * in np->n_direofoffset and chop it off as an extra step 
 		 * right here.
 		 */
 		n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
 		if (np->n_direofoffset && n > np->n_direofoffset - uio->uio_offset)
 			n = np->n_direofoffset - uio->uio_offset;
 		break;
 	    default:
 		printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
 		break;
 	    };
 
 	    if (n > 0) {
 		    error = uiomove(bp->b_data + on, (int)n, uio);
 	    }
 	    switch (vp->v_type) {
 	    case VREG:
 		break;
 	    case VLNK:
 		n = 0;
 		break;
 	    case VDIR:
 		/*
 		 * Invalidate buffer if caching is disabled, forcing a
 		 * re-read from the remote later.
 		 */
 		if (np->n_flag & NQNFSNONCACHE)
 			bp->b_flags |= B_INVAL;
 		break;
 	    default:
 		printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
 	    }
 	    brelse(bp);
 	} while (error == 0 && uio->uio_resid > 0 && n > 0);
 	return (error);
 }
 
 /*
  * Vnode op for write using bio
  */
 int
 nfs_write(ap)
 	struct vop_write_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	int biosize;
 	struct uio *uio = ap->a_uio;
 	struct proc *p = uio->uio_procp;
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct ucred *cred = ap->a_cred;
 	int ioflag = ap->a_ioflag;
 	struct buf *bp;
 	struct vattr vattr;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	daddr_t lbn;
 	int bcount;
 	int n, on, error = 0, iomode, must_commit;
 	int haverslock = 0;
 
 #ifdef DIAGNOSTIC
 	if (uio->uio_rw != UIO_WRITE)
 		panic("nfs_write mode");
 	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
 		panic("nfs_write proc");
 #endif
 	if (vp->v_type != VREG)
 		return (EIO);
 	if (np->n_flag & NWRITEERR) {
 		np->n_flag &= ~NWRITEERR;
 		return (np->n_error);
 	}
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
 		(void)nfs_fsinfo(nmp, vp, cred, p);
 
 	/*
 	 * Synchronously flush pending buffers if we are in synchronous
 	 * mode or if we are appending.
 	 */
 	if (ioflag & (IO_APPEND | IO_SYNC)) {
 		if (np->n_flag & NMODIFIED) {
 			np->n_attrstamp = 0;
 			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 			if (error)
 				return (error);
 		}
 	}
 
 	/*
 	 * If IO_APPEND then load uio_offset.  We restart here if we cannot
 	 * get the append lock.
 	 */
 restart:
 	if (ioflag & IO_APPEND) {
 		np->n_attrstamp = 0;
 		error = VOP_GETATTR(vp, &vattr, cred, p);
 		if (error)
 			return (error);
 		uio->uio_offset = np->n_size;
 	}
 
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
 		return (EFBIG);
 	if (uio->uio_resid == 0)
 		return (0);
 
 	/*
 	 * We need to obtain the rslock if we intend to modify np->n_size
 	 * in order to guarentee the append point with multiple contending
 	 * writers, to guarentee that no other appenders modify n_size
 	 * while we are trying to obtain a truncated buffer (i.e. to avoid
 	 * accidently truncating data written by another appender due to
 	 * the race), and to ensure that the buffer is populated prior to
 	 * our extending of the file.  We hold rslock through the entire
 	 * operation.
 	 *
 	 * Note that we do not synchronize the case where someone truncates
 	 * the file while we are appending to it because attempting to lock
 	 * this case may deadlock other parts of the system unexpectedly.
 	 */
 	if ((ioflag & IO_APPEND) ||
 	    uio->uio_offset + uio->uio_resid > np->n_size) {
 		switch(nfs_rslock(np, p)) {
 		case ENOLCK:
 			goto restart;
 			/* not reached */
 		case EINTR:
 		case ERESTART:
 			return(EINTR);
 			/* not reached */
 		default:
 			break;
 		}
 		haverslock = 1;
 	}
 
 	/*
 	 * Maybe this should be above the vnode op call, but so long as
 	 * file servers have no limits, i don't think it matters
 	 */
 	if (p && uio->uio_offset + uio->uio_resid >
 	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
 		PROC_LOCK(p);
 		psignal(p, SIGXFSZ);
 		PROC_UNLOCK(p);
 		if (haverslock)
 			nfs_rsunlock(np, p);
 		return (EFBIG);
 	}
 
 	biosize = vp->v_mount->mnt_stat.f_iosize;
 
 	do {
 		/*
 		 * Check for a valid write lease.
 		 */
 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
 		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
 			do {
 				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
 			} while (error == NQNFS_EXPIRED);
 			if (error)
 				break;
 			if (np->n_lrev != np->n_brev ||
 			    (np->n_flag & NQNFSNONCACHE)) {
 				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 				if (error)
 					break;
 				np->n_brev = np->n_lrev;
 			}
 		}
 		if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
 		    iomode = NFSV3WRITE_FILESYNC;
 		    error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
 		    if (must_commit)
 			    nfs_clearcommit(vp->v_mount);
 		    break;
 		}
 		nfsstats.biocache_writes++;
 		lbn = uio->uio_offset / biosize;
 		on = uio->uio_offset & (biosize-1);
 		n = min((unsigned)(biosize - on), uio->uio_resid);
 again:
 		/*
 		 * Handle direct append and file extension cases, calculate
 		 * unaligned buffer size.
 		 */
 
 		if (uio->uio_offset == np->n_size && n) {
 			/*
 			 * Get the buffer (in its pre-append state to maintain
 			 * B_CACHE if it was previously set).  Resize the
 			 * nfsnode after we have locked the buffer to prevent
 			 * readers from reading garbage.
 			 */
 			bcount = on;
 			bp = nfs_getcacheblk(vp, lbn, bcount, p);
 
 			if (bp != NULL) {
 				long save;
 
 				np->n_size = uio->uio_offset + n;
 				np->n_flag |= NMODIFIED;
 				vnode_pager_setsize(vp, np->n_size);
 
 				save = bp->b_flags & B_CACHE;
 				bcount += n;
 				allocbuf(bp, bcount);
 				bp->b_flags |= save;
+				bp->b_magic = B_MAGIC_NFS;
+				bp->b_op = &buf_ops_nfs;
 			}
 		} else {
 			/*
 			 * Obtain the locked cache block first, and then 
 			 * adjust the file's size as appropriate.
 			 */
 			bcount = on + n;
 			if ((off_t)lbn * biosize + bcount < np->n_size) {
 				if ((off_t)(lbn + 1) * biosize < np->n_size)
 					bcount = biosize;
 				else
 					bcount = np->n_size - (off_t)lbn * biosize;
 			}
 
 			bp = nfs_getcacheblk(vp, lbn, bcount, p);
 
 			if (uio->uio_offset + n > np->n_size) {
 				np->n_size = uio->uio_offset + n;
 				np->n_flag |= NMODIFIED;
 				vnode_pager_setsize(vp, np->n_size);
 			}
 		}
 
 		if (!bp) {
 			error = EINTR;
 			break;
 		}
 
 		/*
 		 * Issue a READ if B_CACHE is not set.  In special-append
 		 * mode, B_CACHE is based on the buffer prior to the write
 		 * op and is typically set, avoiding the read.  If a read
 		 * is required in special append mode, the server will
 		 * probably send us a short-read since we extended the file
 		 * on our end, resulting in b_resid == 0 and, thusly, 
 		 * B_CACHE getting set.
 		 *
 		 * We can also avoid issuing the read if the write covers
 		 * the entire buffer.  We have to make sure the buffer state
 		 * is reasonable in this case since we will not be initiating
 		 * I/O.  See the comments in kern/vfs_bio.c's getblk() for
 		 * more information.
 		 *
 		 * B_CACHE may also be set due to the buffer being cached
 		 * normally.
 		 */
 
 		if (on == 0 && n == bcount) {
 			bp->b_flags |= B_CACHE;
 			bp->b_flags &= ~B_INVAL;
 			bp->b_ioflags &= ~BIO_ERROR;
 		}
 
 		if ((bp->b_flags & B_CACHE) == 0) {
 			bp->b_iocmd = BIO_READ;
 			vfs_busy_pages(bp, 0);
 			error = nfs_doio(bp, cred, p);
 			if (error) {
 				brelse(bp);
 				break;
 			}
 		}
 		if (!bp) {
 			error = EINTR;
 			break;
 		}
 		if (bp->b_wcred == NOCRED) {
 			crhold(cred);
 			bp->b_wcred = cred;
 		}
 		np->n_flag |= NMODIFIED;
 
 		/*
 		 * If dirtyend exceeds file size, chop it down.  This should
 		 * not normally occur but there is an append race where it
 		 * might occur XXX, so we log it. 
 		 *
 		 * If the chopping creates a reverse-indexed or degenerate
 		 * situation with dirtyoff/end, we 0 both of them.
 		 */
 
 		if (bp->b_dirtyend > bcount) {
 			printf("NFS append race @%lx:%d\n", 
 			    (long)bp->b_blkno * DEV_BSIZE, 
 			    bp->b_dirtyend - bcount);
 			bp->b_dirtyend = bcount;
 		}
 
 		if (bp->b_dirtyoff >= bp->b_dirtyend)
 			bp->b_dirtyoff = bp->b_dirtyend = 0;
 
 		/*
 		 * If the new write will leave a contiguous dirty
 		 * area, just update the b_dirtyoff and b_dirtyend,
 		 * otherwise force a write rpc of the old dirty area.
 		 *
 		 * While it is possible to merge discontiguous writes due to 
 		 * our having a B_CACHE buffer ( and thus valid read data
 		 * for the hole), we don't because it could lead to 
 		 * significant cache coherency problems with multiple clients,
 		 * especially if locking is implemented later on.
 		 *
 		 * as an optimization we could theoretically maintain
 		 * a linked list of discontinuous areas, but we would still
 		 * have to commit them separately so there isn't much
 		 * advantage to it except perhaps a bit of asynchronization.
 		 */
 
 		if (bp->b_dirtyend > 0 &&
 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
 			if (BUF_WRITE(bp) == EINTR)
 				return (EINTR);
 			goto again;
 		}
 
 		/*
 		 * Check for valid write lease and get one as required.
 		 * In case getblk() and/or bwrite() delayed us.
 		 */
 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
 		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
 			do {
 				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
 			} while (error == NQNFS_EXPIRED);
 			if (error) {
 				brelse(bp);
 				break;
 			}
 			if (np->n_lrev != np->n_brev ||
 			    (np->n_flag & NQNFSNONCACHE)) {
 				brelse(bp);
 				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 				if (error)
 					break;
 				np->n_brev = np->n_lrev;
 				goto again;
 			}
 		}
 
 		error = uiomove((char *)bp->b_data + on, n, uio);
 
 		/*
 		 * Since this block is being modified, it must be written
 		 * again and not just committed.  Since write clustering does
 		 * not work for the stage 1 data write, only the stage 2
 		 * commit rpc, we have to clear B_CLUSTEROK as well.
 		 */
 		bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 
 		if (error) {
 			bp->b_ioflags |= BIO_ERROR;
 			brelse(bp);
 			break;
 		}
 
 		/*
 		 * Only update dirtyoff/dirtyend if not a degenerate 
 		 * condition.
 		 */
 		if (n) {
 			if (bp->b_dirtyend > 0) {
 				bp->b_dirtyoff = min(on, bp->b_dirtyoff);
 				bp->b_dirtyend = max((on + n), bp->b_dirtyend);
 			} else {
 				bp->b_dirtyoff = on;
 				bp->b_dirtyend = on + n;
 			}
 			vfs_bio_set_validclean(bp, on, n);
 		}
 
 		/*
 		 * If the lease is non-cachable or IO_SYNC do bwrite().
 		 *
 		 * IO_INVAL appears to be unused.  The idea appears to be
 		 * to turn off caching in this case.  Very odd.  XXX
 		 */
 		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
 			if (ioflag & IO_INVAL)
 				bp->b_flags |= B_NOCACHE;
 			error = BUF_WRITE(bp);
 			if (error)
 				break;
 			if (np->n_flag & NQNFSNONCACHE) {
 				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 				if (error)
 					break;
 			}
 		} else if ((n + on) == biosize &&
 			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
 			bp->b_flags |= B_ASYNC;
 			(void)nfs_writebp(bp, 0, 0);
 		} else {
 			bdwrite(bp);
 		}
 	} while (uio->uio_resid > 0 && n > 0);
 
 	if (haverslock)
 		nfs_rsunlock(np, p);
 
 	return (error);
 }
 
 /*
  * Look up (or create) the NFS cache block for logical block bn of vp
  * and hand it back busy.
  *
  * On a mount with NFSMNT_INT set the first getblk() is made with PCATCH;
  * whenever getblk() comes back empty we check for a pending signal via
  * nfs_sigintr() and either give up (returning a null pointer) or retry
  * with a 2 second timeout.  On other mounts a single plain getblk() is
  * issued.
  *
  * For regular files b_blkno is set to bn expressed in DEV_BSIZE units
  * of the mount's I/O size.
  *
  * The caller must carefully deal with the possible B_INVAL state of
  * the buffer.  nfs_doio() clears B_INVAL (and nfs_asyncio() clears it
  * indirectly), so synchronous reads can be issued without worrying about
  * the B_INVAL state.  Writes that extend a file past its EOF need more
  * care (see the comments in nfs_write()).
  */
 static struct buf *
 nfs_getcacheblk(vp, bn, size, p)
 	struct vnode *vp;
 	daddr_t bn;
 	int size;
 	struct proc *p;
 {
 	struct mount *mnt = vp->v_mount;
 	struct nfsmount *nfsmnt = VFSTONFS(mnt);
 	struct buf *blk;
 
 	if ((nfsmnt->nm_flag & NFSMNT_INT) == 0) {
 		/* Non-interruptible mount: one plain request. */
 		blk = getblk(vp, bn, size, 0, 0);
 	} else {
 		/*
 		 * Interruptible mount: first try catches signals, later
 		 * tries time out after two seconds so the signal check
 		 * is repeated.
 		 */
 		for (blk = getblk(vp, bn, size, PCATCH, 0);
 		    blk == (struct buf *)0;
 		    blk = getblk(vp, bn, size, 0, 2 * hz)) {
 			if (nfs_sigintr(nfsmnt, (struct nfsreq *)0, p))
 				return ((struct buf *)0);
 		}
 	}
 
 	if (vp->v_type == VREG) {
 		int iosize = mnt->mnt_stat.f_iosize;
 
 		/* Convert the logical block number to DEV_BSIZE units. */
 		blk->b_blkno = bn * (iosize / DEV_BSIZE);
 	}
 	return (blk);
 }
 
 /*
  * Flush and invalidate all buffers of a vnode via vinvalbuf().  Only one
  * flush runs per nfsnode at a time: if NFLUSHINPROG is already set we
  * sleep on n_flag until the other flusher is done, then take our turn.
  *
  * intrflg asks for an interruptible flush; it is ignored unless the
  * mount has NFSMNT_INT set.  Returns 0 on success (also when the vnode
  * has VXLOCK set, in which case nothing is done) or EINTR if a signal
  * was detected by nfs_sigintr().
  */
 int
 nfs_vinvalbuf(vp, flags, cred, p, intrflg)
 	struct vnode *vp;
 	int flags;
 	struct ucred *cred;
 	struct proc *p;
 	int intrflg;
 {
 	struct nfsnode *node = VTONFS(vp);
 	struct nfsmount *mnt = VFSTONFS(vp->v_mount);
 	int sleepflag = 0, sleeptimeo = 0;
 	int rv = 0, result = 0;
 	/* Flags dropped from n_flag once the flush attempt is over. */
 	int donebits = NMODIFIED | NFLUSHINPROG;
 
 	if (vp->v_flag & VXLOCK)
 		return (0);
 
 	if ((mnt->nm_flag & NFSMNT_INT) == 0)
 		intrflg = 0;
 	if (intrflg) {
 		sleepflag = PCATCH;
 		sleeptimeo = 2 * hz;
 	}
 
 	/*
 	 * Let any flush already under way finish first.
 	 */
 	while (node->n_flag & NFLUSHINPROG) {
 		node->n_flag |= NFLUSHWANT;
 		rv = tsleep((caddr_t)&node->n_flag, PRIBIO + 2, "nfsvinval",
 			sleeptimeo);
 		if (rv && intrflg && nfs_sigintr(mnt, (struct nfsreq *)0, p))
 			return (EINTR);
 	}
 
 	/*
 	 * Our turn.  The first attempt may catch signals; retries use the
 	 * timeout instead so that the signal check below is revisited.
 	 */
 	node->n_flag |= NFLUSHINPROG;
 	for (rv = vinvalbuf(vp, flags, cred, p, sleepflag, 0); rv;
 	    rv = vinvalbuf(vp, flags, cred, p, 0, sleeptimeo)) {
 		if (intrflg && nfs_sigintr(mnt, (struct nfsreq *)0, p)) {
 			/* Interrupted: NMODIFIED must stay set. */
 			donebits = NFLUSHINPROG;
 			result = EINTR;
 			break;
 		}
 	}
 
 	/*
 	 * Common exit: drop our flags and wake anyone waiting to flush.
 	 */
 	node->n_flag &= ~donebits;
 	if (node->n_flag & NFLUSHWANT) {
 		node->n_flag &= ~NFLUSHWANT;
 		wakeup((caddr_t)&node->n_flag);
 	}
 	return (result);
 }
 
 /*
  * Initiate asynchronous I/O. Return an error if no nfsiods are available.
  * This is mainly to avoid queueing async I/O requests when the nfsiods
  * are all hung on a dead server.
  *
  * Note: nfs_asyncio() does not clear (BIO_ERROR|B_INVAL) but when the bp
  * is eventually dequeued by the async daemon, nfs_doio() *will*.
  *
  * Arguments:
  *	bp    - buffer to queue; b_iocmd selects read vs. write handling.
  *	cred  - credentials stored in b_rcred/b_wcred if the buffer has none.
  *	procp - process passed to nfs_sigintr() when a sleep is interrupted.
  *
  * Returns 0 if the buffer was appended to the mount's nm_bufq, EIO if
  * the caller should perform the I/O synchronously instead, or EINTR if
  * nfs_sigintr() reported a signal while waiting for queue space.
  */
 int
 nfs_asyncio(bp, cred, procp)
 	register struct buf *bp;
 	struct ucred *cred;
 	struct proc *procp;
 {
 	struct nfsmount *nmp;
 	int i;
 	int gotiod;
 	int slpflag = 0;
 	int slptimeo = 0;
 	int error;
 
 	/*
 	 * If no async daemons then return EIO to force caller to run the rpc
 	 * synchronously.
 	 */
 	if (nfs_numasync == 0)
 		return (EIO);
 
 	nmp = VFSTONFS(bp->b_vp->v_mount);
 
 	/*
 	 * Commits are usually short and sweet so lets save some cpu and 
 	 * leave the async daemons for more important rpc's (such as reads
 	 * and writes).
 	 */
 	if (bp->b_iocmd == BIO_WRITE && (bp->b_flags & B_NEEDCOMMIT) &&
 	    (nmp->nm_bufqiods > nfs_numasync / 2)) {
 		return(EIO);
 	}
 
 again:
 	/*
 	 * NOTE(review): slpflag is set back to PCATCH on every pass through
 	 * this label, even if the wait loop below had already switched to
 	 * the timeout form; slptimeo is left as is.
 	 */
 	if (nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
 	gotiod = FALSE;
 
 	/*
 	 * Find a free iod to process this request.
 	 */
 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
 		if (nfs_iodwant[i]) {
 			/*
 			 * Found one, so wake it up and tell it which
 			 * mount to process.
 			 */
 			NFS_DPF(ASYNCIO,
 				("nfs_asyncio: waking iod %d for mount %p\n",
 				 i, nmp));
 			nfs_iodwant[i] = (struct proc *)0;
 			nfs_iodmount[i] = nmp;
 			nmp->nm_bufqiods++;
 			wakeup((caddr_t)&nfs_iodwant[i]);
 			gotiod = TRUE;
 			break;
 		}
 
 	/*
 	 * If none are free, we may already have an iod working on this mount
 	 * point.  If so, it will process our request.
 	 */
 	if (!gotiod) {
 		if (nmp->nm_bufqiods > 0) {
 			NFS_DPF(ASYNCIO,
 				("nfs_asyncio: %d iods are already processing mount %p\n",
 				 nmp->nm_bufqiods, nmp));
 			gotiod = TRUE;
 		}
 	}
 
 	/*
 	 * If we have an iod which can process the request, then queue
 	 * the buffer.
 	 */
 	if (gotiod) {
 		/*
 		 * Ensure that the queue never grows too large.  We still want
 		 * to asynchronize so we block rather then return EIO.
 		 */
 		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
 			NFS_DPF(ASYNCIO,
 				("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
 			nmp->nm_bufqwant = TRUE;
 			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
 				       "nfsaio", slptimeo);
 			if (error) {
 				if (nfs_sigintr(nmp, NULL, procp))
 					return (EINTR);
 				/*
 				 * Not a signal we act on: stop catching
 				 * signals and poll with a 2 second timeout.
 				 */
 				if (slpflag == PCATCH) {
 					slpflag = 0;
 					slptimeo = 2 * hz;
 				}
 			}
 			/*
 			 * We might have lost our iod while sleeping,
 			 * so check and loop if necessary.
 			 */
 			if (nmp->nm_bufqiods == 0) {
 				NFS_DPF(ASYNCIO,
 					("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
 				goto again;
 			}
 		}
 
 		/*
 		 * Give the buffer a reference to the caller's credentials
 		 * if it does not have any for this direction yet; writes
 		 * are additionally marked B_WRITEINPROG.
 		 */
 		if (bp->b_iocmd == BIO_READ) {
 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
 				crhold(cred);
 				bp->b_rcred = cred;
 			}
 		} else {
 			bp->b_flags |= B_WRITEINPROG;
 			if (bp->b_wcred == NOCRED && cred != NOCRED) {
 				crhold(cred);
 				bp->b_wcred = cred;
 			}
 		}
 
 		/* Append the buffer to this mount's async queue. */
 		BUF_KERNPROC(bp);
 		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
 		nmp->nm_bufqlen++;
 		return (0);
 	}
 
 	/*
 	 * All the iods are busy on other mounts, so return EIO to
 	 * force the caller to process the i/o synchronously.
 	 */
 	NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
 	return (EIO);
 }
 
 /*
  * Do an I/O operation to/from a cache block. This may be called
  * synchronously or from an nfsiod.
  *
  * Arguments:
  *	bp - buffer to read into or write from; b_iocmd and B_PHYS select
  *	     which of the three paths below is taken.
  *	cr - credentials passed to the read/write rpc routines.
  *	p  - process recorded in the uio and passed to nfs_commit(); may
  *	     be null (it is tested before use in the VTEXT check).
  *
  * Every path finishes by calling bufdone(bp).  Returns 0 or the error
  * from the rpc; on error paths other than an interrupted write the
  * error is also recorded in bp->b_error with BIO_ERROR set.
  */
 int
 nfs_doio(bp, cr, p)
 	struct buf *bp;
 	struct ucred *cr;
 	struct proc *p;
 {
 	struct uio *uiop;
 	struct vnode *vp;
 	struct nfsnode *np;
 	struct nfsmount *nmp;
 	int error = 0, iomode, must_commit = 0;
 	struct uio uio;
 	struct iovec io;
 
 	/*
 	 * Build a single-iovec, kernel-space uio on the stack; the
 	 * offset, length and direction are filled in per path below.
 	 */
 	vp = bp->b_vp;
 	np = VTONFS(vp);
 	nmp = VFSTONFS(vp->v_mount);
 	uiop = &uio;
 	uiop->uio_iov = &io;
 	uiop->uio_iovcnt = 1;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_procp = p;
 
 	/*
 	 * clear BIO_ERROR and B_INVAL state prior to initiating the I/O.  We
 	 * do this here so we do not have to do it in all the code that
 	 * calls us.
 	 */
 	bp->b_flags &= ~B_INVAL;
 	bp->b_ioflags &= ~BIO_ERROR;
 
 	KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp));
 
 	/*
 	 * Historically, paging was done with physio, but no more.
 	 */
 	if (bp->b_flags & B_PHYS) {
 	    /*
 	     * ...though reading /dev/drum still gets us here.
 	     */
 	    io.iov_len = uiop->uio_resid = bp->b_bcount;
 	    /* mapping was done by vmapbuf() */
 	    io.iov_base = bp->b_data;
 	    uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
 	    if (bp->b_iocmd == BIO_READ) {
 		uiop->uio_rw = UIO_READ;
 		nfsstats.read_physios++;
 		error = nfs_readrpc(vp, uiop, cr);
 	    } else {
 		/* commit indication from the rpc; not examined here */
 		int com;
 
 		iomode = NFSV3WRITE_DATASYNC;
 		uiop->uio_rw = UIO_WRITE;
 		nfsstats.write_physios++;
 		error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
 	    }
 	    if (error) {
 		bp->b_ioflags |= BIO_ERROR;
 		bp->b_error = error;
 	    }
 	} else if (bp->b_iocmd == BIO_READ) {
 	    /*
 	     * Buffer cache read: the rpc used depends on the vnode type.
 	     */
 	    io.iov_len = uiop->uio_resid = bp->b_bcount;
 	    io.iov_base = bp->b_data;
 	    uiop->uio_rw = UIO_READ;
 	    switch (vp->v_type) {
 	    case VREG:
 		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
 		nfsstats.read_bios++;
 		error = nfs_readrpc(vp, uiop, cr);
 		if (!error) {
 		    if (uiop->uio_resid) {
 			/*
 			 * If we had a short read with no error, we must have
 			 * hit a file hole.  We should zero-fill the remainder.
 			 * This can also occur if the server hits the file EOF.
 			 *
 			 * Holes used to be able to occur due to pending 
 			 * writes, but that is not possible any longer.
 			 */
 			int nread = bp->b_bcount - uiop->uio_resid;
 			int left  = bp->b_bcount - nread;
 
 			if (left > 0)
 				bzero((char *)bp->b_data + nread, left);
 			uiop->uio_resid = 0;
 		    }
 		}
 		/*
 		 * For a vnode with VTEXT set, kill the requesting process
 		 * if the file appears to have changed: on NQNFS mounts an
 		 * invalid read lease with n_lrev != n_brev, otherwise a
 		 * cached n_mtime differing from the attribute mtime.
 		 */
 		if (p && (vp->v_flag & VTEXT) &&
 			(((nmp->nm_flag & NFSMNT_NQNFS) &&
 			  NQNFS_CKINVALID(vp, np, ND_READ) &&
 			  np->n_lrev != np->n_brev) ||
 			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
 			  np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
 			uprintf("Process killed due to text file modification\n");
 			PROC_LOCK(p);
 			psignal(p, SIGKILL);
 			_PHOLD(p);
 			PROC_UNLOCK(p);
 		}
 		break;
 	    case VLNK:
 		uiop->uio_offset = (off_t)0;
 		nfsstats.readlink_bios++;
 		error = nfs_readlinkrpc(vp, uiop, cr);
 		break;
 	    case VDIR:
 		nfsstats.readdir_bios++;
 		uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
 		/*
 		 * Try readdirplus first when enabled; if the server says
 		 * it is not supported, turn it off for this mount and fall
 		 * through to the plain readdir rpc.
 		 */
 		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
 			error = nfs_readdirplusrpc(vp, uiop, cr);
 			if (error == NFSERR_NOTSUPP)
 				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
 		}
 		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
 			error = nfs_readdirrpc(vp, uiop, cr);
 		/*
 		 * end-of-directory sets B_INVAL but does not generate an
 		 * error.
 		 */
 		if (error == 0 && uiop->uio_resid == bp->b_bcount)
 			bp->b_flags |= B_INVAL;
 		break;
 	    default:
 		printf("nfs_doio:  type %x unexpected\n",vp->v_type);
 		break;
 	    };
 	    if (error) {
 		bp->b_ioflags |= BIO_ERROR;
 		bp->b_error = error;
 	    }
 	} else {
 	    /* 
 	     * If we only need to commit, try to commit
 	     */
 	    if (bp->b_flags & B_NEEDCOMMIT) {
 		    int retv;
 		    off_t off;
 
 		    off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
 		    bp->b_flags |= B_WRITEINPROG;
 		    retv = nfs_commit(
 				bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff,
 				bp->b_wcred, p);
 		    bp->b_flags &= ~B_WRITEINPROG;
 		    /*
 		     * Commit succeeded: the buffer is clean, finish here.
 		     * Any failure falls through to a full rewrite below.
 		     */
 		    if (retv == 0) {
 			    bp->b_dirtyoff = bp->b_dirtyend = 0;
 			    bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 			    bp->b_resid = 0;
 			    bufdone(bp);
 			    return (0);
 		    }
 		    if (retv == NFSERR_STALEWRITEVERF) {
 			    nfs_clearcommit(bp->b_vp->v_mount);
 		    }
 	    }
 
 	    /*
 	     * Setup for actual write
 	     */
 
 	    /* Never write past the current file size. */
 	    if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
 		bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
 
 	    if (bp->b_dirtyend > bp->b_dirtyoff) {
 		io.iov_len = uiop->uio_resid = bp->b_dirtyend
 		    - bp->b_dirtyoff;
 		uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
 		    + bp->b_dirtyoff;
 		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
 		uiop->uio_rw = UIO_WRITE;
 		nfsstats.write_bios++;
 
 		/*
 		 * Request an UNSTABLE write only when B_ASYNC is the sole
 		 * flag set among those tested; otherwise ask for FILESYNC.
 		 */
 		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
 		    iomode = NFSV3WRITE_UNSTABLE;
 		else
 		    iomode = NFSV3WRITE_FILESYNC;
 
 		bp->b_flags |= B_WRITEINPROG;
 		error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
 
 		/*
 		 * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try
 		 * to cluster the buffers needing commit.  This will allow
 		 * the system to submit a single commit rpc for the whole
 		 * cluster.  We can do this even if the buffer is not 100% 
 		 * dirty (relative to the NFS blocksize), so we optimize the
 		 * append-to-file-case.
 		 *
 		 * (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be
 		 * cleared because write clustering only works for commit
 		 * rpc's, not for the data portion of the write).
 		 */
 
 		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
 		    bp->b_flags |= B_NEEDCOMMIT;
 		    if (bp->b_dirtyoff == 0
 			&& bp->b_dirtyend == bp->b_bcount)
 			bp->b_flags |= B_CLUSTEROK;
 		} else {
 		    bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 		}
 		bp->b_flags &= ~B_WRITEINPROG;
 
 		/*
 		 * For an interrupted write, the buffer is still valid
 		 * and the write hasn't been pushed to the server yet,
 		 * so we can't set BIO_ERROR and report the interruption
 		 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
 		 * is not relevant, so the rpc attempt is essentially
 		 * a noop.  For the case of a V3 write rpc not being
 		 * committed to stable storage, the block is still
 		 * dirty and requires either a commit rpc or another
 		 * write rpc with iomode == NFSV3WRITE_FILESYNC before
 		 * the block is reused. This is indicated by setting
 		 * the B_DELWRI and B_NEEDCOMMIT flags.
 		 *
 		 * If the buffer is marked B_PAGING, it does not reside on
 		 * the vp's paging queues so we cannot call bdirty().  The
 		 * bp in this case is not an NFS cache block so we should
 		 * be safe. XXX
 		 */
     		if (error == EINTR
 		    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
 			int s;
 
 			s = splbio();
 			bp->b_flags &= ~(B_INVAL|B_NOCACHE);
 			if ((bp->b_flags & B_PAGING) == 0) {
 			    bdirty(bp);
 			    bp->b_flags &= ~B_DONE;
 			}
 			if (error && (bp->b_flags & B_ASYNC) == 0)
 			    bp->b_flags |= B_EINTR;
 			splx(s);
 	    	} else {
 		    /*
 		     * Hard failure or fully stable write: record any error
 		     * on both the buffer and the nfsnode (nfs_write()
 		     * reports a pending NWRITEERR to the next writer) and
 		     * forget the dirty range.
 		     */
 		    if (error) {
 			bp->b_ioflags |= BIO_ERROR;
 			bp->b_error = np->n_error = error;
 			np->n_flag |= NWRITEERR;
 		    }
 		    bp->b_dirtyoff = bp->b_dirtyend = 0;
 		}
 	    } else {
 		/* Nothing dirty to write: complete the buffer as is. */
 		bp->b_resid = 0;
 		bufdone(bp);
 		return (0);
 	    }
 	}
 	/*
 	 * Common completion: report the bytes left untransferred, act on
 	 * a commit request from the write rpc, and finish the buffer.
 	 */
 	bp->b_resid = uiop->uio_resid;
 	if (must_commit)
 	    nfs_clearcommit(vp->v_mount);
 	bufdone(bp);
 	return (error);
 }
Index: head/sys/nfs/nfs_vnops.c
===================================================================
--- head/sys/nfs/nfs_vnops.c	(revision 75579)
+++ head/sys/nfs/nfs_vnops.c	(revision 75580)
@@ -1,3400 +1,3385 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
  * $FreeBSD$
  */
 
 
 /*
  * vnode op calls for Sun NFS version 2 and 3
  */
 
 #include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/namei.h>
 #include <sys/socket.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/lockf.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #include <miscfs/fifofs/fifo.h>
 
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
 #include <nfs/nfsnode.h>
 #include <nfs/nfsmount.h>
 #include <nfs/xdr_subs.h>
 #include <nfs/nfsm_subs.h>
 #include <nfs/nqnfs.h>
 
 #include <net/if.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 
 /* Defs */
 #define	TRUE	1
 #define	FALSE	0
 
 /*
  * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
  * calls are not in getblk() and brelse() so that they would not be necessary
  * here.
  *
  * When B_VMIO is not defined, vfs_busy_pages() is defined here as an empty
  * macro, so any call to it in this file expands to nothing.
  */
 #ifndef B_VMIO
 #define vfs_busy_pages(bp, f)
 #endif
 
 /*
  * Forward declarations of the file-local (static) vnode operation handlers
  * and helper routines.  They are declared here so that the operation tables
  * below can reference them.
  *
  * nfs_poll is not a function of its own: the macro below maps the name to
  * vop_nopoll.
  */
 static int	nfsspec_read __P((struct vop_read_args *));
 static int	nfsspec_write __P((struct vop_write_args *));
 static int	nfsfifo_read __P((struct vop_read_args *));
 static int	nfsfifo_write __P((struct vop_write_args *));
 static int	nfsspec_close __P((struct vop_close_args *));
 static int	nfsfifo_close __P((struct vop_close_args *));
 #define nfs_poll vop_nopoll
 static int	nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int));
 static int	nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *));
 static	int	nfs_lookup __P((struct vop_lookup_args *));
 static	int	nfs_create __P((struct vop_create_args *));
 static	int	nfs_mknod __P((struct vop_mknod_args *));
 static	int	nfs_open __P((struct vop_open_args *));
 static	int	nfs_close __P((struct vop_close_args *));
 static	int	nfs_access __P((struct vop_access_args *));
 static	int	nfs_getattr __P((struct vop_getattr_args *));
 static	int	nfs_setattr __P((struct vop_setattr_args *));
 static	int	nfs_read __P((struct vop_read_args *));
 static	int	nfs_fsync __P((struct vop_fsync_args *));
 static	int	nfs_remove __P((struct vop_remove_args *));
 static	int	nfs_link __P((struct vop_link_args *));
 static	int	nfs_rename __P((struct vop_rename_args *));
 static	int	nfs_mkdir __P((struct vop_mkdir_args *));
 static	int	nfs_rmdir __P((struct vop_rmdir_args *));
 static	int	nfs_symlink __P((struct vop_symlink_args *));
 static	int	nfs_readdir __P((struct vop_readdir_args *));
 static	int	nfs_bmap __P((struct vop_bmap_args *));
 static	int	nfs_strategy __P((struct vop_strategy_args *));
 static	int	nfs_lookitup __P((struct vnode *, const char *, int,
 			struct ucred *, struct proc *, struct nfsnode **));
 static	int	nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *));
 static int	nfsspec_access __P((struct vop_access_args *));
 static int	nfs_readlink __P((struct vop_readlink_args *));
 static int	nfs_print __P((struct vop_print_args *));
 static int	nfs_advlock __P((struct vop_advlock_args *));
-static int	nfs_bwrite __P((struct vop_bwrite_args *));
 /*
  * Global vfs data structures for nfs
  *
  * nfsv2_vnodeop_entries pairs each vnode operation descriptor with the
  * routine that handles it for NFS vnodes.  The table ends with a
  * { NULL, NULL } sentinel.  The vop_default_desc entry names vop_defaultop
  * (presumably the fallback for operations not listed here -- confirm
  * against the VFS framework).  nfsv2_vnodeop_opv_desc ties the table to
  * the nfsv2_vnodeop_p vector pointer and is registered with VNODEOP_SET().
  */
 vop_t **nfsv2_vnodeop_p;
 static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) vop_defaultop },
 	{ &vop_access_desc,		(vop_t *) nfs_access },
 	{ &vop_advlock_desc,		(vop_t *) nfs_advlock },
 	{ &vop_bmap_desc,		(vop_t *) nfs_bmap },
-	{ &vop_bwrite_desc,		(vop_t *) nfs_bwrite },
 	{ &vop_close_desc,		(vop_t *) nfs_close },
 	{ &vop_create_desc,		(vop_t *) nfs_create },
 	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
 	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
 	{ &vop_getpages_desc,		(vop_t *) nfs_getpages },
 	{ &vop_putpages_desc,		(vop_t *) nfs_putpages },
 	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
 	{ &vop_lease_desc,		(vop_t *) vop_null },
 	{ &vop_link_desc,		(vop_t *) nfs_link },
 	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
 	{ &vop_lookup_desc,		(vop_t *) nfs_lookup },
 	{ &vop_mkdir_desc,		(vop_t *) nfs_mkdir },
 	{ &vop_mknod_desc,		(vop_t *) nfs_mknod },
 	{ &vop_open_desc,		(vop_t *) nfs_open },
 	{ &vop_poll_desc,		(vop_t *) nfs_poll },
 	{ &vop_print_desc,		(vop_t *) nfs_print },
 	{ &vop_read_desc,		(vop_t *) nfs_read },
 	{ &vop_readdir_desc,		(vop_t *) nfs_readdir },
 	{ &vop_readlink_desc,		(vop_t *) nfs_readlink },
 	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
 	{ &vop_remove_desc,		(vop_t *) nfs_remove },
 	{ &vop_rename_desc,		(vop_t *) nfs_rename },
 	{ &vop_rmdir_desc,		(vop_t *) nfs_rmdir },
 	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
 	{ &vop_strategy_desc,		(vop_t *) nfs_strategy },
 	{ &vop_symlink_desc,		(vop_t *) nfs_symlink },
 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
 	{ &vop_write_desc,		(vop_t *) nfs_write },
 	{ NULL, NULL }
 };
 static struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
 	{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
 VNODEOP_SET(nfsv2_vnodeop_opv_desc);
 
 /*
  * Special device vnode ops
  *
  * Operation table for special-device vnodes.  Only the operations listed
  * below get NFS-specific handlers (nfsspec_* or shared nfs_* routines);
  * the vop_default_desc entry names spec_vnoperate.  The table ends with a
  * { NULL, NULL } sentinel and is registered through
  * spec_nfsv2nodeop_opv_desc.
  */
 vop_t **spec_nfsv2nodeop_p;
 static struct vnodeopv_entry_desc nfsv2_specop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
 	{ &vop_access_desc,		(vop_t *) nfsspec_access },
 	{ &vop_close_desc,		(vop_t *) nfsspec_close },
 	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
 	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
 	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
 	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
 	{ &vop_print_desc,		(vop_t *) nfs_print },
 	{ &vop_read_desc,		(vop_t *) nfsspec_read },
 	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
 	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
 	{ &vop_write_desc,		(vop_t *) nfsspec_write },
 	{ NULL, NULL }
 };
 static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
 	{ &spec_nfsv2nodeop_p, nfsv2_specop_entries };
 VNODEOP_SET(spec_nfsv2nodeop_opv_desc);
 
 /*
  * Fifo vnode ops
  *
  * Operation table for fifo vnodes.  It mirrors the special-device table
  * above but uses the nfsfifo_* read/write/close handlers, and its
  * vop_default_desc entry names fifo_vnoperate.  The table ends with a
  * { NULL, NULL } sentinel and is registered through
  * fifo_nfsv2nodeop_opv_desc.
  */
 vop_t **fifo_nfsv2nodeop_p;
 static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
 	{ &vop_access_desc,		(vop_t *) nfsspec_access },
 	{ &vop_close_desc,		(vop_t *) nfsfifo_close },
 	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
 	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
 	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
 	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
 	{ &vop_print_desc,		(vop_t *) nfs_print },
 	{ &vop_read_desc,		(vop_t *) nfsfifo_read },
 	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
 	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
 	{ &vop_write_desc,		(vop_t *) nfsfifo_write },
 	{ NULL, NULL }
 };
 static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
 	{ &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries };
 VNODEOP_SET(fifo_nfsv2nodeop_opv_desc);
 
 /*
  * Forward declarations of file-local helpers for the mknod, remove and
  * rename operations (the *rpc names suggest they issue the corresponding
  * RPCs -- their bodies are not in this part of the file).
  */
 static int	nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp,
 				  struct componentname *cnp,
 				  struct vattr *vap));
 static int	nfs_removerpc __P((struct vnode *dvp, const char *name,
 				   int namelen,
 				   struct ucred *cred, struct proc *proc));
 static int	nfs_renamerpc __P((struct vnode *fdvp, const char *fnameptr,
 				   int fnamelen, struct vnode *tdvp,
 				   const char *tnameptr, int tnamelen,
 				   struct ucred *cred, struct proc *proc));
 static int	nfs_renameit __P((struct vnode *sdvp,
 				  struct componentname *scnp,
 				  struct sillyrename *sp));
 
 /*
  * Global variables
  *
  * The extern objects are defined elsewhere.  nfs_true, nfs_false and
  * nfs_xdrneg1 are used below as ready-made 32-bit words placed directly
  * into RPC requests (presumably already in XDR byte order -- confirm at
  * their definition).  nfsstats holds the RPC and cache counters bumped
  * throughout this file.
  *
  * nfs_iodwant[] and nfs_iodmount[] are per-async-daemon slots sized by
  * NFS_MAXASYNCDAEMON; nfs_numasync starts at zero.
  *
  * DIRHDSIZ is the size of a struct dirent without its name buffer of
  * MAXNAMLEN + 1 bytes.
  */
 extern u_int32_t nfs_true, nfs_false;
 extern u_int32_t nfs_xdrneg1;
 extern struct nfsstats nfsstats;
 extern nfstype nfsv3_type[9];
 struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
 int nfs_numasync = 0;
 #define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
 
 /* Tunables exported under the vfs.nfs sysctl tree. */
 SYSCTL_DECL(_vfs_nfs);
 
 /*
  * Lifetime added to n_modestamp when deciding whether a cached NFSv3
  * ACCESS result is still usable (see nfs_access()).  A value <= 0 also
  * stops nfs_access() from asking the server for all access bits at once.
  */
 static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 
 	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
 
 /*
  * When non-zero, nfs_close() asks nfs_flush() to commit as well as write
  * the dirty buffers of an NFSv3 file; the default (0) only writes them.
  */
 static int	nfsv3_commit_on_close = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW, 
 	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
 /* Read-only access cache hit/miss counters; currently compiled out. */
 #if 0
 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, 
 	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
 
 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, 
 	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
 #endif
 
 /* Every NFSv3 ACCESS permission bit, used to request a blanket check. */
 #define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
 			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
 			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
 /*
  * Perform an NFSv3 ACCESS rpc "over the wire".
  *
  *	vp	vnode whose access rights are being queried
  *	wmode	NFSV3ACCESS_* bits to ask the server about
  *	p	process on whose behalf the request is made
  *	cred	credentials to send with the request
  *
  * On success the access bits returned by the server are stored in the
  * nfsnode (n_mode) together with the uid they apply to (n_modeuid) and
  * the time of the reply (n_modestamp), so nfs_access() can reuse them.
  * Returns 0 or the error left in `error' by the request macros.
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * rely on the local variables declared here (tl, mreq, mrep, bpos, ...)
  * and to set `error' -- confirm in nfsm_subs.h.
  */
 static int
 nfs3_access_otw(struct vnode *vp,
 		int wmode,
 		struct proc *p,
 		struct ucred *cred)
 {
 	const int v3 = 1;	/* this rpc only exists in version 3 */
 	u_int32_t *tl;
 	int error = 0, attrflag;
 	
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	caddr_t bpos, dpos, cp2;
 	register int32_t t1, t2;
 	register caddr_t cp;
 	u_int32_t rmode;
 	struct nfsnode *np = VTONFS(vp);
 
 	nfsstats.rpccnt[NFSPROC_ACCESS]++;
 	/* Request body: file handle followed by one word of access bits. */
 	nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
 	nfsm_fhtom(vp, v3);
 	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(wmode); 
 	nfsm_request(vp, NFSPROC_ACCESS, p, cred);
 	nfsm_postop_attr(vp, attrflag);
 	if (!error) {
 		/* Remember what the server granted, for whom, and when. */
 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 		rmode = fxdr_unsigned(u_int32_t, *tl);
 		np->n_mode = rmode;
 		np->n_modeuid = cred->cr_uid;
 		np->n_modestamp = time_second;
 	}
 	nfsm_reqdone;
 	return error;
 }
 
 /*
  * nfs access vnode op.
  * For nfs version 2, just return ok. File accesses may fail later.
  * For nfs version 3, use the access rpc to check accessibility. If file modes
  * are changed on the server, accesses might still fail later.
  */
 static int
 nfs_access(ap)
 	struct vop_access_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	int error = 0;
 	u_int32_t mode, wmode;
 	int v3 = NFS_ISV3(vp);
 	struct nfsnode *np = VTONFS(vp);
 
 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
 	 */
 	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
 		default:
 			break;
 		}
 	}
 	/*
 	 * For nfs v3, check to see if we have done this recently, and if
 	 * so return our cached result instead of making an ACCESS call.
 	 * If not, do an access rpc, otherwise you are stuck emulating
 	 * ufs_access() locally using the vattr. This may not be correct,
 	 * since the server may apply other access criteria such as
 	 * client uid-->server uid mapping that we do not know about.
 	 */
 	if (v3) {
 		if (ap->a_mode & VREAD)
 			mode = NFSV3ACCESS_READ;
 		else
 			mode = 0;
 		if (vp->v_type != VDIR) {
 			if (ap->a_mode & VWRITE)
 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
 			if (ap->a_mode & VEXEC)
 				mode |= NFSV3ACCESS_EXECUTE;
 		} else {
 			if (ap->a_mode & VWRITE)
 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
 					 NFSV3ACCESS_DELETE);
 			if (ap->a_mode & VEXEC)
 				mode |= NFSV3ACCESS_LOOKUP;
 		}
 		/* XXX safety belt, only make blanket request if caching */
 		if (nfsaccess_cache_timeout > 0) {
 			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | 
 				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | 
 				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
 		} else {
 			wmode = mode;
 		}
 
 		/*
 		 * Does our cached result allow us to give a definite yes to
 		 * this request?
 		 */
 		if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
 		    (ap->a_cred->cr_uid == np->n_modeuid) &&
 		    ((np->n_mode & mode) == mode)) {
 			nfsstats.accesscache_hits++;
 		} else {
 			/*
 			 * Either a no, or a don't know.  Go to the wire.
 			 */
 			nfsstats.accesscache_misses++;
 		        error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred);
 			if (!error) {
 				if ((np->n_mode & mode) != mode) {
 					error = EACCES;
 				}
 			}
 		}
 		return (error);
 	} else {
 		if ((error = nfsspec_access(ap)) != 0)
 			return (error);
 
 		/*
 		 * Attempt to prevent a mapped root from accessing a file
 		 * which it shouldn't.  We try to read a byte from the file
 		 * if the user is root and the file is not zero length.
 		 * After calling nfsspec_access, we should have the correct
 		 * file size cached.
 		 */
 		if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
 		    && VTONFS(vp)->n_size > 0) {
 			struct iovec aiov;
 			struct uio auio;
 			char buf[1];
 
 			aiov.iov_base = buf;
 			aiov.iov_len = 1;
 			auio.uio_iov = &aiov;
 			auio.uio_iovcnt = 1;
 			auio.uio_offset = 0;
 			auio.uio_resid = 1;
 			auio.uio_segflg = UIO_SYSSPACE;
 			auio.uio_rw = UIO_READ;
 			auio.uio_procp = ap->a_p;
 
 			if (vp->v_type == VREG)
 				error = nfs_readrpc(vp, &auio, ap->a_cred);
 			else if (vp->v_type == VDIR) {
 				char* bp;
 				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
 				aiov.iov_base = bp;
 				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
 				error = nfs_readdirrpc(vp, &auio, ap->a_cred);
 				free(bp, M_TEMP);
 			} else if (vp->v_type == VLNK)
 				error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
 			else
 				error = EACCES;
 		}
 		return (error);
 	}
 }
 
 /*
  * nfs open vnode op
  * Check to see if the type is ok
  * and that deletion is not in progress.
  * For paged in text files, you will need to flush the page cache
  * if consistency is lost.
  *
  * Only regular files, directories and symbolic links may be opened; any
  * other vnode type yields EACCES.  Cached data is then revalidated: via a
  * read lease on NQNFS mounts, otherwise by comparing the server's
  * modification time with the one remembered in the nfsnode.
  *
  * Returns 0 on success, or the error from the lease, invalidate or
  * getattr call that failed.  A failure of nfs_vinvalbuf() is returned
  * only when it is EINTR.
  */
 /* ARGSUSED */
 static int
 nfs_open(ap)
 	struct vop_open_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct vattr vattr;
 	int error;
 
 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
 #ifdef DIAGNOSTIC
 		printf("open eacces vtyp=%d\n",vp->v_type);
 #endif
 		return (EACCES);
 	}
 	/*
 	 * Get a valid lease. If cached data is stale, flush it.
 	 */
 	if (nmp->nm_flag & NFSMNT_NQNFS) {
 		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
 		    /* Keep asking while the reply is NQNFS_EXPIRED. */
 		    do {
 			error = nqnfs_getlease(vp, ND_READ, ap->a_cred,
 			    ap->a_p);
 		    } while (error == NQNFS_EXPIRED);
 		    if (error)
 			return (error);
 		    /*
 		     * Flush the cache when the lease revision differs from
 		     * the one recorded for the buffers, or when the node is
 		     * marked non-cacheable.
 		     */
 		    if (np->n_lrev != np->n_brev ||
 			(np->n_flag & NQNFSNONCACHE)) {
 			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
 				ap->a_p, 1)) == EINTR)
 				return (error);
 			np->n_brev = np->n_lrev;
 		    }
 		}
 	} else {
 		if (np->n_flag & NMODIFIED) {
 			/*
 			 * Locally modified: flush and invalidate, zero the
 			 * attribute stamp, then fetch attributes and record
 			 * the resulting modification time.
 			 */
 			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
 				ap->a_p, 1)) == EINTR)
 				return (error);
 			np->n_attrstamp = 0;
 			if (vp->v_type == VDIR)
 				np->n_direofoffset = 0;
 			error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
 			if (error)
 				return (error);
 			np->n_mtime = vattr.va_mtime.tv_sec;
 		} else {
 			/*
 			 * Not modified locally: invalidate only when the
 			 * modification time differs from the one we remember.
 			 */
 			error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
 			if (error)
 				return (error);
 			if (np->n_mtime != vattr.va_mtime.tv_sec) {
 				if (vp->v_type == VDIR)
 					np->n_direofoffset = 0;
 				if ((error = nfs_vinvalbuf(vp, V_SAVE,
 					ap->a_cred, ap->a_p, 1)) == EINTR)
 					return (error);
 				np->n_mtime = vattr.va_mtime.tv_sec;
 			}
 		}
 	}
 	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
 		np->n_attrstamp = 0; /* For Open/Close consistency */
 	return (0);
 }
 
 /*
  * nfs close vnode op
  * What an NFS client should do upon close after writing is a debatable issue.
  * Most NFS clients push delayed writes to the server upon close, basically for
  * two reasons:
  * 1 - So that any write errors may be reported back to the client process
  *     doing the close system call. By far the two most likely errors are
  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
  * 2 - To put a worst case upper bound on cache inconsistency between
  *     multiple clients for the file.
  * There is also a consistency problem for Version 2 of the protocol w.r.t.
  * not being able to tell if other clients are writing a file concurrently,
  * since there is no way of knowing if the changed modify time in the reply
  * is only due to the write for this client.
  * (NFS Version 3 provides weak cache consistency data in the reply that
  *  should be sufficient to detect and handle this case.)
  *
  * The current code does the following:
  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
  *                     or commit them (this satisfies 1 and 2 except for the
  *                     case where the server crashes after this close but
  *                     before the commit RPC, which is felt to be "good
  *                     enough". Changing the last argument to nfs_flush() to
  *                     a 1 would force a commit operation, if it is felt a
  *                     commit is necessary now.
  * for NQNFS         - do nothing now, since 2 is dealt with via leases and
  *                     1 should be dealt with via an fsync() system call for
  *                     cases where write errors are important.
  */
 /* ARGSUSED */
 static int
 nfs_close(ap)
 	struct vop_close_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	int error = 0;
 
 	if (vp->v_type == VREG) {
 	    if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 &&
 		(np->n_flag & NMODIFIED)) {
 		if (NFS_ISV3(vp)) {
 		    /*
 		     * Under NFSv3 we have dirty buffers to dispose of.  We
 		     * must flush them to the NFS server.  We have the option
 		     * of waiting all the way through the commit rpc or just
 		     * waiting for the initial write.  The default is to only
 		     * wait through the initial write so the data is in the
 		     * server's cache, which is roughly similar to the state
 		     * a standard disk subsystem leaves the file in on close().
 		     *
 		     * We cannot clear the NMODIFIED bit in np->n_flag due to
 		     * potential races with other processes, and certainly
 		     * cannot clear it if we don't commit.
 		     */
 		    int cm = nfsv3_commit_on_close ? 1 : 0;
 		    error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, cm);
 		    /* np->n_flag &= ~NMODIFIED; */
 		} else {
 		    error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
 		}
 		np->n_attrstamp = 0;
 	    }
 	    if (np->n_flag & NWRITEERR) {
 		np->n_flag &= ~NWRITEERR;
 		error = np->n_error;
 	    }
 	}
 	return (error);
 }
 
 /*
  * nfs getattr call from vfs.
  *
  * Fills in ap->a_vap with the attributes of ap->a_vp.  The attribute cache
  * is consulted first and a GETATTR rpc is issued only on a miss.  For
  * version 3 with access caching enabled, an ACCESS rpc is tried before the
  * GETATTR and the cache is consulted again.
  *
  * Returns 0 on success or the error left in `error' by the rpc macros.
  */
 static int
 nfs_getattr(ap)
 	struct vop_getattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	register caddr_t cp;
 	register u_int32_t *tl;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos;
 	int error = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(vp);
 	
 	/*
 	 * Update local times for special files.
 	 */
 	if (np->n_flag & (NACC | NUPD))
 		np->n_flag |= NCHG;
 	/*
 	 * First look in the cache.
 	 */
 	if (nfs_getattrcache(vp, ap->a_vap) == 0)
 		return (0);
 
 	/*
 	 * The return value of the ACCESS rpc is ignored; what matters is
 	 * whether the attribute cache can satisfy us afterwards (presumably
 	 * because the reply's post-op attributes refresh it -- confirm in
 	 * nfsm_postop_attr).  If not, fall through to a plain GETATTR.
 	 */
 	if (v3 && nfsaccess_cache_timeout > 0) {
 		nfsstats.accesscache_misses++;
 		nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred);
 		if (nfs_getattrcache(vp, ap->a_vap) == 0)
 			return (0);
 	}
 
 	nfsstats.rpccnt[NFSPROC_GETATTR]++;
 	/* The request carries nothing but the file handle. */
 	nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
 	nfsm_fhtom(vp, v3);
 	nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
 	if (!error) {
 		nfsm_loadattr(vp, ap->a_vap);
 	}
 	nfsm_reqdone;
 	return (error);
 }
 
 /*
  * nfs setattr call.
  *
  * Validates the request in ap->a_vap, adjusts the local notion of the file
  * size when the size is being changed, and then hands the change to
  * nfs_setattrrpc().
  *
  * Returns 0 on success; EOPNOTSUPP when flags are being set; EROFS on a
  * read-only mount; EISDIR when a size is given for a directory; or the
  * error from nfs_vinvalbuf() / nfs_setattrrpc().
  */
 static int
 nfs_setattr(ap)
 	struct vop_setattr_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	register struct vattr *vap = ap->a_vap;
 	int error = 0;
 	u_quad_t tsize;		/* previous size, restored if the rpc fails */
 
 #ifndef nolint
 	tsize = (u_quad_t)0;
 #endif
 
 	/*
 	 * Setting of flags is not supported.
 	 */
 	if (vap->va_flags != VNOVAL)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Disallow write attempts if the filesystem is mounted read-only.
 	 * (va_flags is always VNOVAL here because of the check above.)
 	 */
   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
 	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
 		return (EROFS);
 	if (vap->va_size != VNOVAL) {
  		switch (vp->v_type) {
  		case VDIR:
  			return (EISDIR);
  		case VCHR:
  		case VBLK:
  		case VSOCK:
  		case VFIFO:
 			/*
 			 * A size makes no sense for these types: succeed at
 			 * once if nothing else is being set, otherwise drop
 			 * the size and carry on with the other attributes.
 			 */
 			if (vap->va_mtime.tv_sec == VNOVAL &&
 			    vap->va_atime.tv_sec == VNOVAL &&
 			    vap->va_mode == (mode_t)VNOVAL &&
 			    vap->va_uid == (uid_t)VNOVAL &&
 			    vap->va_gid == (gid_t)VNOVAL)
 				return (0);
  			vap->va_size = VNOVAL;
  			break;
  		default:
 			/*
 			 * Disallow write attempts if the filesystem is
 			 * mounted read-only.
 			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			vnode_pager_setsize(vp, vap->va_size);
 			/*
 			 * Dirty data is discarded when truncating to zero
 			 * (flags 0) and written back otherwise (V_SAVE).
 			 * On failure the pager size set above is undone.
 			 */
  			if (np->n_flag & NMODIFIED) {
  			    if (vap->va_size == 0)
  				error = nfs_vinvalbuf(vp, 0,
  					ap->a_cred, ap->a_p, 1);
  			    else
  				error = nfs_vinvalbuf(vp, V_SAVE,
  					ap->a_cred, ap->a_p, 1);
  			    if (error) {
 				vnode_pager_setsize(vp, np->n_size);
  				return (error);
 			    }
  			}
 			/* Save the old size and adopt the new one locally. */
  			tsize = np->n_size;
  			np->n_size = np->n_vattr.va_size = vap->va_size;
   		};
 	/*
 	 * No size change: when a time is being set on a modified regular
 	 * file, write the dirty buffers back first.  Only EINTR from that
 	 * flush aborts the operation.
 	 */
   	} else if ((vap->va_mtime.tv_sec != VNOVAL ||
 		vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
 		vp->v_type == VREG &&
   		(error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
 		 ap->a_p, 1)) == EINTR)
 		return (error);
 	error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
 	/* The rpc failed after the size was changed locally: put it back. */
 	if (error && vap->va_size != VNOVAL) {
 		np->n_size = np->n_vattr.va_size = tsize;
 		vnode_pager_setsize(vp, np->n_size);
 	}
 	return (error);
 }
 
 /*
  * Do an nfs setattr rpc.
  *
  *	vp	vnode whose attributes are to be changed
  *	vap	new attribute values; fields left at VNOVAL are not set
  *	cred	credentials to send with the request
  *	procp	process on whose behalf the request is made
  *
  * Returns 0 on success or the error left in `error' by the rpc macros.
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere and appear to use
  * the locals declared here and to set `error' -- confirm in nfsm_subs.h.
  */
 static int
 nfs_setattrrpc(vp, vap, cred, procp)
 	register struct vnode *vp;
 	register struct vattr *vap;
 	struct ucred *cred;
 	struct proc *procp;
 {
 	register struct nfsv2_sattr *sp;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	u_int32_t *tl;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(vp);
 
 	nfsstats.rpccnt[NFSPROC_SETATTR]++;
 	nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
 	nfsm_fhtom(vp, v3);
 	if (v3) {
 		/*
 		 * Version 3: attributes are built by nfsm_v3attrbuild(),
 		 * followed by one extra word set to nfs_false (presumably
 		 * the "guard" check flag of the SETATTR arguments --
 		 * confirm against RFC 1813).
 		 */
 		nfsm_v3attrbuild(vap, TRUE);
 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = nfs_false;
 	} else {
 		/*
 		 * Version 2: fill in a fixed nfsv2_sattr.  nfs_xdrneg1 is
 		 * stored for mode, uid and gid when they are VNOVAL.
 		 */
 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		if (vap->va_mode == (mode_t)VNOVAL)
 			sp->sa_mode = nfs_xdrneg1;
 		else
 			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
 		if (vap->va_uid == (uid_t)VNOVAL)
 			sp->sa_uid = nfs_xdrneg1;
 		else
 			sp->sa_uid = txdr_unsigned(vap->va_uid);
 		if (vap->va_gid == (gid_t)VNOVAL)
 			sp->sa_gid = nfs_xdrneg1;
 		else
 			sp->sa_gid = txdr_unsigned(vap->va_gid);
 		sp->sa_size = txdr_unsigned(vap->va_size);
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
 	nfsm_request(vp, NFSPROC_SETATTR, procp, cred);
 	/* The reply carries wcc data in version 3, plain attributes in 2. */
 	if (v3) {
 		nfsm_wcc_data(vp, wccflag);
 	} else
 		nfsm_loadattr(vp, (struct vattr *)0);
 	nfsm_reqdone;
 	return (error);
 }
 
 /*
  * nfs lookup call, one step at a time...
  * First look in cache
  * If not found, unlock the directory nfsnode and do the rpc
  *
  * Resolves the component described by ap->a_cnp in directory ap->a_dvp
  * and returns the resulting vnode through ap->a_vpp.
  *
  * A name cache hit is trusted only after VOP_ACCESS() grants search
  * permission on the directory and the ctime reported by VOP_GETATTR()
  * matches the ctime recorded in the nfsnode; otherwise the entry is
  * purged and a LOOKUP rpc is done.
  *
  * PDIRUNLOCK is set in cnp->cn_flags whenever the directory is left
  * unlocked on return.
  *
  * Returns 0 with *vpp set on success.  Errors include EROFS (delete or
  * rename on a read-only mount), ENOTDIR (dvp is not a directory), EISDIR
  * (rename target is the directory itself) and EJUSTRETURN (last component
  * not found for CREATE or RENAME on a writable mount), plus whatever the
  * rpc or the locking primitives return.
  *
  * Every return taken after a successful nfsm_request() must release the
  * reply mbuf chain (mrep), either through nfsm_reqdone or with an
  * explicit m_freem().
  */
 static int
 nfs_lookup(ap)
 	struct vop_lookup_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	int flags = cnp->cn_flags;
 	struct vnode *newvp;
 	u_int32_t *tl;
 	caddr_t cp;
 	int32_t t1, t2;
 	struct nfsmount *nmp;
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	long len;
 	nfsfh_t *fhp;
 	struct nfsnode *np;
 	int lockparent, wantparent, error = 0, attrflag, fhsize;
 	int v3 = NFS_ISV3(dvp);
 	struct proc *p = cnp->cn_proc;
 
 	*vpp = NULLVP;
 	cnp->cn_flags &= ~PDIRUNLOCK;
 	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 	lockparent = flags & LOCKPARENT;
 	wantparent = flags & (LOCKPARENT|WANTPARENT);
 	nmp = VFSTONFS(dvp->v_mount);
 	np = VTONFS(dvp);
 	/*
 	 * A return from cache_lookup() that is neither 0 nor ENOENT is
 	 * handled as a cache hit that still has to be validated.
 	 */
 	if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
 		struct vattr vattr;
 		int vpid;
 
 		if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) != 0) {
 			*vpp = NULLVP;
 			return (error);
 		}
 
 		newvp = *vpp;
 		vpid = newvp->v_id;
 		/*
 		 * See the comment starting `Step through' in ufs/ufs_lookup.c
 		 * for an explanation of the locking protocol
 		 */
 		if (dvp == newvp) {
 			VREF(newvp);
 			error = 0;
 		} else if (flags & ISDOTDOT) {
 			VOP_UNLOCK(dvp, 0, p);
 			cnp->cn_flags |= PDIRUNLOCK;
 			error = vget(newvp, LK_EXCLUSIVE, p);
 			if (!error && lockparent && (flags & ISLASTCN)) {
 				error = vn_lock(dvp, LK_EXCLUSIVE, p);
 				if (error == 0)
 					cnp->cn_flags &= ~PDIRUNLOCK;
 			}
 		} else {
 			error = vget(newvp, LK_EXCLUSIVE, p);
 			if (!lockparent || error || !(flags & ISLASTCN)) {
 				VOP_UNLOCK(dvp, 0, p);
 				cnp->cn_flags |= PDIRUNLOCK;
 			}
 		}
 		if (!error) {
 			/*
 			 * The hit is good only if the vnode was not recycled
 			 * in the meantime (same v_id) and its ctime is still
 			 * the one we recorded.
 			 */
 			if (vpid == newvp->v_id) {
 			   if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p)
 			    && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
 				nfsstats.lookupcache_hits++;
 				if (cnp->cn_nameiop != LOOKUP &&
 				    (flags & ISLASTCN))
 					cnp->cn_flags |= SAVENAME;
 				return (0);
 			   }
 			   cache_purge(newvp);
 			}
 			vput(newvp);
 			if (lockparent && dvp != newvp && (flags & ISLASTCN))
 				VOP_UNLOCK(dvp, 0, p);
 		}
 		/* Stale entry: relock the directory and go to the server. */
 		error = vn_lock(dvp, LK_EXCLUSIVE, p);
 		*vpp = NULLVP;
 		if (error) {
 			cnp->cn_flags |= PDIRUNLOCK;
 			return (error);
 		}
 		cnp->cn_flags &= ~PDIRUNLOCK;
 	}
 	error = 0;
 	newvp = NULLVP;
 	nfsstats.lookupcache_misses++;
 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
 	len = cnp->cn_namelen;
 	nfsm_reqhead(dvp, NFSPROC_LOOKUP,
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
 	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
 	if (error) {
 		nfsm_postop_attr(dvp, attrflag);
 		m_freem(mrep);
 		goto nfsmout;
 	}
 	nfsm_getfh(fhp, fhsize, v3);
 
 	/*
 	 * Handle RENAME case...
 	 */
 	if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
 		if (NFS_CMPFH(np, fhp, fhsize)) {
 			m_freem(mrep);
 			return (EISDIR);
 		}
 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
 		if (error) {
 			m_freem(mrep);
 			return (error);
 		}
 		newvp = NFSTOV(np);
 		if (v3) {
 			nfsm_postop_attr(newvp, attrflag);
 			nfsm_postop_attr(dvp, attrflag);
 		} else
 			nfsm_loadattr(newvp, (struct vattr *)0);
 		*vpp = newvp;
 		m_freem(mrep);
 		cnp->cn_flags |= SAVENAME;
 		if (!lockparent) {
 			VOP_UNLOCK(dvp, 0, p);
 			cnp->cn_flags |= PDIRUNLOCK;
 		}
 		return (0);
 	}
 
 	if (flags & ISDOTDOT) {
 		VOP_UNLOCK(dvp, 0, p);
 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
 		if (error) {
 			/* Do not leak the reply on this early return. */
 			m_freem(mrep);
 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
 			return (error);
 		}
 		newvp = NFSTOV(np);
 		if (lockparent && (flags & ISLASTCN)) {
 			error = vn_lock(dvp, LK_EXCLUSIVE, p);
 			if (error) {
 				cnp->cn_flags |= PDIRUNLOCK;
 				vput(newvp);
 				/* Do not leak the reply here either. */
 				m_freem(mrep);
 				return (error);
 			}
 		} else
 			cnp->cn_flags |= PDIRUNLOCK;
 	} else if (NFS_CMPFH(np, fhp, fhsize)) {
 		/* The name resolves to the directory itself. */
 		VREF(dvp);
 		newvp = dvp;
 	} else {
 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
 		if (error) {
 			m_freem(mrep);
 			return (error);
 		}
 		if (!lockparent || !(flags & ISLASTCN)) {
 			cnp->cn_flags |= PDIRUNLOCK;
 			VOP_UNLOCK(dvp, 0, p);
 		}
 		newvp = NFSTOV(np);
 	}
 	if (v3) {
 		nfsm_postop_attr(newvp, attrflag);
 		nfsm_postop_attr(dvp, attrflag);
 	} else
 		nfsm_loadattr(newvp, (struct vattr *)0);
 	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 		cnp->cn_flags |= SAVENAME;
 	/*
 	 * Enter the result in the name cache, recording the ctime that a
 	 * later cache hit will be validated against.
 	 */
 	if ((cnp->cn_flags & MAKEENTRY) &&
 	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
 		np->n_ctime = np->n_vattr.va_ctime.tv_sec;
 		cache_enter(dvp, newvp, cnp);
 	}
 	*vpp = newvp;
 	nfsm_reqdone;
 	if (error) {
 		if (newvp != NULLVP) {
 			vrele(newvp);
 			*vpp = NULLVP;
 		}
 		/*
 		 * A missing last component is not an error for CREATE and
 		 * RENAME: tell the caller it may go ahead and create it,
 		 * unless the mount is read-only.
 		 */
 		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
 		    (flags & ISLASTCN) && error == ENOENT) {
 			if (!lockparent) {
 				VOP_UNLOCK(dvp, 0, p);
 				cnp->cn_flags |= PDIRUNLOCK;
 			}
 			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
 				error = EROFS;
 			else
 				error = EJUSTRETURN;
 		}
 		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 			cnp->cn_flags |= SAVENAME;
 	}
 	return (error);
 }
 
 /*
  * nfs read call.
  * Only regular files can be read this way (EPERM otherwise); the actual
  * work is done by nfs_bioread().
  */
 static int
 nfs_read(ap)
 	struct vop_read_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vnode *vp;
 
 	vp = ap->a_vp;
 	if (vp->v_type == VREG)
 		return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
 	return (EPERM);
 }
 
 /*
  * nfs readlink call
  */
 static int
 nfs_readlink(ap)
 	struct vop_readlink_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 
 	if (vp->v_type != VLNK)
 		return (EINVAL);
 	return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
 }
 
 /*
  * Do a readlink rpc.
  * Called by nfs_doio() from below the buffer cache.
  *
  * Builds and sends an NFSPROC_READLINK request for vp and, when no error
  * was recorded, copies the returned link text into uiop.  The value left
  * in "error" is returned (0 on success).
  *
  * NOTE(review): the nfsm_* macros are defined outside this function.  They
  * appear to rely on the exact local names declared below (tl, cp, t1, t2,
  * bpos, dpos, cp2, mreq, mrep, md, mb, mb2) and to update "error" --
  * confirm against their definitions before renaming or removing a local.
  */
 int
 nfs_readlinkrpc(vp, uiop, cred)
 	register struct vnode *vp;
 	struct uio *uiop;
 	struct ucred *cred;
 {
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, len, attrflag;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(vp);
 
 	/* Account for the rpc, then build the request: just the file handle. */
 	nfsstats.rpccnt[NFSPROC_READLINK]++;
 	nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
 	nfsm_fhtom(vp, v3);
 	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
 	/* Only the v3 path parses post-op attributes from the reply. */
 	if (v3)
 		nfsm_postop_attr(vp, attrflag);
 	if (!error) {
 		nfsm_strsiz(len, NFS_MAXPATHLEN);
 		/*
 		 * When the reply length equals the maximum, use the cached
 		 * node size instead if it is nonzero and smaller.
 		 * NOTE(review): presumably a workaround for servers that
 		 * report the maximum length -- rationale not visible here.
 		 */
 		if (len == NFS_MAXPATHLEN) {
 			struct nfsnode *np = VTONFS(vp);
 			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
 				len = np->n_size;
 		}
 		/* Copy len bytes of link text into the caller's uio. */
 		nfsm_mtouio(uiop, len);
 	}
 	nfsm_reqdone;
 	return (error);
 }
 
 /*
  * nfs read rpc call
  * Ditto above
  *
  * Reads uiop->uio_resid bytes starting at uiop->uio_offset from vp by
  * issuing one NFSPROC_READ rpc per chunk of at most nm_rsize bytes.
  * Returns EFBIG if the requested range extends past nm_maxfilesize,
  * otherwise the value left in "error" (0 on success).
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * depend on the local names declared below and on the nfsmout label --
  * confirm against their definitions before restructuring.
  */
 int
 nfs_readrpc(vp, uiop, cred)
 	register struct vnode *vp;
 	struct uio *uiop;
 	struct ucred *cred;
 {
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct nfsmount *nmp;
 	int error = 0, len, retlen, tsiz, eof, attrflag;
 	int v3 = NFS_ISV3(vp);
 
 	/* eof is only assigned on the v3 path below; give it a defined value. */
 #ifndef nolint
 	eof = 0;
 #endif
 	nmp = VFSTONFS(vp->v_mount);
 	tsiz = uiop->uio_resid;
 	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
 		return (EFBIG);
 	/* tsiz counts the bytes still to be requested. */
 	while (tsiz > 0) {
 		nfsstats.rpccnt[NFSPROC_READ]++;
 		/* Clamp this request to the mount's read size. */
 		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
 		nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
 		nfsm_fhtom(vp, v3);
 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED * 3);
 		if (v3) {
 			/* v3: two-word offset followed by the count. */
 			txdr_hyper(uiop->uio_offset, tl);
 			*(tl + 2) = txdr_unsigned(len);
 		} else {
 			/* v2: one-word offset, the count, then a zero word. */
 			*tl++ = txdr_unsigned(uiop->uio_offset);
 			*tl++ = txdr_unsigned(len);
 			*tl = 0;
 		}
 		nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
 		if (v3) {
 			nfsm_postop_attr(vp, attrflag);
 			/* On error the reply is released here before bailing out. */
 			if (error) {
 				m_freem(mrep);
 				goto nfsmout;
 			}
 			/* Second of the two words dissected is the eof flag. */
 			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			eof = fxdr_unsigned(int, *(tl + 1));
 		} else
 			nfsm_loadattr(vp, (struct vattr *)0);
 		/* Length of the returned data, then copy it into the uio. */
 		nfsm_strsiz(retlen, nmp->nm_rsize);
 		nfsm_mtouio(uiop, retlen);
 		m_freem(mrep);
 		tsiz -= retlen;
 		/*
 		 * Stop early: v3 on the eof flag or an empty reply, v2 on a
 		 * reply shorter than what was asked for.
 		 */
 		if (v3) {
 			if (eof || retlen == 0)
 				tsiz = 0;
 		} else if (retlen < len)
 			tsiz = 0;
 	}
 nfsmout:
 	return (error);
 }
 
 /*
  * nfs write call
  *
  * Writes uiop->uio_resid bytes at uiop->uio_offset to vp, one
  * NFSPROC_WRITE rpc per chunk of at most nm_wsize bytes.
  *
  *	iomode		in: value sent in the v3 request;
  *			out: set to "committed" (see the nfsmout code below).
  *	must_commit	out: cleared on entry, set to 1 when a v3 reply's
  *			write verifier differs from the one stored in the
  *			mount.
  *
  * Returns EFBIG if the range extends past nm_maxfilesize, otherwise the
  * value left in "error".  On error uio_resid is set to the count of bytes
  * not yet accounted as written.
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * depend on the local names declared below and on the nfsmout label.
  * NOTE(review): the iovcnt sanity check is under "#ifndef DIAGNOSTIC", so
  * it is compiled when DIAGNOSTIC is NOT defined -- confirm that this is
  * intended rather than an inverted guard.
  */
 int
 nfs_writerpc(vp, uiop, cred, iomode, must_commit)
 	register struct vnode *vp;
 	register struct uio *uiop;
 	struct ucred *cred;
 	int *iomode, *must_commit;
 {
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2, backup;
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
 	int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
 
 #ifndef DIAGNOSTIC
 	if (uiop->uio_iovcnt != 1)
 		panic("nfs: writerpc iovcnt > 1");
 #endif
 	*must_commit = 0;
 	tsiz = uiop->uio_resid;
 	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
 		return (EFBIG);
 	/* tsiz counts the bytes still to be written. */
 	while (tsiz > 0) {
 		nfsstats.rpccnt[NFSPROC_WRITE]++;
 		/* Clamp this request to the mount's write size. */
 		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
 		nfsm_reqhead(vp, NFSPROC_WRITE,
 			NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
 		nfsm_fhtom(vp, v3);
 		if (v3) {
 			/* v3: two-word offset, count, iomode, data length. */
 			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			txdr_hyper(uiop->uio_offset, tl);
 			tl += 2;
 			*tl++ = txdr_unsigned(len);
 			*tl++ = txdr_unsigned(*iomode);
 			*tl = txdr_unsigned(len);
 		} else {
 			register u_int32_t x;
 
 			nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 			/* Set both "begin" and "current" to non-garbage. */
 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
 			*tl++ = x;	/* "begin offset" */
 			*tl++ = x;	/* "current offset" */
 			x = txdr_unsigned(len);
 			*tl++ = x;	/* total to this offset */
 			*tl = x;	/* size of this write */
 		}
 		/* Append len bytes of payload taken from the uio. */
 		nfsm_uiotom(uiop, len);
 		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
 		if (v3) {
 			wccflag = NFSV3_WCCCHK;
 			nfsm_wcc_data(vp, wccflag);
 			if (!error) {
 				nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED
 					+ NFSX_V3WRITEVERF);
 				/* rlen: byte count the server reports written. */
 				rlen = fxdr_unsigned(int, *tl++);
 				if (rlen == 0) {
 					/* Nothing written: fail with an I/O error. */
 					error = NFSERR_IO;
 					m_freem(mrep);
 					break;
 				} else if (rlen < len) {
 					/*
 					 * Short write: rewind the uio by the
 					 * unwritten remainder so the next pass
 					 * resends it.
 					 */
 					backup = len - rlen;
 					uiop->uio_iov->iov_base -= backup;
 					uiop->uio_iov->iov_len += backup;
 					uiop->uio_offset -= backup;
 					uiop->uio_resid += backup;
 					len = rlen;
 				}
 				commit = fxdr_unsigned(int, *tl++);
 
 				/*
 				 * Return the lowest commitment level
 				 * obtained by any of the RPCs.
 				 */
 				if (committed == NFSV3WRITE_FILESYNC)
 					committed = commit;
 				else if (committed == NFSV3WRITE_DATASYNC &&
 					commit == NFSV3WRITE_UNSTABLE)
 					committed = commit;
 				/*
 				 * Record the first write verifier seen; if a
 				 * later one differs, flag must_commit and
 				 * store the new verifier.
 				 */
 				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
 				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 					NFSX_V3WRITEVERF);
 				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
 				} else if (bcmp((caddr_t)tl,
 				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
 				    *must_commit = 1;
 				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 					NFSX_V3WRITEVERF);
 				}
 			}
 		} else
 		    nfsm_loadattr(vp, (struct vattr *)0);
 		/* Sync the node's n_mtime with the cached attribute mtime. */
 		if (wccflag)
 		    VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec;
 		m_freem(mrep);
 		if (error)
 			break;
 		tsiz -= len;
 	}
 nfsmout:
 	/* On MNT_ASYNC mounts always report FILESYNC to the caller. */
 	if (vp->v_mount->mnt_flag & MNT_ASYNC)
 		committed = NFSV3WRITE_FILESYNC;
 	*iomode = committed;
 	if (error)
 		uiop->uio_resid = tsiz;
 	return (error);
 }
 
 /*
  * nfs mknod rpc
  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
  * mode set to specify the file type and the size field for rdev.
  *
  * Creates the special file named by cnp in directory dvp with the
  * attributes in vap.  Only VCHR, VBLK, VFIFO and VSOCK are accepted; any
  * other type returns EOPNOTSUPP.  On success *vpp is set to the new vnode;
  * on failure any vnode obtained along the way is released with vput().
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * depend on the local names declared below and to update "error".
  */
 static int
 nfs_mknodrpc(dvp, vpp, cnp, vap)
 	register struct vnode *dvp;
 	register struct vnode **vpp;
 	register struct componentname *cnp;
 	register struct vattr *vap;
 {
 	register struct nfsv2_sattr *sp;
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	struct vnode *newvp = (struct vnode *)0;
 	struct nfsnode *np = (struct nfsnode *)0;
 	struct vattr vattr;
 	char *cp2;
 	caddr_t bpos, dpos;
 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	u_int32_t rdev;
 	int v3 = NFS_ISV3(dvp);
 
 	/*
 	 * rdev is only consumed by the v2 request below (as sa_size):
 	 * the device number for device nodes, nfs_xdrneg1 for fifos/sockets.
 	 */
 	if (vap->va_type == VCHR || vap->va_type == VBLK)
 		rdev = txdr_unsigned(vap->va_rdev);
 	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
 		rdev = nfs_xdrneg1;
 	else {
 		return (EOPNOTSUPP);
 	}
 	/* The fetched attributes are not used below; only the error matters. */
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
 		return (error);
 	}
 	nfsstats.rpccnt[NFSPROC_MKNOD]++;
 	nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
 		+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 	if (v3) {
 		/* v3: file type, attributes, then major/minor for devices. */
 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl++ = vtonfsv3_type(vap->va_type);
 		nfsm_v3attrbuild(vap, FALSE);
 		if (vap->va_type == VCHR || vap->va_type == VBLK) {
 			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(umajor(vap->va_rdev));
 			*tl = txdr_unsigned(uminor(vap->va_rdev));
 		}
 	} else {
 		/* v2: type goes in the mode bits, rdev in the size field. */
 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
 		sp->sa_uid = nfs_xdrneg1;
 		sp->sa_gid = nfs_xdrneg1;
 		sp->sa_size = rdev;
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
 	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred);
 	if (!error) {
 		nfsm_mtofh(dvp, newvp, v3, gotvp);
 		/*
 		 * No usable vnode came out of the reply: drop whatever was
 		 * set and look the new name up instead.
 		 */
 		if (!gotvp) {
 			if (newvp) {
 				vput(newvp);
 				newvp = (struct vnode *)0;
 			}
 			error = nfs_lookitup(dvp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
 			if (!error)
 				newvp = NFSTOV(np);
 		}
 	}
 	if (v3)
 		nfsm_wcc_data(dvp, wccflag);
 	nfsm_reqdone;
 	if (error) {
 		if (newvp)
 			vput(newvp);
 	} else {
 		if (cnp->cn_flags & MAKEENTRY)
 			cache_enter(dvp, newvp, cnp);
 		*vpp = newvp;
 	}
 	/* The directory changed; reset its attribute stamp if wccflag is 0. */
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs mknod vop
  * Thin wrapper: unpack the vop arguments and hand them to nfs_mknodrpc(),
  * whose result is returned unchanged.
  */
 /* ARGSUSED */
 static int
 nfs_mknod(ap)
 	struct vop_mknod_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct vattr *vap = ap->a_vap;
 
 	return (nfs_mknodrpc(dvp, vpp, cnp, vap));
 }
 
 /*
  * Counter used by nfs_create() to build the verifier sent with a v3
  * exclusive create; it is incremented for every such request.
  */
 static u_long create_verf;
 /*
  * nfs file create call
  *
  * Creates the file named by a_cnp in directory a_dvp with the attributes
  * in a_vap.  VSOCK requests are handed to nfs_mknodrpc().  On success
  * *a_vpp is set to the new vnode and, if MAKEENTRY is set, the name is
  * entered in the name cache.
  *
  * For v3 with VA_EXCLUSIVE an exclusive create is attempted first; if the
  * server answers NFSERR_NOTSUPP the request is retried as an unchecked
  * create.
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * depend on the local names declared below and to update "error".
  */
 static int
 nfs_create(ap)
 	struct vop_create_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	register struct vnode *dvp = ap->a_dvp;
 	register struct vattr *vap = ap->a_vap;
 	register struct componentname *cnp = ap->a_cnp;
 	register struct nfsv2_sattr *sp;
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	struct nfsnode *np = (struct nfsnode *)0;
 	struct vnode *newvp = (struct vnode *)0;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct vattr vattr;
 	int v3 = NFS_ISV3(dvp);
 
 	/*
 	 * Oops, not for me..
 	 */
 	if (vap->va_type == VSOCK)
 		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
 
 	/* The fetched attributes are not used below; only the error matters. */
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
 		return (error);
 	}
 	if (vap->va_vaflags & VA_EXCLUSIVE)
 		fmode |= O_EXCL;
 	/* Retry point: re-entered with O_EXCL cleared after NFSERR_NOTSUPP. */
 again:
 	nfsstats.rpccnt[NFSPROC_CREATE]++;
 	nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
 		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 	if (v3) {
 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (fmode & O_EXCL) {
 			/*
 			 * Exclusive create carries a two-word verifier instead
 			 * of attributes: the first interface address (when
 			 * INET is configured and one exists) or the counter,
 			 * followed by the incremented counter.
 			 */
 			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
 			nfsm_build(tl, u_int32_t *, NFSX_V3CREATEVERF);
 #ifdef INET
 			if (!TAILQ_EMPTY(&in_ifaddrhead))
 				*tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
 			else
 #endif
 				*tl++ = create_verf;
 			*tl = ++create_verf;
 		} else {
 			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
 			nfsm_v3attrbuild(vap, FALSE);
 		}
 	} else {
 		/* v2: plain sattr with size 0 and uid/gid left unset (-1). */
 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
 		sp->sa_uid = nfs_xdrneg1;
 		sp->sa_gid = nfs_xdrneg1;
 		sp->sa_size = 0;
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
 	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
 	if (!error) {
 		nfsm_mtofh(dvp, newvp, v3, gotvp);
 		/*
 		 * No usable vnode came out of the reply: drop whatever was
 		 * set and look the new name up instead.
 		 */
 		if (!gotvp) {
 			if (newvp) {
 				vput(newvp);
 				newvp = (struct vnode *)0;
 			}
 			error = nfs_lookitup(dvp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
 			if (!error)
 				newvp = NFSTOV(np);
 		}
 	}
 	if (v3)
 		nfsm_wcc_data(dvp, wccflag);
 	nfsm_reqdone;
 	if (error) {
 		/* Server lacks exclusive create: fall back to unchecked. */
 		if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
 			fmode &= ~O_EXCL;
 			goto again;
 		}
 		if (newvp)
 			vput(newvp);
 	} else if (v3 && (fmode & O_EXCL)) {
 		/*
 		 * We are normally called with only a partially initialized
 		 * VAP.  Since the NFSv3 spec says that server may use the
 		 * file attributes to store the verifier, the spec requires
 		 * us to do a SETATTR RPC. FreeBSD servers store the verifier
 		 * in atime, but we can't really assume that all servers will
 		 * so we ensure that our SETATTR sets both atime and mtime.
 		 */
 		if (vap->va_mtime.tv_sec == VNOVAL)
 			vfs_timestamp(&vap->va_mtime);
 		if (vap->va_atime.tv_sec == VNOVAL)
 			vap->va_atime = vap->va_mtime;
 		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
 	}
 	if (!error) {
 		if (cnp->cn_flags & MAKEENTRY)
 			cache_enter(dvp, newvp, cnp);
 		*ap->a_vpp = newvp;
 	}
 	/* The directory changed; reset its attribute stamp if wccflag is 0. */
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs file remove call
  * To try and make nfs semantics closer to ufs semantics, a file that has
  * other processes using the vnode is renamed instead of removed and then
  * removed later on the last close.
  * - If v_usecount > 1
  *	  If a rename is not already in the works
  *	     call nfs_sillyrename() to set it up
  *     else
  *	  do the remove rpc
  */
 static int
 nfs_remove(ap)
 	struct vop_remove_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode * a_dvp;
 		struct vnode * a_vp;
 		struct componentname * a_cnp;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct vnode *dvp = ap->a_dvp;
 	register struct componentname *cnp = ap->a_cnp;
 	register struct nfsnode *np = VTONFS(vp);
 	int error = 0;
 	struct vattr vattr;
 
 #ifndef DIAGNOSTIC
 	if ((cnp->cn_flags & HASBUF) == 0)
 		panic("nfs_remove: no name");
 	if (vp->v_usecount < 1)
 		panic("nfs_remove: bad v_usecount");
 #endif
 	if (vp->v_type == VDIR)
 		error = EPERM;
 	else if (vp->v_usecount == 1 || (np->n_sillyrename &&
 	    VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
 	    vattr.va_nlink > 1)) {
 		/*
 		 * Purge the name cache so that the chance of a lookup for
 		 * the name succeeding while the remove is in progress is
 		 * minimized. Without node locking it can still happen, such
 		 * that an I/O op returns ESTALE, but since you get this if
 		 * another host removes the file..
 		 */
 		cache_purge(vp);
 		/*
 		 * throw away biocache buffers, mainly to avoid
 		 * unnecessary delayed writes later.
 		 */
 		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
 		/* Do the rpc */
 		if (error != EINTR)
 			error = nfs_removerpc(dvp, cnp->cn_nameptr,
 				cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc);
 		/*
 		 * Kludge City: If the first reply to the remove rpc is lost..
 		 *   the reply to the retransmitted request will be ENOENT
 		 *   since the file was in fact removed
 		 *   Therefore, we cheat and return success.
 		 */
 		if (error == ENOENT)
 			error = 0;
 	} else if (!np->n_sillyrename)
 		error = nfs_sillyrename(dvp, vp, cnp);
 	np->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs file remove rpc called from nfs_inactive
  */
 int
 nfs_removeit(sp)
 	register struct sillyrename *sp;
 {
 
 	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 		(struct proc *)0));
 }
 
 /*
  * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
  *
  * Sends an NFSPROC_REMOVE request for "name" (namelen bytes) in directory
  * dvp on behalf of proc/cred.  The directory node is always marked
  * NMODIFIED afterwards.  Returns the value left in "error".
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * depend on the local names declared below and to update "error".
  */
 static int
 nfs_removerpc(dvp, name, namelen, cred, proc)
 	register struct vnode *dvp;
 	const char *name;
 	int namelen;
 	struct ucred *cred;
 	struct proc *proc;
 {
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(dvp);
 
 	nfsstats.rpccnt[NFSPROC_REMOVE]++;
 	/* Request body: directory file handle followed by the name. */
 	nfsm_reqhead(dvp, NFSPROC_REMOVE,
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
 	nfsm_request(dvp, NFSPROC_REMOVE, proc, cred);
 	if (v3)
 		nfsm_wcc_data(dvp, wccflag);
 	nfsm_reqdone;
 	/* The directory changed; reset its attribute stamp if wccflag is 0. */
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs file rename call
  *
  * Renames a_fvp (named a_fcnp in directory a_fdvp) to the name a_tcnp in
  * directory a_tdvp; a_tvp, when non-NULL, is the existing target.
  * Returns EXDEV for a cross-mount rename.  ENOENT is mapped to success on
  * the assumption that it answers a retransmitted request.
  *
  * On every path the function releases the vnodes it was handed: tdvp via
  * vrele() when it is the same vnode as tvp and vput() otherwise, tvp via
  * vput(), and fdvp/fvp via vrele().
  *
  * NOTE(review): the name check is under "#ifndef DIAGNOSTIC", so it is
  * compiled when DIAGNOSTIC is NOT defined -- confirm that is intended.
  */
 static int
 nfs_rename(ap)
 	struct vop_rename_args  /* {
 		struct vnode *a_fdvp;
 		struct vnode *a_fvp;
 		struct componentname *a_fcnp;
 		struct vnode *a_tdvp;
 		struct vnode *a_tvp;
 		struct componentname *a_tcnp;
 	} */ *ap;
 {
 	register struct vnode *fvp = ap->a_fvp;
 	register struct vnode *tvp = ap->a_tvp;
 	register struct vnode *fdvp = ap->a_fdvp;
 	register struct vnode *tdvp = ap->a_tdvp;
 	register struct componentname *tcnp = ap->a_tcnp;
 	register struct componentname *fcnp = ap->a_fcnp;
 	int error;
 
 #ifndef DIAGNOSTIC
 	if ((tcnp->cn_flags & HASBUF) == 0 ||
 	    (fcnp->cn_flags & HASBUF) == 0)
 		panic("nfs_rename: no name");
 #endif
 	/* Check for cross-device rename */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
 		goto out;
 	}
 
 	/*
 	 * We have to flush B_DELWRI data prior to renaming
 	 * the file.  If we don't, the delayed-write buffers
 	 * can be flushed out later after the file has gone stale
 	 * under NFSV3.  NFSV2 does not have this problem because
 	 * ( as far as I can tell ) it flushes dirty buffers more
 	 * often.
 	 */
 
 	/* The results of these flushes are not checked. */
 	VOP_FSYNC(fvp, fcnp->cn_cred, MNT_WAIT, fcnp->cn_proc);
 	if (tvp)
 	    VOP_FSYNC(tvp, tcnp->cn_cred, MNT_WAIT, tcnp->cn_proc);
 
 	/*
 	 * If the tvp exists and is in use, sillyrename it before doing the
 	 * rename of the new file over it.
 	 * XXX Can't sillyrename a directory.
 	 */
 	if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename &&
 		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
 		/* Sillyrename succeeded: release tvp now and forget it. */
 		vput(tvp);
 		tvp = NULL;
 	}
 
 	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
 		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
 		tcnp->cn_proc);
 
 	/* A directory moved: purge name cache entries for the parents. */
 	if (fvp->v_type == VDIR) {
 		if (tvp != NULL && tvp->v_type == VDIR)
 			cache_purge(tdvp);
 		cache_purge(fdvp);
 	}
 
 out:
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp)
 		vput(tvp);
 	vrele(fdvp);
 	vrele(fvp);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
 	 */
 	if (error == ENOENT)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs file rename rpc called from nfs_remove() above
  */
 static int
 nfs_renameit(sdvp, scnp, sp)
 	struct vnode *sdvp;
 	struct componentname *scnp;
 	register struct sillyrename *sp;
 {
 	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
 		sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc));
 }
 
 /*
  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
  *
  * Sends an NFSPROC_RENAME request moving fnameptr (fnamelen bytes) in
  * directory fdvp to tnameptr (tnamelen bytes) in directory tdvp on behalf
  * of proc/cred.  Both directory nodes are always marked NMODIFIED
  * afterwards.  Returns the value left in "error".
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * depend on the local names declared below and to update "error".
  */
 static int
 nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc)
 	register struct vnode *fdvp;
 	const char *fnameptr;
 	int fnamelen;
 	register struct vnode *tdvp;
 	const char *tnameptr;
 	int tnamelen;
 	struct ucred *cred;
 	struct proc *proc;
 {
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(fdvp);
 
 	nfsstats.rpccnt[NFSPROC_RENAME]++;
 	/* Request body: (handle, name) for the source, then for the target. */
 	nfsm_reqhead(fdvp, NFSPROC_RENAME,
 		(NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
 		nfsm_rndup(tnamelen));
 	nfsm_fhtom(fdvp, v3);
 	nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
 	nfsm_fhtom(tdvp, v3);
 	nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
 	nfsm_request(fdvp, NFSPROC_RENAME, proc, cred);
 	if (v3) {
 		nfsm_wcc_data(fdvp, fwccflag);
 		nfsm_wcc_data(tdvp, twccflag);
 	}
 	nfsm_reqdone;
 	/*
 	 * Both directories changed; reset the attribute stamp of each one
 	 * whose wcc flag ended up 0.
 	 */
 	VTONFS(fdvp)->n_flag |= NMODIFIED;
 	VTONFS(tdvp)->n_flag |= NMODIFIED;
 	if (!fwccflag)
 		VTONFS(fdvp)->n_attrstamp = 0;
 	if (!twccflag)
 		VTONFS(tdvp)->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs hard link create call
  *
  * Creates a new name a_cnp in directory a_tdvp for the existing file
  * a_vp.  Returns EXDEV when the two vnodes are on different mounts.
  * EEXIST is mapped to success on the assumption that it answers a
  * retransmitted request.
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * depend on the local names declared below and to update "error".
  */
 static int
 nfs_link(ap)
 	struct vop_link_args /* {
 		struct vnode *a_tdvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct vnode *tdvp = ap->a_tdvp;
 	register struct componentname *cnp = ap->a_cnp;
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3;
 
 	if (vp->v_mount != tdvp->v_mount) {
 		return (EXDEV);
 	}
 
 	/*
 	 * Push all writes to the server, so that the attribute cache
 	 * doesn't get "out of sync" with the server.
 	 * XXX There should be a better way!
 	 */
 	/* The result of this flush is not checked. */
 	VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc);
 
 	v3 = NFS_ISV3(vp);
 	nfsstats.rpccnt[NFSPROC_LINK]++;
 	/* Request body: file handle, directory handle, new name. */
 	nfsm_reqhead(vp, NFSPROC_LINK,
 		NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
 	nfsm_fhtom(vp, v3);
 	nfsm_fhtom(tdvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 	nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
 	if (v3) {
 		nfsm_postop_attr(vp, attrflag);
 		nfsm_wcc_data(tdvp, wccflag);
 	}
 	nfsm_reqdone;
 	/*
 	 * The directory changed.  Reset the attribute stamp of the file
 	 * and/or directory whose flag ended up 0 (attrflag starts at 0, so
 	 * the file's stamp is always reset on the v2 path).
 	 */
 	VTONFS(tdvp)->n_flag |= NMODIFIED;
 	if (!attrflag)
 		VTONFS(vp)->n_attrstamp = 0;
 	if (!wccflag)
 		VTONFS(tdvp)->n_attrstamp = 0;
 	/*
 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
 	 */
 	if (error == EEXIST)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs symbolic link create call
  *
  * Creates a symbolic link named a_cnp in directory a_dvp whose contents
  * are the string a_target, using the attributes in a_vap.  On success
  * *a_vpp is set to the new vnode; on failure any vnode obtained along the
  * way is released with vput().
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * depend on the local names declared below and to update "error".
  */
 static int
 nfs_symlink(ap)
 	struct vop_symlink_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 		char *a_target;
 	} */ *ap;
 {
 	register struct vnode *dvp = ap->a_dvp;
 	register struct vattr *vap = ap->a_vap;
 	register struct componentname *cnp = ap->a_cnp;
 	register struct nfsv2_sattr *sp;
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct vnode *newvp = (struct vnode *)0;
 	int v3 = NFS_ISV3(dvp);
 
 	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
 	slen = strlen(ap->a_target);
 	nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
 	    nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 	/*
 	 * Field order differs by version: v3 places the attributes before
 	 * the target string, v2 places its sattr after it.
 	 */
 	if (v3) {
 		nfsm_v3attrbuild(vap, FALSE);
 	}
 	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
 	if (!v3) {
 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
 		sp->sa_uid = nfs_xdrneg1;
 		sp->sa_gid = nfs_xdrneg1;
 		sp->sa_size = nfs_xdrneg1;
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
 
 	/*
 	 * Issue the NFS request and get the rpc response.
 	 *
 	 * Only NFSv3 responses returning an error of 0 actually return
 	 * a file handle that can be converted into newvp without having
 	 * to do an extra lookup rpc.
 	 */
 	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
 	if (v3) {
 		if (error == 0)
 			nfsm_mtofh(dvp, newvp, v3, gotvp);
 		nfsm_wcc_data(dvp, wccflag);
 	}
 
 	/*
 	 * out code jumps -> here, mrep is also freed.
 	 */
 
 	nfsm_reqdone;
 
 	/*
 	 * If we get an EEXIST error, silently convert it to no-error
 	 * in case of an NFS retry.
 	 */
 	if (error == EEXIST)
 		error = 0;
 
 	/*
 	 * If we do not have (or no longer have) an error, and we could
 	 * not extract the newvp from the response due to the request being
 	 * NFSv2 or the error being EEXIST, we have to do a lookup in order
 	 * to obtain a newvp to return.
 	 */
 	if (error == 0 && newvp == NULL) {
 		struct nfsnode *np = NULL;
 
 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 		    cnp->cn_cred, cnp->cn_proc, &np);
 		if (!error)
 			newvp = NFSTOV(np);
 	}
 	if (error) {
 		if (newvp)
 			vput(newvp);
 	} else {
 		*ap->a_vpp = newvp;
 	}
 	/* The directory changed; reset its attribute stamp if wccflag is 0. */
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs make dir call
  *
  * Creates the directory named by a_cnp in directory a_dvp with the
  * attributes in a_vap.  On success *a_vpp is set to the new vnode.
  * An EEXIST reply is turned into success when a follow-up lookup finds a
  * directory under that name (assumed to be the answer to a retransmitted
  * request).
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * depend on the local names declared below and to update "error".
  * NOTE(review): this function releases newvp with vrele(), whereas
  * nfs_create()/nfs_mknodrpc()/nfs_symlink() use vput() for the vnode
  * obtained the same way -- confirm the lock state of newvp here.
  */
 static int
 nfs_mkdir(ap)
 	struct vop_mkdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	register struct vnode *dvp = ap->a_dvp;
 	register struct vattr *vap = ap->a_vap;
 	register struct componentname *cnp = ap->a_cnp;
 	register struct nfsv2_sattr *sp;
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	register int len;
 	struct nfsnode *np = (struct nfsnode *)0;
 	struct vnode *newvp = (struct vnode *)0;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	int gotvp = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct vattr vattr;
 	int v3 = NFS_ISV3(dvp);
 
 	/* The fetched attributes are not used below; only the error matters. */
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
 		return (error);
 	}
 	len = cnp->cn_namelen;
 	nfsstats.rpccnt[NFSPROC_MKDIR]++;
 	nfsm_reqhead(dvp, NFSPROC_MKDIR,
 	  NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
 	if (v3) {
 		nfsm_v3attrbuild(vap, FALSE);
 	} else {
 		/* v2: sattr with directory mode; uid/gid/size left unset (-1). */
 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
 		sp->sa_uid = nfs_xdrneg1;
 		sp->sa_gid = nfs_xdrneg1;
 		sp->sa_size = nfs_xdrneg1;
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
 	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
 	if (!error)
 		nfsm_mtofh(dvp, newvp, v3, gotvp);
 	if (v3)
 		nfsm_wcc_data(dvp, wccflag);
 	nfsm_reqdone;
 	/* The directory changed; reset its attribute stamp if wccflag is 0. */
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	/*
 	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
 	 * if we can succeed in looking up the directory.
 	 */
 	/* The same lookup also runs when the reply yielded no vnode. */
 	if (error == EEXIST || (!error && !gotvp)) {
 		if (newvp) {
 			vrele(newvp);
 			newvp = (struct vnode *)0;
 		}
 		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
 			cnp->cn_proc, &np);
 		if (!error) {
 			newvp = NFSTOV(np);
 			/* A non-directory under that name is a real conflict. */
 			if (newvp->v_type != VDIR)
 				error = EEXIST;
 		}
 	}
 	if (error) {
 		if (newvp)
 			vrele(newvp);
 	} else
 		*ap->a_vpp = newvp;
 	return (error);
 }
 
 /*
  * nfs remove directory call
  *
  * Removes the directory a_vp, named by a_cnp, from directory a_dvp.
  * Returns EINVAL when a_dvp and a_vp are the same vnode.  Name cache
  * entries for both vnodes are purged whatever the rpc result, and ENOENT
  * is mapped to success on the assumption that it answers a retransmitted
  * request.
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * depend on the local names declared below and to update "error".
  */
 static int
 nfs_rmdir(ap)
 	struct vop_rmdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct vnode *dvp = ap->a_dvp;
 	register struct componentname *cnp = ap->a_cnp;
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(dvp);
 
 	if (dvp == vp)
 		return (EINVAL);
 	nfsstats.rpccnt[NFSPROC_RMDIR]++;
 	/* Request body: parent directory file handle followed by the name. */
 	nfsm_reqhead(dvp, NFSPROC_RMDIR,
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
 	if (v3)
 		nfsm_wcc_data(dvp, wccflag);
 	nfsm_reqdone;
 	/* The directory changed; reset its attribute stamp if wccflag is 0. */
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	cache_purge(dvp);
 	cache_purge(vp);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
 	 */
 	if (error == ENOENT)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs readdir call
  */
 static int
 nfs_readdir(ap)
 	struct vop_readdir_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	register struct uio *uio = ap->a_uio;
 	int tresid, error;
 	struct vattr vattr;
 
 	if (vp->v_type != VDIR)
 		return (EPERM);
 	/*
 	 * First, check for hit on the EOF offset cache
 	 */
 	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
 	    (np->n_flag & NMODIFIED) == 0) {
 		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
 			if (NQNFS_CKCACHABLE(vp, ND_READ)) {
 				nfsstats.direofcache_hits++;
 				return (0);
 			}
 		} else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 &&
 			np->n_mtime == vattr.va_mtime.tv_sec) {
 			nfsstats.direofcache_hits++;
 			return (0);
 		}
 	}
 
 	/*
 	 * Call nfs_bioread() to do the real work.
 	 */
 	tresid = uio->uio_resid;
 	error = nfs_bioread(vp, uio, 0, ap->a_cred);
 
 	if (!error && uio->uio_resid == tresid)
 		nfsstats.direofcache_misses++;
 	return (error);
 }
 
 /*
  * Readdir rpc call.
  * Called from below the buffer cache by nfs_doio().
  *
  * Fills uiop with struct dirent records for directory vp, starting at the
  * cookie associated with uiop->uio_offset, by issuing NFSPROC_READDIR
  * rpcs until the server reports the end of the directory or the next
  * entry no longer fits.  Records are padded so that none crosses a
  * DIRBLKSIZ boundary.  Returns NFSERR_BAD_COOKIE when no cookie is known
  * for the offset, EBADRPC on an entry name length outside
  * 1..NFS_MAXNAMLEN, otherwise the value left in "error".
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * depend on the local names declared below and on the nfsmout label.
  * NOTE(review): the uio sanity check is under "#ifndef DIAGNOSTIC", so it
  * is compiled when DIAGNOSTIC is NOT defined -- confirm that is intended.
  */
 int
 nfs_readdirrpc(vp, uiop, cred)
 	struct vnode *vp;
 	register struct uio *uiop;
 	struct ucred *cred;
 
 {
 	register int len, left;
 	register struct dirent *dp = NULL;
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	register nfsuint64 *cookiep;
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	nfsuint64 cookie;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct nfsnode *dnp = VTONFS(vp);
 	u_quad_t fileno;
 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
 	int attrflag;
 	int v3 = NFS_ISV3(vp);
 
 #ifndef DIAGNOSTIC
 	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
 		(uiop->uio_resid & (DIRBLKSIZ - 1)))
 		panic("nfs readdirrpc bad uio");
 #endif
 
 	/*
 	 * If there is no cookie, assume directory was stale.
 	 */
 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
 	if (cookiep)
 		cookie = *cookiep;
 	else
 		return (NFSERR_BAD_COOKIE);
 	/*
 	 * Loop around doing readdir rpc's of size nm_readdirsize
 	 * truncated to a multiple of DIRBLKSIZ.
 	 * The stopping criteria is EOF or buffer full.
 	 */
 	while (more_dirs && bigenough) {
 		nfsstats.rpccnt[NFSPROC_READDIR]++;
 		nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
 			NFSX_READDIR(v3));
 		nfsm_fhtom(vp, v3);
 		/*
 		 * v3 sends a two-word cookie plus the two-word cookie
 		 * verifier; v2 sends a one-word cookie.  Both end with the
 		 * requested byte count, stored after the if/else.
 		 */
 		if (v3) {
 			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			*tl++ = cookie.nfsuquad[0];
 			*tl++ = cookie.nfsuquad[1];
 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
 		} else {
 			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = cookie.nfsuquad[0];
 		}
 		*tl = txdr_unsigned(nmp->nm_readdirsize);
 		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
 		if (v3) {
 			nfsm_postop_attr(vp, attrflag);
 			if (!error) {
 				/* Remember the verifier returned by the server. */
 				nfsm_dissect(tl, u_int32_t *,
 				    2 * NFSX_UNSIGNED);
 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
 				dnp->n_cookieverf.nfsuquad[1] = *tl;
 			} else {
 				m_freem(mrep);
 				goto nfsmout;
 			}
 		}
 		/* First "entry follows" flag of the reply. */
 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 		more_dirs = fxdr_unsigned(int, *tl);
 
 		/* loop thru the dir entries, doctoring them to 4bsd form */
 		while (more_dirs && bigenough) {
 			/* File number (two words in v3, one in v2) and name length. */
 			if (v3) {
 				nfsm_dissect(tl, u_int32_t *,
 				    3 * NFSX_UNSIGNED);
 				fileno = fxdr_hyper(tl);
 				len = fxdr_unsigned(int, *(tl + 2));
 			} else {
 				nfsm_dissect(tl, u_int32_t *,
 				    2 * NFSX_UNSIGNED);
 				fileno = fxdr_unsigned(u_quad_t, *tl++);
 				len = fxdr_unsigned(int, *tl);
 			}
 			if (len <= 0 || len > NFS_MAXNAMLEN) {
 				error = EBADRPC;
 				m_freem(mrep);
 				goto nfsmout;
 			}
 			tlen = nfsm_rndup(len);
 			if (tlen == len)
 				tlen += 4;	/* To ensure null termination */
 			/*
 			 * If the record will not fit in what is left of the
 			 * current DIRBLKSIZ block, stretch the previous
 			 * record to the block boundary and start a new block.
 			 * NOTE(review): dp is dereferenced here; with blksiz
 			 * starting at 0 the first entry is presumably always
 			 * small enough not to take this branch -- confirm
 			 * against DIRBLKSIZ, DIRHDSIZ and NFS_MAXNAMLEN.
 			 */
 			left = DIRBLKSIZ - blksiz;
 			if ((tlen + DIRHDSIZ) > left) {
 				dp->d_reclen += left;
 				uiop->uio_iov->iov_base += left;
 				uiop->uio_iov->iov_len -= left;
 				uiop->uio_offset += left;
 				uiop->uio_resid -= left;
 				blksiz = 0;
 			}
 			/* Out of caller buffer: stop copying, keep parsing. */
 			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
 				bigenough = 0;
 			if (bigenough) {
 				/* Build the dirent header in place in the uio buffer. */
 				dp = (struct dirent *)uiop->uio_iov->iov_base;
 				dp->d_fileno = (int)fileno;
 				dp->d_namlen = len;
 				dp->d_reclen = tlen + DIRHDSIZ;
 				dp->d_type = DT_UNKNOWN;
 				blksiz += dp->d_reclen;
 				if (blksiz == DIRBLKSIZ)
 					blksiz = 0;
 				uiop->uio_offset += DIRHDSIZ;
 				uiop->uio_resid -= DIRHDSIZ;
 				uiop->uio_iov->iov_base += DIRHDSIZ;
 				uiop->uio_iov->iov_len -= DIRHDSIZ;
 				/* Copy the name, then skip the padding after it. */
 				nfsm_mtouio(uiop, len);
 				cp = uiop->uio_iov->iov_base;
 				tlen -= len;
 				*cp = '\0';	/* null terminate */
 				uiop->uio_iov->iov_base += tlen;
 				uiop->uio_iov->iov_len -= tlen;
 				uiop->uio_offset += tlen;
 				uiop->uio_resid -= tlen;
 			} else
 				nfsm_adv(nfsm_rndup(len));
 			/* Entry cookie (two words in v3, one in v2) + next flag. */
 			if (v3) {
 				nfsm_dissect(tl, u_int32_t *,
 				    3 * NFSX_UNSIGNED);
 			} else {
 				nfsm_dissect(tl, u_int32_t *,
 				    2 * NFSX_UNSIGNED);
 			}
 			/* Only adopt the cookie of entries actually copied out. */
 			if (bigenough) {
 				cookie.nfsuquad[0] = *tl++;
 				if (v3)
 					cookie.nfsuquad[1] = *tl++;
 			} else if (v3)
 				tl += 2;
 			else
 				tl++;
 			more_dirs = fxdr_unsigned(int, *tl);
 		}
 		/*
 		 * If at end of rpc data, get the eof boolean
 		 */
 		if (!more_dirs) {
 			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
 		}
 		m_freem(mrep);
 	}
 	/*
 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
 	 * by increasing d_reclen for the last record.
 	 */
 	if (blksiz > 0) {
 		left = DIRBLKSIZ - blksiz;
 		dp->d_reclen += left;
 		uiop->uio_iov->iov_base += left;
 		uiop->uio_iov->iov_len -= left;
 		uiop->uio_offset += left;
 		uiop->uio_resid -= left;
 	}
 
 	/*
 	 * We are now either at the end of the directory or have filled the
 	 * block.
 	 */
 	if (bigenough)
 		/* End of directory: record the EOF offset in the node. */
 		dnp->n_direofoffset = uiop->uio_offset;
 	else {
 		/* Buffer full: store the resume cookie for this offset. */
 		if (uiop->uio_resid > 0)
 			printf("EEK! readdirrpc resid > 0\n");
 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
 		*cookiep = cookie;
 	}
 nfsmout:
 	return (error);
 }
 
 /*
  * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
  *
  * Issues NFSPROC_READDIRPLUS requests for the directory vp, starting at the
  * cookie that nfs_getcookie() has recorded for uiop->uio_offset, and
  * rewrites every returned entry as a struct dirent stored through uiop.
  * For entries that come with attributes and a file handle (and that fit in
  * the caller's buffer) the attributes are loaded into the entry's nfsnode
  * and the name is added to the name cache with cache_enter().
  *
  * Returns NFSERR_BAD_COOKIE when no cookie exists for the starting offset,
  * EBADRPC when an entry reports a name length outside 1..NFS_MAXNAMLEN,
  * and otherwise the value of "error" when the nfsmout label is reached.
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to use
  * the locals tl, cp, t1, t2, bpos, dpos, cp2, mreq, mrep, md, mb, mb2 and
  * error implicitly and presumably branch to nfsmout on failure -- confirm
  * against their definitions before renaming or removing any of these.
  */
 int
 nfs_readdirplusrpc(vp, uiop, cred)
 	struct vnode *vp;
 	register struct uio *uiop;
 	struct ucred *cred;
 {
 	register int len, left;
 	register struct dirent *dp;
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	register struct vnode *newvp;
 	register nfsuint64 *cookiep;
 	caddr_t bpos, dpos, cp2, dpossav1, dpossav2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2;
 	struct nameidata nami, *ndp = &nami;
 	struct componentname *cnp = &ndp->ni_cnd;
 	nfsuint64 cookie;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct nfsnode *dnp = VTONFS(vp), *np;
 	nfsfh_t *fhp;
 	u_quad_t fileno;
 	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
 	int attrflag, fhsize;
 
 #ifndef nolint
 	dp = (struct dirent *)0;
 #endif
 #ifndef DIAGNOSTIC
 	/* The uio must be a single iovec, DIRBLKSIZ aligned in offset and size. */
 	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
 		(uiop->uio_resid & (DIRBLKSIZ - 1)))
 		panic("nfs readdirplusrpc bad uio");
 #endif
 	ndp->ni_dvp = vp;
 	newvp = NULLVP;
 
 	/*
 	 * If there is no cookie, assume directory was stale.
 	 */
 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
 	if (cookiep)
 		cookie = *cookiep;
 	else
 		return (NFSERR_BAD_COOKIE);
 	/*
 	 * Loop around doing readdir rpc's of size nm_readdirsize
 	 * truncated to a multiple of DIRBLKSIZ.
 	 * The stopping criteria is EOF or buffer full.
 	 */
 	while (more_dirs && bigenough) {
 		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
 		nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
 			NFSX_FH(1) + 6 * NFSX_UNSIGNED);
 		nfsm_fhtom(vp, 1);
  		nfsm_build(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
 		/* Request arguments: cookie, cookie verifier, then two sizes. */
 		*tl++ = cookie.nfsuquad[0];
 		*tl++ = cookie.nfsuquad[1];
 		*tl++ = dnp->n_cookieverf.nfsuquad[0];
 		*tl++ = dnp->n_cookieverf.nfsuquad[1];
 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
 		*tl = txdr_unsigned(nmp->nm_rsize);
 		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred);
 		nfsm_postop_attr(vp, attrflag);
 		if (error) {
 			m_freem(mrep);
 			goto nfsmout;
 		}
 		/* Reply starts with the new cookie verifier and a "more" flag. */
 		nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
 		more_dirs = fxdr_unsigned(int, *tl);
 
 		/* loop thru the dir entries, doctoring them to 4bsd form */
 		while (more_dirs && bigenough) {
 			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			fileno = fxdr_hyper(tl);
 			len = fxdr_unsigned(int, *(tl + 2));
 			if (len <= 0 || len > NFS_MAXNAMLEN) {
 				error = EBADRPC;
 				m_freem(mrep);
 				goto nfsmout;
 			}
 			tlen = nfsm_rndup(len);
 			if (tlen == len)
 				tlen += 4;	/* To ensure null termination*/
 			left = DIRBLKSIZ - blksiz;
 			/*
 			 * The entry does not fit in what is left of the
 			 * current DIRBLKSIZ block: stretch the previous
 			 * record over the remainder and start a new block.
 			 */
 			if ((tlen + DIRHDSIZ) > left) {
 				dp->d_reclen += left;
 				uiop->uio_iov->iov_base += left;
 				uiop->uio_iov->iov_len -= left;
 				uiop->uio_offset += left;
 				uiop->uio_resid -= left;
 				blksiz = 0;
 			}
 			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
 				bigenough = 0;
 			if (bigenough) {
 				/* Build the dirent header in place in the caller's buffer. */
 				dp = (struct dirent *)uiop->uio_iov->iov_base;
 				dp->d_fileno = (int)fileno;
 				dp->d_namlen = len;
 				dp->d_reclen = tlen + DIRHDSIZ;
 				dp->d_type = DT_UNKNOWN;
 				blksiz += dp->d_reclen;
 				if (blksiz == DIRBLKSIZ)
 					blksiz = 0;
 				uiop->uio_offset += DIRHDSIZ;
 				uiop->uio_resid -= DIRHDSIZ;
 				uiop->uio_iov->iov_base += DIRHDSIZ;
 				uiop->uio_iov->iov_len -= DIRHDSIZ;
 				/* Remember where the name lands for cache_enter() below. */
 				cnp->cn_nameptr = uiop->uio_iov->iov_base;
 				cnp->cn_namelen = len;
 				nfsm_mtouio(uiop, len);
 				cp = uiop->uio_iov->iov_base;
 				tlen -= len;
 				*cp = '\0';
 				uiop->uio_iov->iov_base += tlen;
 				uiop->uio_iov->iov_len -= tlen;
 				uiop->uio_offset += tlen;
 				uiop->uio_resid -= tlen;
 			} else
 				nfsm_adv(nfsm_rndup(len));
 			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			/* Only advance the saved cookie for entries actually stored. */
 			if (bigenough) {
 				cookie.nfsuquad[0] = *tl++;
 				cookie.nfsuquad[1] = *tl++;
 			} else
 				tl += 2;
 
 			/*
 			 * Since the attributes are before the file handle
 			 * (sigh), we must skip over the attributes and then
 			 * come back and get them.
 			 */
 			attrflag = fxdr_unsigned(int, *tl);
 			if (attrflag) {
 			    /* Save the parse position of the attributes. */
 			    dpossav1 = dpos;
 			    mdsav1 = md;
 			    nfsm_adv(NFSX_V3FATTR);
 			    nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 			    doit = fxdr_unsigned(int, *tl);
 			    if (doit) {
 				nfsm_getfh(fhp, fhsize, 1);
 				if (NFS_CMPFH(dnp, fhp, fhsize)) {
 				    /* Entry names the directory itself. */
 				    VREF(vp);
 				    newvp = vp;
 				    np = dnp;
 				} else {
 				    error = nfs_nget(vp->v_mount, fhp,
 					fhsize, &np);
 				    if (error)
 					doit = 0;
 				    else
 					newvp = NFSTOV(np);
 				}
 			    }
 			    if (doit && bigenough) {
 				/*
 				 * Rewind to the saved attribute position,
 				 * load the attributes, then restore the
 				 * position just past the file handle.
 				 */
 				dpossav2 = dpos;
 				dpos = dpossav1;
 				mdsav2 = md;
 				md = mdsav1;
 				nfsm_loadattr(newvp, (struct vattr *)0);
 				dpos = dpossav2;
 				md = mdsav2;
 				dp->d_type =
 				    IFTODT(VTTOIF(np->n_vattr.va_type));
 				ndp->ni_vp = newvp;
 			        cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
 			    }
 			} else {
 			    /* Just skip over the file handle */
 			    nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 			    i = fxdr_unsigned(int, *tl);
 			    nfsm_adv(nfsm_rndup(i));
 			}
 			/* Drop the reference taken on the entry's vnode, if any. */
 			if (newvp != NULLVP) {
 			    if (newvp == vp)
 				vrele(newvp);
 			    else
 				vput(newvp);
 			    newvp = NULLVP;
 			}
 			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 			more_dirs = fxdr_unsigned(int, *tl);
 		}
 		/*
 		 * If at end of rpc data, get the eof boolean
 		 */
 		if (!more_dirs) {
 			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
 		}
 		m_freem(mrep);
 	}
 	/*
 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
 	 * by increasing d_reclen for the last record.
 	 */
 	if (blksiz > 0) {
 		left = DIRBLKSIZ - blksiz;
 		dp->d_reclen += left;
 		uiop->uio_iov->iov_base += left;
 		uiop->uio_iov->iov_len -= left;
 		uiop->uio_offset += left;
 		uiop->uio_resid -= left;
 	}
 
 	/*
 	 * We are now either at the end of the directory or have filled the
 	 * block.
 	 */
 	if (bigenough)
 		dnp->n_direofoffset = uiop->uio_offset;
 	else {
 		if (uiop->uio_resid > 0)
 			printf("EEK! readdirplusrpc resid > 0\n");
 		/* Record the cookie to resume from at the new offset. */
 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
 		*cookiep = cookie;
 	}
 nfsmout:
 	/* An error exit may still hold a reference on the entry's vnode. */
 	if (newvp != NULLVP) {
 	        if (newvp == vp)
 			vrele(newvp);
 		else
 			vput(newvp);
 		newvp = NULLVP;
 	}
 	return (error);
 }
 
 /*
  * Silly rename. To make the NFS filesystem that is stateless look a little
  * more like the "ufs" a remove of an active vnode is translated to a rename
  * to a funny looking filename that is removed by nfs_inactive on the
  * nfsnode. There is the potential for another process on a different client
  * to create the same funny name between the nfs_lookitup() fails and the
  * nfs_rename() completes, but...
  */
 static int
 nfs_sillyrename(dvp, vp, cnp)
 	struct vnode *dvp, *vp;
 	struct componentname *cnp;
 {
 	register struct sillyrename *sp;
 	struct nfsnode *np;
 	int error;
 	short pid;
 
 	cache_purge(dvp);
 	np = VTONFS(vp);
 #ifndef DIAGNOSTIC
 	if (vp->v_type == VDIR)
 		panic("nfs: sillyrename dir");
 #endif
 	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
 		M_NFSREQ, M_WAITOK);
 	sp->s_cred = crdup(cnp->cn_cred);
 	sp->s_dvp = dvp;
 	VREF(dvp);
 
 	/* Fudge together a funny name */
 	pid = cnp->cn_proc->p_pid;
 	sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid);
 
 	/* Try lookitups until we get one that isn't there */
 	while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 		cnp->cn_proc, (struct nfsnode **)0) == 0) {
 		sp->s_name[4]++;
 		if (sp->s_name[4] > 'z') {
 			error = EINVAL;
 			goto bad;
 		}
 	}
 	error = nfs_renameit(dvp, cnp, sp);
 	if (error)
 		goto bad;
 	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 		cnp->cn_proc, &np);
 	np->n_sillyrename = sp;
 	return (0);
 bad:
 	vrele(sp->s_dvp);
 	crfree(sp->s_cred);
 	free((caddr_t)sp, M_NFSREQ);
 	return (error);
 }
 
 /*
  * Look up a file name and optionally either update the file handle or
  * allocate an nfsnode, depending on the value of npp.
  * npp == NULL	--> just do the lookup
  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
  *			handled too
  * *npp != NULL --> update the file handle in the vnode
  *
  * Sends an NFSPROC_LOOKUP request for "name" (len bytes) in directory dvp
  * using cred/procp.  Returns the value of "error"; ENOENT is returned for
  * a V3 reply without post-op attributes when *npp was NULL.  On success
  * with *npp == NULL, *npp is set to the nfsnode that was found.
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere and appear to use
  * the locals tl, cp, t1, t2, bpos, dpos, cp2, mreq, mrep, md, mb, mb2 and
  * error implicitly; nfsm_reqdone presumably supplies the label they jump
  * to on failure -- confirm against their definitions.
  */
 static int
 nfs_lookitup(dvp, name, len, cred, procp, npp)
 	register struct vnode *dvp;
 	const char *name;
 	int len;
 	struct ucred *cred;
 	struct proc *procp;
 	struct nfsnode **npp;
 {
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	struct vnode *newvp = (struct vnode *)0;
 	struct nfsnode *np, *dnp = VTONFS(dvp);
 	caddr_t bpos, dpos, cp2;
 	int error = 0, fhlen, attrflag;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	nfsfh_t *nfhp;
 	int v3 = NFS_ISV3(dvp);
 
 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
 	nfsm_reqhead(dvp, NFSPROC_LOOKUP,
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(name, len, NFS_MAXNAMLEN);
 	nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred);
 	if (npp && !error) {
 		nfsm_getfh(nfhp, fhlen, v3);
 		if (*npp) {
 		    /*
 		     * Caller supplied an nfsnode: replace its file handle,
 		     * switching between the embedded n_fh and a separately
 		     * allocated M_NFSBIGFH buffer when the size class changes.
 		     */
 		    np = *npp;
 		    if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
 			free((caddr_t)np->n_fhp, M_NFSBIGFH);
 			np->n_fhp = &np->n_fh;
 		    } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
 			np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK);
 		    bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
 		    np->n_fhsize = fhlen;
 		    newvp = NFSTOV(np);
 		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
 		    /* The name resolves to the directory itself. */
 		    VREF(dvp);
 		    newvp = dvp;
 		} else {
 		    error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
 		    if (error) {
 			m_freem(mrep);
 			return (error);
 		    }
 		    newvp = NFSTOV(np);
 		}
 		if (v3) {
 			nfsm_postop_attr(newvp, attrflag);
 			/* A freshly obtained node is useless without attributes. */
 			if (!attrflag && *npp == NULL) {
 				m_freem(mrep);
 				if (newvp == dvp)
 					vrele(newvp);
 				else
 					vput(newvp);
 				return (ENOENT);
 			}
 		} else
 			nfsm_loadattr(newvp, (struct vattr *)0);
 	}
 	nfsm_reqdone;
 	if (npp && *npp == NULL) {
 		if (error) {
 			/* Undo the reference taken above before failing. */
 			if (newvp) {
 				if (newvp == dvp)
 					vrele(newvp);
 				else
 					vput(newvp);
 			}
 		} else
 			*npp = np;
 	}
 	return (error);
 }
 
 /*
  * Nfs Version 3 commit rpc
  *
  * Sends NFSPROC_COMMIT for cnt bytes starting at offset on vp, using
  * cred/procp.  Returns 0 without sending anything when the mount has not
  * set NFSSTA_HASWRITEVERF in nm_state.  If the write verifier in the reply
  * differs from nmp->nm_verf, the new verifier is stored and
  * NFSERR_STALEWRITEVERF is returned; otherwise the value of "error" is
  * returned.
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere and appear to use
  * the locals cp, tl, t1, t2, bpos, dpos, cp2, mreq, mrep, md, mb, mb2 and
  * error implicitly -- confirm before touching these declarations.
  */
 int
 nfs_commit(vp, offset, cnt, cred, procp)
 	struct vnode *vp;
 	u_quad_t offset;
 	int cnt;
 	struct ucred *cred;
 	struct proc *procp;
 {
 	register caddr_t cp;
 	register u_int32_t *tl;
 	register int32_t t1, t2;
 	register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	
 	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
 		return (0);
 	nfsstats.rpccnt[NFSPROC_COMMIT]++;
 	nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
 	nfsm_fhtom(vp, 1);
 	/* Arguments: 64-bit offset (two words) followed by the byte count. */
 	nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 	txdr_hyper(offset, tl);
 	tl += 2;
 	*tl = txdr_unsigned(cnt);
 	nfsm_request(vp, NFSPROC_COMMIT, procp, cred);
 	nfsm_wcc_data(vp, wccflag);
 	if (!error) {
 		nfsm_dissect(tl, u_int32_t *, NFSX_V3WRITEVERF);
 		/* A changed verifier is reported to the caller as stale. */
 		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
 			NFSX_V3WRITEVERF)) {
 			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 				NFSX_V3WRITEVERF);
 			error = NFSERR_STALEWRITEVERF;
 		}
 	}
 	nfsm_reqdone;
 	return (error);
 }
 
 /*
  * Kludge City..
  * - nfs_bmap() is essentially a no-op that does no translation
  * - nfs_strategy() does its I/O with nfs_readrpc/nfs_writerpc
  *   (Maybe the process's page mapping could be used instead, but there was
  *    concern that Kernel Write might not be enabled, that copyout() would
  *    do a lot more work than bcopy(), and that it currently happens in the
  *    context of the swapper process (2).)
  *
  * Every output pointer in the argument block is optional.  The vnode maps
  * to itself, the block number is scaled by btodb() of the mount's
  * f_iosize, and both run lengths are reported as 0.  Always returns 0.
  */
 static int
 nfs_bmap(ap)
 	struct vop_bmap_args /* {
 		struct vnode *a_vp;
 		daddr_t  a_bn;
 		struct vnode **a_vpp;
 		daddr_t *a_bnp;
 		int *a_runp;
 		int *a_runb;
 	} */ *ap;
 {
 	struct vnode *self = ap->a_vp;
 
 	if (ap->a_vpp)
 		*ap->a_vpp = self;
 	if (ap->a_bnp)
 		*ap->a_bnp = ap->a_bn *
 		    btodb(self->v_mount->mnt_stat.f_iosize);
 	if (ap->a_runp)
 		*ap->a_runp = 0;
 	if (ap->a_runb)
 		*ap->a_runb = 0;
 
 	return (0);
 }
 
 /*
  * Strategy routine.
  * Async requests are handed to nfs_asyncio() to be queued; synchronous
  * requests, and async requests that nfs_asyncio() declines (non-zero
  * return), are carried out directly by calling nfs_doio().
  *
  * Returns 0 when the buffer was queued, otherwise the result of nfs_doio().
  */
 static int
 nfs_strategy(ap)
 	struct vop_strategy_args *ap;
 {
 	register struct buf *bp = ap->a_bp;
 	struct ucred *iocred;
 	struct proc *owner;
 
 	KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
 	KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp));
 
 	if (bp->b_flags & B_PHYS)
 		panic("nfs physio");
 
 	/* Async I/O is not tied to the current process. */
 	owner = (bp->b_flags & B_ASYNC) ? (struct proc *)0 : curproc; /* XXX */
 
 	/* Use the credential that matches the direction of the transfer. */
 	iocred = (bp->b_iocmd == BIO_READ) ? bp->b_rcred : bp->b_wcred;
 
 	/*
 	 * An asynchronous buffer that nfs_asyncio() accepted needs nothing
 	 * more from us; in every other case do the I/O ourselves.
 	 */
 	if ((bp->b_flags & B_ASYNC) != 0 && nfs_asyncio(bp, NOCRED, owner) == 0)
 		return (0);
 	return (nfs_doio(bp, iocred, owner));
 }
 
 /*
  * fsync vnode op.
  * Forwards the vnode, credential, wait mode and process from the argument
  * block to nfs_flush() with commit == 1 and returns its result.
  */
 /* ARGSUSED */
 static int
 nfs_fsync(ap)
 	struct vop_fsync_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode * a_vp;
 		struct ucred * a_cred;
 		int  a_waitfor;
 		struct proc * a_p;
 	} */ *ap;
 {
 	int result;
 
 	result = nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1);
 	return (result);
 }
 
 /*
  * Flush all the blocks associated with a vnode.
  * 	Walk through the buffer pool and push any dirty pages
  *	associated with the vnode.
  *
  * vp       vnode whose dirty buffers (vp->v_dirtyblkhd) are flushed
  * cred     not referenced in this function body
  * waitfor  MNT_WAIT makes the routine sleep for locked buffers and for
  *          vp->v_numoutput to drain
  * p        process, passed to nfs_commit() and nfs_sigintr()
  * commit   non-zero enables the NFS V3 commit pass and the two-pass
  *          (passone) behaviour
  *
  * Returns EINTR if nfs_sigintr() reports an interrupt while waiting, the
  * saved np->n_error if NWRITEERR was set on the nfsnode, and otherwise the
  * last value stored in "error".
  */
 static int
 nfs_flush(vp, cred, waitfor, p, commit)
 	register struct vnode *vp;
 	struct ucred *cred;
 	int waitfor;
 	struct proc *p;
 	int commit;
 {
 	register struct nfsnode *np = VTONFS(vp);
 	register struct buf *bp;
 	register int i;
 	struct buf *nbp;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 	int passone = 1;
 	u_quad_t off, endoff, toff;
 	struct ucred* wcred = NULL;
 	struct buf **bvec = NULL;
 #ifndef NFS_COMMITBVECSIZ
 #define NFS_COMMITBVECSIZ	20
 #endif
 	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
 	int bvecsize = 0, bveccount;
 
 	if (nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
 	if (!commit)
 		passone = 0;
 	/*
 	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
 	 * server, but has not been committed to stable storage on the server
 	 * yet. On the first pass, the byte range is worked out and the commit
 	 * rpc is done. On the second pass, nfs_writebp() is called to do the
 	 * job.
 	 */
 again:
 	/* Start with an empty byte range and an empty commit list. */
 	off = (u_quad_t)-1;
 	endoff = 0;
 	bvecpos = 0;
 	if (NFS_ISV3(vp) && commit) {
 		s = splbio();
 		/*
 		 * Count up how many buffers waiting for a commit.
 		 */
 		bveccount = 0;
 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if (BUF_REFCNT(bp) == 0 &&
 			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 				== (B_DELWRI | B_NEEDCOMMIT))
 				bveccount++;
 		}
 		/*
 		 * Allocate space to remember the list of bufs to commit.  It is
 		 * important to use M_NOWAIT here to avoid a race with nfs_write.
 		 * If we can't get memory (for whatever reason), we will end up
 		 * committing the buffers one-by-one in the loop below.
 		 */
 		if (bveccount > NFS_COMMITBVECSIZ) {
 			/* Release a vector left over from an earlier pass. */
 			if (bvec != NULL && bvec != bvec_on_stack)
 				free(bvec, M_TEMP);
 			bvec = (struct buf **)
 				malloc(bveccount * sizeof(struct buf *),
 				       M_TEMP, M_NOWAIT);
 			if (bvec == NULL) {
 				bvec = bvec_on_stack;
 				bvecsize = NFS_COMMITBVECSIZ;
 			} else
 				bvecsize = bveccount;
 		} else {
 			bvec = bvec_on_stack;
 			bvecsize = NFS_COMMITBVECSIZ;
 		}
 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if (bvecpos >= bvecsize)
 				break;
 			/* Skip buffers not awaiting commit or that cannot be locked now. */
 			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
 			    (B_DELWRI | B_NEEDCOMMIT) ||
 			    BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
 				continue;
 			bremfree(bp);
 			/*
 			 * Work out if all buffers are using the same cred
 			 * so we can deal with them all with one commit.
 			 *
 			 * NOTE: we are not clearing B_DONE here, so we have
 			 * to do it later on in this routine if we intend to 
 			 * initiate I/O on the bp.
 			 *
 			 * Note: to avoid loopback deadlocks, we do not
 			 * assign b_runningbufspace.
 			 */
 			if (wcred == NULL)
 				wcred = bp->b_wcred;
 			else if (wcred != bp->b_wcred)
 				wcred = NOCRED;
 			bp->b_flags |= B_WRITEINPROG;
 			vfs_busy_pages(bp, 1);
 
 			/*
 			 * bp is protected by being locked, but nbp is not
 			 * and vfs_busy_pages() may sleep.  We have to
 			 * recalculate nbp.
 			 */
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 
 			/*
 			 * A list of these buffers is kept so that the
 			 * second loop knows which buffers have actually
 			 * been committed. This is necessary, since there
 			 * may be a race between the commit rpc and new
 			 * uncommitted writes on the file.
 			 */
 			bvec[bvecpos++] = bp;
 			/* Widen [off, endoff) to cover this buffer's dirty range. */
 			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 				bp->b_dirtyoff;
 			if (toff < off)
 				off = toff;
 			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
 			if (toff > endoff)
 				endoff = toff;
 		}
 		splx(s);
 	}
 	if (bvecpos > 0) {
 		/*
 		 * Commit data on the server, as required.
 		 * If all bufs are using the same wcred, then use that with
 		 * one call for all of them, otherwise commit each one
 		 * separately.
 		 */
 		if (wcred != NOCRED)
 			retv = nfs_commit(vp, off, (int)(endoff - off),
 					  wcred, p);
 		else {
 			retv = 0;
 			for (i = 0; i < bvecpos; i++) {
 				/* These locals shadow the outer "off". */
 				off_t off, size;
 				bp = bvec[i];
 				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 					bp->b_dirtyoff;
 				size = (u_quad_t)(bp->b_dirtyend
 						  - bp->b_dirtyoff);
 				retv = nfs_commit(vp, off, (int)size,
 						  bp->b_wcred, p);
 				if (retv) break;
 			}
 		}
 
 		if (retv == NFSERR_STALEWRITEVERF)
 			nfs_clearcommit(vp->v_mount);
 
 		/*
 		 * Now, either mark the blocks I/O done or mark the
 		 * blocks dirty, depending on whether the commit
 		 * succeeded.
 		 */
 		for (i = 0; i < bvecpos; i++) {
 			bp = bvec[i];
 			bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG | B_CLUSTEROK);
 			if (retv) {
 				/*
 				 * Error, leave B_DELWRI intact
 				 */
 				vfs_unbusy_pages(bp);
 				brelse(bp);
 			} else {
 				/*
 				 * Success, remove B_DELWRI ( bundirty() ).
 				 *
 				 * b_dirtyoff/b_dirtyend seem to be NFS 
 				 * specific.  We should probably move that
 				 * into bundirty(). XXX
 				 */
 				s = splbio();
 				vp->v_numoutput++;
 				bp->b_flags |= B_ASYNC;
 				bundirty(bp);
 				bp->b_flags &= ~B_DONE;
 				bp->b_ioflags &= ~BIO_ERROR;
 				bp->b_dirtyoff = bp->b_dirtyend = 0;
 				splx(s);
 				bufdone(bp);
 			}
 		}
 	}
 
 	/*
 	 * Start/do any write(s) that are required.
 	 */
 loop:
 	s = splbio();
 	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 		nbp = TAILQ_NEXT(bp, b_vnbufs);
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
 			/* Only a waiting caller past pass one sleeps for the lock. */
 			if (waitfor != MNT_WAIT || passone)
 				continue;
 			error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL,
 			    "nfsfsync", slpflag, slptimeo);
 			splx(s);
 			if (error == 0)
 				panic("nfs_fsync: inconsistent lock");
 			if (error == ENOLCK)
 				goto loop;
 			if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
 				error = EINTR;
 				goto done;
 			}
 			/* After one PCATCH sleep, switch to a 2 second timeout. */
 			if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			}
 			goto loop;
 		}
 		if ((bp->b_flags & B_DELWRI) == 0)
 			panic("nfs_fsync: not dirty");
 		/* Buffers still awaiting commit are not rewritten here. */
 		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
 			BUF_UNLOCK(bp);
 			continue;
 		}
 		bremfree(bp);
 		if (passone || !commit)
 		    bp->b_flags |= B_ASYNC;
 		else
 		    bp->b_flags |= B_ASYNC | B_WRITEINPROG;
 		splx(s);
 		BUF_WRITE(bp);
 		/* Restart the scan from the head of the dirty list. */
 		goto loop;
 	}
 	splx(s);
 	if (passone) {
 		passone = 0;
 		goto again;
 	}
 	if (waitfor == MNT_WAIT) {
 		/* Sleep until all writes in progress on the vnode have finished. */
 		while (vp->v_numoutput) {
 			vp->v_flag |= VBWAIT;
 			error = tsleep((caddr_t)&vp->v_numoutput,
 				slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
 			if (error) {
 			    if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
 				error = EINTR;
 				goto done;
 			    }
 			    if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			    }
 			}
 		}
 		/* New dirty buffers showed up while waiting: go around again. */
 		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) && commit) {
 			goto loop;
 		}
 	}
 	/* Report, and clear, a write error recorded on the nfsnode. */
 	if (np->n_flag & NWRITEERR) {
 		error = np->n_error;
 		np->n_flag &= ~NWRITEERR;
 	}
 done:
 	if (bvec != NULL && bvec != bvec_on_stack)
 		free(bvec, M_TEMP);
 	return (error);
 }
 
 /*
  * NFS advisory byte-level locks.
  * Currently unsupported.
  *
  * As a kludge to keep diskless support working until a real NFS lockd
  * exists, the request is simply treated as a local lock: it is handed to
  * lf_advlock() together with the nfsnode's lock list and size.
  */
 static int
 nfs_advlock(ap)
 	struct vop_advlock_args /* {
 		struct vnode *a_vp;
 		caddr_t  a_id;
 		int  a_op;
 		struct flock *a_fl;
 		int  a_flags;
 	} */ *ap;
 {
 	register struct nfsnode *node;
 
 	node = VTONFS(ap->a_vp);
 	return (lf_advlock(ap, &node->n_lockf, node->n_size));
 }
 
 /*
  * Print out the contents of an nfsnode: the file id and fsid from its
  * cached attributes, plus fifo details when the vnode is a VFIFO.
  * Always returns 0.
  */
 static int
 nfs_print(ap)
 	struct vop_print_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	register struct vnode *target;
 	register struct nfsnode *node;
 
 	target = ap->a_vp;
 	node = VTONFS(target);
 
 	printf("tag VT_NFS, fileid %ld fsid 0x%x",
 		node->n_vattr.va_fileid, node->n_vattr.va_fsid);
 	if (target->v_type == VFIFO)
 		fifo_printinfo(target);
 	printf("\n");
 
 	return (0);
 }
 
 /*
- * Just call nfs_writebp() with the force argument set to 1.
- *
- * NOTE: B_DONE may or may not be set in a_bp on call.
- */
-static int
-nfs_bwrite(ap)
-	struct vop_bwrite_args /* {
-		struct vnode *a_bp;
-	} */ *ap;
-{
-	return (nfs_writebp(ap->a_bp, 1, curproc));
-}
-
-/*
- * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless
- * the force flag is one and it also handles the B_NEEDCOMMIT flag.  We set
- * B_CACHE if this is a VMIO buffer.
+ * This is the "real" nfs::bwrite(struct buf*).
+ * B_WRITEINPROG isn't set unless the force flag is one and it 
+ * handles the B_NEEDCOMMIT flag.
+ * We set B_CACHE if this is a VMIO buffer.
  */
 int
 nfs_writebp(bp, force, procp)
 	register struct buf *bp;
 	int force;
 	struct proc *procp;
 {
 	int s;
 	/* Snapshot of the flags on entry; consulted after the I/O is started. */
 	int oldflags = bp->b_flags;
 #if 0
 	int retv = 1;
 	off_t off;
 #endif
 
 	/*
 	 * Write out the locked buffer bp.  B_WRITEINPROG is set only when
 	 * "force" is non-zero.  procp is not referenced in this body.
 	 * Returns 0 for B_INVAL and B_ASYNC buffers, otherwise the result
 	 * of bufwait().
 	 */
 	if (BUF_REFCNT(bp) == 0)
 		panic("bwrite: buffer is not locked???");
 
 	/* An invalidated buffer is just released, nothing is written. */
 	if (bp->b_flags & B_INVAL) {
 		brelse(bp);
 		return(0);
 	}
 
 	bp->b_flags |= B_CACHE;
 
 	/*
 	 * Undirty the bp.  We will redirty it later if the I/O fails.
 	 */
 
 	s = splbio();
 	bundirty(bp);
 	bp->b_flags &= ~B_DONE;
 	bp->b_ioflags &= ~BIO_ERROR;
 	bp->b_iocmd = BIO_WRITE;
 
 	/* Account for the write on the vnode and on the current process. */
 	bp->b_vp->v_numoutput++;
 	curproc->p_stats->p_ru.ru_oublock++;
 	splx(s);
 
 	/*
 	 * Note: to avoid loopback deadlocks, we do not
 	 * assign b_runningbufspace.
 	 */
 	vfs_busy_pages(bp, 1);
 
 	if (force)
 		bp->b_flags |= B_WRITEINPROG;
 	BUF_KERNPROC(bp);
 	BUF_STRATEGY(bp);
 
 	/* Synchronous write: wait for completion and release the buffer. */
 	if( (oldflags & B_ASYNC) == 0) {
 		int rtval = bufwait(bp);
 
 		if (oldflags & B_DELWRI) {
 			s = splbio();
 			reassignbuf(bp, bp->b_vp);
 			splx(s);
 		}
 
 		brelse(bp);
 		return (rtval);
 	} 
 
 	return (0);
 }
 
 /*
  * nfs special file access vnode op.
  * Essentially just get vattr and then imitate iaccess() since the device is
  * local to the client.
  */
 static int
 nfsspec_access(ap)
 	struct vop_access_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vattr *vap;
 	register gid_t *gp;
 	register struct ucred *cred = ap->a_cred;
 	struct vnode *vp = ap->a_vp;
 	mode_t mode = ap->a_mode;
 	struct vattr vattr;
 	register int i;
 	int error;
 
 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
 	 */
 	if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
 		default:
 			break;
 		}
 	}
 	/*
 	 * If you're the super-user,
 	 * you always get access.
 	 */
 	if (cred->cr_uid == 0)
 		return (0);
 	vap = &vattr;
 	error = VOP_GETATTR(vp, vap, cred, ap->a_p);
 	if (error)
 		return (error);
 	/*
 	 * Access check is based on only one of owner, group, public.
 	 * If not owner, then check group. If not a member of the
 	 * group, then check public access.
 	 */
 	if (cred->cr_uid != vap->va_uid) {
 		mode >>= 3;
 		gp = cred->cr_groups;
 		for (i = 0; i < cred->cr_ngroups; i++, gp++)
 			if (vap->va_gid == *gp)
 				goto found;
 		mode >>= 3;
 found:
 		;
 	}
 	error = (vap->va_mode & mode) == mode ? 0 : EACCES;
 	return (error);
 }
 
 /*
  * Read wrapper for special devices.
  * Marks the nfsnode as accessed, stamps n_atim, then passes the call on to
  * the spec vnode operations.
  */
 static int
 nfsspec_read(ap)
 	struct vop_read_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct nfsnode *node;
 
 	node = VTONFS(ap->a_vp);
 
 	/* Note the access and when it happened. */
 	node->n_flag |= NACC;
 	getnanotime(&node->n_atim);
 
 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
 }
 
 /*
  * Write wrapper for special devices.
  * Marks the nfsnode as updated, stamps n_mtim, then passes the call on to
  * the spec vnode operations.
  */
 static int
 nfsspec_write(ap)
 	struct vop_write_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct nfsnode *node;
 
 	node = VTONFS(ap->a_vp);
 
 	/* Note the update and when it happened. */
 	node->n_flag |= NUPD;
 	getnanotime(&node->n_mtim);
 
 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
 }
 
 /*
  * Close wrapper for special devices.
  *
  * Update the times on the nfsnode then do device close.
  */
 static int
 nfsspec_close(ap)
 	struct vop_close_args /* {
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	struct vattr vattr;
 
 	if (np->n_flag & (NACC | NUPD)) {
 		np->n_flag |= NCHG;
 		if (vp->v_usecount == 1 &&
 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			VATTR_NULL(&vattr);
 			if (np->n_flag & NACC)
 				vattr.va_atime = np->n_atim;
 			if (np->n_flag & NUPD)
 				vattr.va_mtime = np->n_mtim;
 			(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
 		}
 	}
 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
 }
 
 /*
  * Read wrapper for fifos.
  * Marks the nfsnode as accessed, stamps n_atim, then passes the call on to
  * the fifo vnode operations.
  */
 static int
 nfsfifo_read(ap)
 	struct vop_read_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct nfsnode *node;
 
 	node = VTONFS(ap->a_vp);
 
 	/* Note the access and when it happened. */
 	node->n_flag |= NACC;
 	getnanotime(&node->n_atim);
 
 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
 }
 
 /*
  * Write wrapper for fifos.
  * Marks the nfsnode as updated, stamps n_mtim, then passes the call on to
  * the fifo vnode operations.
  */
 static int
 nfsfifo_write(ap)
 	struct vop_write_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct nfsnode *node;
 
 	node = VTONFS(ap->a_vp);
 
 	/* Note the update and when it happened. */
 	node->n_flag |= NUPD;
 	getnanotime(&node->n_mtim);
 
 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
 }
 
 /*
  * Close wrapper for fifos.
  *
  * Update the times on the nfsnode then do fifo close.
  */
 static int
 nfsfifo_close(ap)
 	struct vop_close_args /* {
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	struct vattr vattr;
 	struct timespec ts;
 
 	if (np->n_flag & (NACC | NUPD)) {
 		getnanotime(&ts);
 		if (np->n_flag & NACC)
 			np->n_atim = ts;
 		if (np->n_flag & NUPD)
 			np->n_mtim = ts;
 		np->n_flag |= NCHG;
 		if (vp->v_usecount == 1 &&
 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			VATTR_NULL(&vattr);
 			if (np->n_flag & NACC)
 				vattr.va_atime = np->n_atim;
 			if (np->n_flag & NUPD)
 				vattr.va_mtime = np->n_mtim;
 			(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
 		}
 	}
 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
 }
Index: head/sys/nfsclient/nfs_bio.c
===================================================================
--- head/sys/nfsclient/nfs_bio.c	(revision 75579)
+++ head/sys/nfsclient/nfs_bio.c	(revision 75580)
@@ -1,1604 +1,1623 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)nfs_bio.c	8.9 (Berkeley) 3/30/95
  * $FreeBSD$
  */
 
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
 #include <sys/signalvar.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/kernel.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #include <vm/vnode_pager.h>
 
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
 #include <nfs/nfsmount.h>
 #include <nfs/nqnfs.h>
 #include <nfs/nfsnode.h>
 
+/*
+ * Write routine installed in buf_ops_nfs: just call nfs_writebp() with the
+ * force argument set to 1, on behalf of curproc, and return its result.
+ *
+ * NOTE: B_DONE may or may not be set in bp on call.
+ */
+static int
+nfs_bwrite(struct buf *bp)
+{
+	return (nfs_writebp(bp, 1, curproc));
+}
+
+struct buf_ops buf_ops_nfs = {	/* buffer ops table; nfs_write() points bp->b_op at it */
+	"buf_ops_nfs",		/* string label; presumably the table's name -- confirm against struct buf_ops */
+	nfs_bwrite		/* write routine: forced write through nfs_writebp() */
+};
+
+
 static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size,
 					struct proc *p));
 
 extern int nfs_numasync;
 extern int nfs_pbuf_freecnt;
 extern struct nfsstats nfsstats;
 
 /*
  * Vnode op for VM getpages.
  *
  * Fills the pages in ap->a_m with file data using one read RPC that starts
  * at the file offset of the first page and asks for ap->a_count bytes.
  * The requested page, ap->a_m[ap->a_reqpage], is the only page left for the
  * caller to deal with; every other page is either freed or put on a page
  * queue and woken up before returning.
  *
  * Returns VM_PAGER_ERROR when the vnode has no VM object or when the RPC
  * failed without transferring anything, and 0 otherwise (including the
  * case where the RPC reported an error after transferring some data).
  */
 int
 nfs_getpages(ap)
 	struct vop_getpages_args /* {
 		struct vnode *a_vp;
 		vm_page_t *a_m;
 		int a_count;
 		int a_reqpage;
 		vm_ooffset_t a_offset;
 	} */ *ap;
 {
 	int i, error, nextoff, size, toff, count, npages;
 	struct uio uio;
 	struct iovec iov;
 	vm_offset_t kva;
 	struct buf *bp;
 	struct vnode *vp;
 	struct proc *p;
 	struct ucred *cred;
 	struct nfsmount *nmp;
 	vm_page_t *pages;
 
 	vp = ap->a_vp;
 	p = curproc;				/* XXX */
 	cred = curproc->p_ucred;		/* XXX */
 	nmp = VFSTONFS(vp->v_mount);
 	pages = ap->a_m;
 	count = ap->a_count;
 
 	if (vp->v_object == NULL) {
 		printf("nfs_getpages: called with non-merged cache vnode??\n");
 		return VM_PAGER_ERROR;
 	}
 
 	/* On an NFSv3 mount, fetch the fsinfo once if we do not have it yet. */
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
 		(void)nfs_fsinfo(nmp, vp, cred, p);
 
 	npages = btoc(count);
 
 	/*
 	 * If the requested page is partially valid, just return it and
 	 * allow the pager to zero-out the blanks.  Partially valid pages
 	 * can only occur at the file EOF.
 	 */
 
 	{
 		vm_page_t m = pages[ap->a_reqpage];
 
 		if (m->valid != 0) {
 			/* handled by vm_fault now	  */
 			/* vm_page_zero_invalid(m, TRUE); */
 			/* No I/O is done, so the other pages are not needed. */
 			for (i = 0; i < npages; ++i) {
 				if (i != ap->a_reqpage)
 					vnode_pager_freepage(pages[i]);
 			}
 			return(0);
 		}
 	}
 
 	/*
 	 * We use only the kva address for the buffer, but this is extremely
 	 * convenient and fast.
 	 */
 	bp = getpbuf(&nfs_pbuf_freecnt);
 
 	/* Map the pages at the pbuf's address so the RPC can fill them. */
 	kva = (vm_offset_t) bp->b_data;
 	pmap_qenter(kva, pages, npages);
 
 	/* Describe one kernel-space read of count bytes at the first page. */
 	iov.iov_base = (caddr_t) kva;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
 	uio.uio_resid = count;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_READ;
 	uio.uio_procp = p;
 
 	error = nfs_readrpc(vp, &uio, cred);
 	pmap_qremove(kva, npages);
 
 	relpbuf(bp, &nfs_pbuf_freecnt);
 
 	/*
 	 * Only a total failure (an error with nothing transferred) is
 	 * reported to the caller; an error after a partial transfer falls
 	 * through to the page validation loop below.
 	 */
 	if (error && (uio.uio_resid == count)) {
 		printf("nfs_getpages: error %d\n", error);
 		for (i = 0; i < npages; ++i) {
 			if (i != ap->a_reqpage)
 				vnode_pager_freepage(pages[i]);
 		}
 		return VM_PAGER_ERROR;
 	}
 
 	/*
 	 * Calculate the number of bytes read and validate only that number
 	 * of bytes.  Note that due to pending writes, size may be 0.  This
 	 * does not mean that the remaining data is invalid!
 	 */
 
 	size = count - uio.uio_resid;
 
 	/* toff/nextoff are the byte offsets of page i's start and end. */
 	for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
 		vm_page_t m;
 		nextoff = toff + PAGE_SIZE;
 		m = pages[i];
 
 		m->flags &= ~PG_ZERO;
 
 		if (nextoff <= size) {
 			/*
 			 * Read operation filled an entire page
 			 */
 			m->valid = VM_PAGE_BITS_ALL;
 			vm_page_undirty(m);
 		} else if (size > toff) {
 			/*
 			 * Read operation filled a partial page.
 			 */
 			m->valid = 0;
 			vm_page_set_validclean(m, 0, size - toff);
 			/* handled by vm_fault now	  */
 			/* vm_page_zero_invalid(m, TRUE); */
 		}
 		
 		if (i != ap->a_reqpage) {
 			/*
 			 * Whether or not to leave the page activated is up in
 			 * the air, but we should put the page on a page queue
 			 * somewhere (it already is in the object).  Result:
 			 * It appears that empirical results show that
 			 * deactivating pages is best.
 			 */
 
 			/*
 			 * Just in case someone was asking for this page we
 			 * now tell them that it is ok to use.
 			 */
 			if (!error) {
 				if (m->flags & PG_WANTED)
 					vm_page_activate(m);
 				else
 					vm_page_deactivate(m);
 				vm_page_wakeup(m);
 			} else {
 				/* The RPC reported an error: drop the page. */
 				vnode_pager_freepage(m);
 			}
 		}
 	}
 	return 0;
 }
 
 /*
  * Vnode op for VM putpages.
  *
  * Writes the pages in ap->a_m to the server with one write RPC starting at
  * the file offset of the first page.  The byte count is clipped so that
  * the write never extends past np->n_size.
  *
  * Every entry of ap->a_rtvals starts out as VM_PAGER_AGAIN.  If the RPC
  * succeeds, the entries for the pages covered by the bytes actually written
  * are set to VM_PAGER_OK and those pages are marked clean.
  *
  * Returns ap->a_rtvals[0], i.e. the status of the first page.
  */
 int
 nfs_putpages(ap)
 	struct vop_putpages_args /* {
 		struct vnode *a_vp;
 		vm_page_t *a_m;
 		int a_count;
 		int a_sync;
 		int *a_rtvals;
 		vm_ooffset_t a_offset;
 	} */ *ap;
 {
 	struct uio uio;
 	struct iovec iov;
 	vm_offset_t kva;
 	struct buf *bp;
 	int iomode, must_commit, i, error, npages, count;
 	off_t offset;
 	int *rtvals;
 	struct vnode *vp;
 	struct proc *p;
 	struct ucred *cred;
 	struct nfsmount *nmp;
 	struct nfsnode *np;
 	vm_page_t *pages;
 
 	vp = ap->a_vp;
 	np = VTONFS(vp);
 	p = curproc;				/* XXX */
 	cred = curproc->p_ucred;		/* XXX */
 	nmp = VFSTONFS(vp->v_mount);
 	pages = ap->a_m;
 	count = ap->a_count;
 	rtvals = ap->a_rtvals;
 	/* npages is computed before count is clipped to EOF below. */
 	npages = btoc(count);
 	offset = IDX_TO_OFF(pages[0]->pindex);
 
 	/* On an NFSv3 mount, fetch the fsinfo once if we do not have it yet. */
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
 		(void)nfs_fsinfo(nmp, vp, cred, p);
 
 	/* Default status for every page until the write is known good. */
 	for (i = 0; i < npages; i++) {
 		rtvals[i] = VM_PAGER_AGAIN;
 	}
 
 	/*
 	 * When putting pages, do not extend file past EOF.
 	 */
 
 	if (offset + count > np->n_size) {
 		count = np->n_size - offset;
 		if (count < 0)
 			count = 0;
 	}
 
 	/*
 	 * We use only the kva address for the buffer, but this is extremely
 	 * convenient and fast.
 	 */
 	bp = getpbuf(&nfs_pbuf_freecnt);
 
 	/* Map the pages at the pbuf's address so the RPC can read them. */
 	kva = (vm_offset_t) bp->b_data;
 	pmap_qenter(kva, pages, npages);
 
 	/* Describe one kernel-space write of count bytes at offset. */
 	iov.iov_base = (caddr_t) kva;
 	iov.iov_len = count;
 	uio.uio_iov = &iov;
 	uio.uio_iovcnt = 1;
 	uio.uio_offset = offset;
 	uio.uio_resid = count;
 	uio.uio_segflg = UIO_SYSSPACE;
 	uio.uio_rw = UIO_WRITE;
 	uio.uio_procp = p;
 
 	/* A synchronous put asks for FILESYNC, anything else for UNSTABLE. */
 	if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0)
 	    iomode = NFSV3WRITE_UNSTABLE;
 	else
 	    iomode = NFSV3WRITE_FILESYNC;
 
 	error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
 
 	pmap_qremove(kva, npages);
 	relpbuf(bp, &nfs_pbuf_freecnt);
 
 	if (!error) {
 		/* Number of pages touched by the bytes actually written. */
 		int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
 		for (i = 0; i < nwritten; i++) {
 			rtvals[i] = VM_PAGER_OK;
 			vm_page_undirty(pages[i]);
 		}
 		if (must_commit)
 			nfs_clearcommit(vp->v_mount);
 	}
 	return rtvals[0];
 }
 
 /*
  * Vnode op for read using bio
  *
  * Copies data for vp into uio through the buffer cache, one cache block
  * per loop iteration, until the request is satisfied, no more data can be
  * copied, or an error occurs.  Regular files, symbolic links and
  * directories each have their own way of locating the block and of
  * computing the number of bytes (n) at offset (on) within it to copy.
  *
  *	vp	vnode to read from
  *	uio	destination and file offset; must be a UIO_READ request
  *	ioflag	read flags; ioflag >> 16 is used as a sequential-access
  *		hint that bounds the number of read-aheads started
  *	cred	credentials used for RPCs
  *
  * Returns 0 on success or an errno value (EINVAL for a negative offset,
  * EFBIG past nm_maxfilesize on non-directories, EINTR when a buffer or
  * the rslock could not be obtained, or whatever the lower layers report).
  */
 int
 nfs_bioread(vp, uio, ioflag, cred)
 	register struct vnode *vp;
 	register struct uio *uio;
 	int ioflag;
 	struct ucred *cred;
 {
 	register struct nfsnode *np = VTONFS(vp);
 	register int biosize, i;
 	struct buf *bp = 0, *rabp;
 	struct vattr vattr;
 	struct proc *p;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	daddr_t lbn, rabn;
 	int bcount;
 	int seqcount;
 	int nra, error = 0, n = 0, on = 0;
 
 #ifdef DIAGNOSTIC
 	if (uio->uio_rw != UIO_READ)
 		panic("nfs_read mode");
 #endif
 	if (uio->uio_resid == 0)
 		return (0);
 	if (uio->uio_offset < 0)	/* XXX VDIR cookies can be negative */
 		return (EINVAL);
 	p = uio->uio_procp;
 
 	/* On an NFSv3 mount, fetch the fsinfo once if we do not have it yet. */
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
 		(void)nfs_fsinfo(nmp, vp, cred, p);
 	if (vp->v_type != VDIR &&
 	    (uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
 		return (EFBIG);
 	biosize = vp->v_mount->mnt_stat.f_iosize;
 	/* Scale the hint in the upper bits of ioflag by biosize / BKVASIZE. */
 	seqcount = (int)((off_t)(ioflag >> 16) * biosize / BKVASIZE);
 	/*
 	 * For nfs, cache consistency can only be maintained approximately.
 	 * Although RFC1094 does not specify the criteria, the following is
 	 * believed to be compatible with the reference port.
 	 * For nqnfs, full cache consistency is maintained within the loop.
 	 * For nfs:
 	 * If the file's modify time on the server has changed since the
 	 * last read rpc or you have written to the file,
 	 * you may have lost data cache consistency with the
 	 * server, so flush all of the file's data out of the cache.
 	 * Then force a getattr rpc to ensure that you have up to date
 	 * attributes.
 	 * NB: This implies that cache data can be read when up to
 	 * NFS_ATTRTIMEO seconds out of date. If you find that you need current
 	 * attributes this could be forced by setting n_attrstamp to 0 before
 	 * the VOP_GETATTR() call.
 	 */
 	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) {
 		if (np->n_flag & NMODIFIED) {
 			/*
 			 * Locally modified: anything that is not a regular
 			 * file must be a directory, whose cached blocks are
 			 * flushed before the attributes are refetched.
 			 */
 			if (vp->v_type != VREG) {
 				if (vp->v_type != VDIR)
 					panic("nfs: bioread, not dir");
 				nfs_invaldir(vp);
 				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 				if (error)
 					return (error);
 			}
 			np->n_attrstamp = 0;
 			error = VOP_GETATTR(vp, &vattr, cred, p);
 			if (error)
 				return (error);
 			np->n_mtime = vattr.va_mtime.tv_sec;
 		} else {
 			/* Not modified locally: flush only if mtime moved. */
 			error = VOP_GETATTR(vp, &vattr, cred, p);
 			if (error)
 				return (error);
 			if (np->n_mtime != vattr.va_mtime.tv_sec) {
 				if (vp->v_type == VDIR)
 					nfs_invaldir(vp);
 				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 				if (error)
 					return (error);
 				np->n_mtime = vattr.va_mtime.tv_sec;
 			}
 		}
 	}
 	do {
 
 	    /*
 	     * Get a valid lease. If cached data is stale, flush it.
 	     */
 	    if (nmp->nm_flag & NFSMNT_NQNFS) {
 		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
 		    do {
 			error = nqnfs_getlease(vp, ND_READ, cred, p);
 		    } while (error == NQNFS_EXPIRED);
 		    if (error)
 			return (error);
 		    if (np->n_lrev != np->n_brev ||
 			(np->n_flag & NQNFSNONCACHE) ||
 			((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
 			if (vp->v_type == VDIR)
 			    nfs_invaldir(vp);
 			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 			if (error)
 			    return (error);
 			np->n_brev = np->n_lrev;
 		    }
 		} else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
 		    nfs_invaldir(vp);
 		    error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 		    if (error)
 			return (error);
 		}
 	    }
 	    /*
 	     * With caching disabled for this node, files and links are read
 	     * straight from the server; directories still go through the
 	     * buffer cache below.
 	     */
 	    if (np->n_flag & NQNFSNONCACHE) {
 		switch (vp->v_type) {
 		case VREG:
 			return (nfs_readrpc(vp, uio, cred));
 		case VLNK:
 			return (nfs_readlinkrpc(vp, uio, cred));
 		case VDIR:
 			break;
 		default:
 			printf(" NQNFSNONCACHE: type %x unexpected\n",	
 				vp->v_type);
 		};
 	    }
 	    switch (vp->v_type) {
 	    case VREG:
 		nfsstats.biocache_reads++;
 		/* Logical block number and byte offset within that block. */
 		lbn = uio->uio_offset / biosize;
 		on = uio->uio_offset & (biosize - 1);
 
 		/*
 		 * Start the read ahead(s), as required.
 		 */
 		if (nfs_numasync > 0 && nmp->nm_readahead > 0) {
 		    for (nra = 0; nra < nmp->nm_readahead && nra < seqcount &&
 			(off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) {
 			rabn = lbn + 1 + nra;
 			if (!incore(vp, rabn)) {
 			    rabp = nfs_getcacheblk(vp, rabn, biosize, p);
 			    if (!rabp)
 				return (EINTR);
 			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
 				rabp->b_flags |= B_ASYNC;
 				rabp->b_iocmd = BIO_READ;
 				vfs_busy_pages(rabp, 0);
 				/*
 				 * If the async request cannot be queued,
 				 * discard the buffer and stop reading ahead.
 				 */
 				if (nfs_asyncio(rabp, cred, p)) {
 				    rabp->b_flags |= B_INVAL;
 				    rabp->b_ioflags |= BIO_ERROR;
 				    vfs_unbusy_pages(rabp);
 				    brelse(rabp);
 				    break;
 				}
 			    } else {
 				brelse(rabp);
 			    }
 			}
 		    }
 		}
 
 		/*
 		 * Obtain the buffer cache block.  Figure out the buffer size
 		 * when we are at EOF.  If we are modifying the size of the
 		 * buffer based on an EOF condition we need to hold 
 		 * nfs_rslock() through obtaining the buffer to prevent
 		 * a potential writer-appender from messing with n_size.
 		 * Otherwise we may accidentally truncate the buffer and
 		 * lose dirty data.
 		 *
 		 * Note that bcount is *not* DEV_BSIZE aligned.
 		 */
 
 again:
 		bcount = biosize;
 		if ((off_t)lbn * biosize >= np->n_size) {
 			bcount = 0;
 		} else if ((off_t)(lbn + 1) * biosize > np->n_size) {
 			bcount = np->n_size - (off_t)lbn * biosize;
 		}
 		if (bcount != biosize) {
 			/* ENOLCK: recompute bcount from n_size and retry. */
 			switch(nfs_rslock(np, p)) {
 			case ENOLCK:
 				goto again;
 				/* not reached */
 			case EINTR:
 			case ERESTART:
 				return(EINTR);
 				/* not reached */
 			default:
 				break;
 			}
 		}
 
 		bp = nfs_getcacheblk(vp, lbn, bcount, p);
 
 		if (bcount != biosize)
 			nfs_rsunlock(np, p);
 		if (!bp)
 			return (EINTR);
 
 		/*
 		 * If B_CACHE is not set, we must issue the read.  If this
 		 * fails, we return an error.
 		 */
 
 		if ((bp->b_flags & B_CACHE) == 0) {
 		    bp->b_iocmd = BIO_READ;
 		    vfs_busy_pages(bp, 0);
 		    error = nfs_doio(bp, cred, p);
 		    if (error) {
 			brelse(bp);
 			return (error);
 		    }
 		}
 
 		/*
 		 * on is the offset into the current bp.  Figure out how many
 		 * bytes we can copy out of the bp.  Note that bcount is
 		 * NOT DEV_BSIZE aligned.
 		 *
 		 * Then figure out how many bytes we can copy into the uio.
 		 */
 
 		n = 0;
 		if (on < bcount)
 			n = min((unsigned)(bcount - on), uio->uio_resid);
 		break;
 	    case VLNK:
 		nfsstats.biocache_readlinks++;
 		/* A link is cached as block 0, sized NFS_MAXPATHLEN. */
 		bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
 		if (!bp)
 			return (EINTR);
 		if ((bp->b_flags & B_CACHE) == 0) {
 		    bp->b_iocmd = BIO_READ;
 		    vfs_busy_pages(bp, 0);
 		    error = nfs_doio(bp, cred, p);
 		    if (error) {
 			bp->b_ioflags |= BIO_ERROR;
 			brelse(bp);
 			return (error);
 		    }
 		}
 		n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
 		on = 0;
 		break;
 	    case VDIR:
 		nfsstats.biocache_readdirs++;
 		/* Nothing to return at or beyond the known directory EOF. */
 		if (np->n_direofoffset
 		    && uio->uio_offset >= np->n_direofoffset) {
 		    return (0);
 		}
 		lbn = (uoff_t)uio->uio_offset / NFS_DIRBLKSIZ;
 		on = uio->uio_offset & (NFS_DIRBLKSIZ - 1);
 		bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p);
 		if (!bp)
 		    return (EINTR);
 		if ((bp->b_flags & B_CACHE) == 0) {
 		    bp->b_iocmd = BIO_READ;
 		    vfs_busy_pages(bp, 0);
 		    error = nfs_doio(bp, cred, p);
 		    if (error) {
 			    brelse(bp);
 		    }
 		    while (error == NFSERR_BAD_COOKIE) {
 			printf("got bad cookie vp %p bp %p\n", vp, bp);
 			nfs_invaldir(vp);
 			error = nfs_vinvalbuf(vp, 0, cred, p, 1);
 			/*
 			 * Yuck! The directory has been modified on the
 			 * server. The only way to get the block is by
 			 * reading from the beginning to get all the
 			 * offset cookies.
 			 *
 			 * Leave the last bp intact unless there is an error.
 			 * Loop back up to the while if the error is another
 			 * NFSERR_BAD_COOKIE (double yuck!).
 			 */
 			for (i = 0; i <= lbn && !error; i++) {
 			    if (np->n_direofoffset
 				&& (i * NFS_DIRBLKSIZ) >= np->n_direofoffset)
 				    return (0);
 			    bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p);
 			    if (!bp)
 				return (EINTR);
 			    if ((bp->b_flags & B_CACHE) == 0) {
 				    bp->b_iocmd = BIO_READ;
 				    vfs_busy_pages(bp, 0);
 				    error = nfs_doio(bp, cred, p);
 				    /*
 				     * no error + B_INVAL == directory EOF,
 				     * use the block.
 				     */
 				    if (error == 0 && (bp->b_flags & B_INVAL))
 					    break;
 			    }
 			    /*
 			     * An error will throw away the block and the
 			     * for loop will break out.  If no error and this
 			     * is not the block we want, we throw away the
 			     * block and go for the next one via the for loop.
 			     */
 			    if (error || i < lbn)
 				    brelse(bp);
 			}
 		    }
 		    /*
 		     * The above while is repeated if we hit another cookie
 		     * error.  If we hit an error and it wasn't a cookie error,
 		     * we give up.
 		     */
 		    if (error)
 			    return (error);
 		}
 
 		/*
 		 * If not eof and read aheads are enabled, start one.
 		 * (You need the current block first, so that you have the
 		 *  directory offset cookie of the next block.)
 		 */
 		if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
 		    (bp->b_flags & B_INVAL) == 0 &&
 		    (np->n_direofoffset == 0 ||
 		    (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) &&
 		    !(np->n_flag & NQNFSNONCACHE) &&
 		    !incore(vp, lbn + 1)) {
 			rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p);
 			if (rabp) {
 			    if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) {
 				rabp->b_flags |= B_ASYNC;
 				rabp->b_iocmd = BIO_READ;
 				vfs_busy_pages(rabp, 0);
 				if (nfs_asyncio(rabp, cred, p)) {
 				    rabp->b_flags |= B_INVAL;
 				    rabp->b_ioflags |= BIO_ERROR;
 				    vfs_unbusy_pages(rabp);
 				    brelse(rabp);
 				}
 			    } else {
 				brelse(rabp);
 			    }
 			}
 		}
 		/*
 		 * Unlike VREG files, whose buffer size ( bp->b_bcount ) is
 		 * chopped for the EOF condition, we cannot tell how large
 		 * NFS directories are going to be until we hit EOF.  So
 		 * an NFS directory buffer is *not* chopped to its EOF.  Now,
 		 * it just so happens that b_resid will effectively chop it
 		 * to EOF.  *BUT* this information is lost if the buffer goes
 		 * away and is reconstituted into a B_CACHE state ( due to
 		 * being VMIO ) later.  So we keep track of the directory eof
 		 * in np->n_direofoffset and chop it off as an extra step 
 		 * right here.
 		 */
 		n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on);
 		if (np->n_direofoffset && n > np->n_direofoffset - uio->uio_offset)
 			n = np->n_direofoffset - uio->uio_offset;
 		break;
 	    default:
 		printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
 		break;
 	    };
 
 	    /* Copy n bytes, starting at offset on within the block. */
 	    if (n > 0) {
 		    error = uiomove(bp->b_data + on, (int)n, uio);
 	    }
 	    switch (vp->v_type) {
 	    case VREG:
 		break;
 	    case VLNK:
 		/* A link is read in one pass: n = 0 ends the loop. */
 		n = 0;
 		break;
 	    case VDIR:
 		/*
 		 * Invalidate buffer if caching is disabled, forcing a
 		 * re-read from the remote later.
 		 */
 		if (np->n_flag & NQNFSNONCACHE)
 			bp->b_flags |= B_INVAL;
 		break;
 	    default:
 		printf(" nfs_bioread: type %x unexpected\n",vp->v_type);
 	    }
 	    brelse(bp);
 	} while (error == 0 && uio->uio_resid > 0 && n > 0);
 	return (error);
 }
 
 /*
  * Vnode op for write using bio
  */
 int
 nfs_write(ap)
 	struct vop_write_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	int biosize;
 	struct uio *uio = ap->a_uio;
 	struct proc *p = uio->uio_procp;
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct ucred *cred = ap->a_cred;
 	int ioflag = ap->a_ioflag;
 	struct buf *bp;
 	struct vattr vattr;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	daddr_t lbn;
 	int bcount;
 	int n, on, error = 0, iomode, must_commit;
 	int haverslock = 0;
 
 #ifdef DIAGNOSTIC
 	if (uio->uio_rw != UIO_WRITE)
 		panic("nfs_write mode");
 	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
 		panic("nfs_write proc");
 #endif
 	if (vp->v_type != VREG)
 		return (EIO);
 	if (np->n_flag & NWRITEERR) {
 		np->n_flag &= ~NWRITEERR;
 		return (np->n_error);
 	}
 	if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
 	    (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
 		(void)nfs_fsinfo(nmp, vp, cred, p);
 
 	/*
 	 * Synchronously flush pending buffers if we are in synchronous
 	 * mode or if we are appending.
 	 */
 	if (ioflag & (IO_APPEND | IO_SYNC)) {
 		if (np->n_flag & NMODIFIED) {
 			np->n_attrstamp = 0;
 			error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 			if (error)
 				return (error);
 		}
 	}
 
 	/*
 	 * If IO_APPEND then load uio_offset.  We restart here if we cannot
 	 * get the append lock.
 	 */
 restart:
 	if (ioflag & IO_APPEND) {
 		np->n_attrstamp = 0;
 		error = VOP_GETATTR(vp, &vattr, cred, p);
 		if (error)
 			return (error);
 		uio->uio_offset = np->n_size;
 	}
 
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	if ((uio->uio_offset + uio->uio_resid) > nmp->nm_maxfilesize)
 		return (EFBIG);
 	if (uio->uio_resid == 0)
 		return (0);
 
 	/*
 	 * We need to obtain the rslock if we intend to modify np->n_size
 	 * in order to guarentee the append point with multiple contending
 	 * writers, to guarentee that no other appenders modify n_size
 	 * while we are trying to obtain a truncated buffer (i.e. to avoid
 	 * accidently truncating data written by another appender due to
 	 * the race), and to ensure that the buffer is populated prior to
 	 * our extending of the file.  We hold rslock through the entire
 	 * operation.
 	 *
 	 * Note that we do not synchronize the case where someone truncates
 	 * the file while we are appending to it because attempting to lock
 	 * this case may deadlock other parts of the system unexpectedly.
 	 */
 	if ((ioflag & IO_APPEND) ||
 	    uio->uio_offset + uio->uio_resid > np->n_size) {
 		switch(nfs_rslock(np, p)) {
 		case ENOLCK:
 			goto restart;
 			/* not reached */
 		case EINTR:
 		case ERESTART:
 			return(EINTR);
 			/* not reached */
 		default:
 			break;
 		}
 		haverslock = 1;
 	}
 
 	/*
 	 * Maybe this should be above the vnode op call, but so long as
 	 * file servers have no limits, i don't think it matters
 	 */
 	if (p && uio->uio_offset + uio->uio_resid >
 	      p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
 		PROC_LOCK(p);
 		psignal(p, SIGXFSZ);
 		PROC_UNLOCK(p);
 		if (haverslock)
 			nfs_rsunlock(np, p);
 		return (EFBIG);
 	}
 
 	biosize = vp->v_mount->mnt_stat.f_iosize;
 
 	do {
 		/*
 		 * Check for a valid write lease.
 		 */
 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
 		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
 			do {
 				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
 			} while (error == NQNFS_EXPIRED);
 			if (error)
 				break;
 			if (np->n_lrev != np->n_brev ||
 			    (np->n_flag & NQNFSNONCACHE)) {
 				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 				if (error)
 					break;
 				np->n_brev = np->n_lrev;
 			}
 		}
 		if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) {
 		    iomode = NFSV3WRITE_FILESYNC;
 		    error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit);
 		    if (must_commit)
 			    nfs_clearcommit(vp->v_mount);
 		    break;
 		}
 		nfsstats.biocache_writes++;
 		lbn = uio->uio_offset / biosize;
 		on = uio->uio_offset & (biosize-1);
 		n = min((unsigned)(biosize - on), uio->uio_resid);
 again:
 		/*
 		 * Handle direct append and file extension cases, calculate
 		 * unaligned buffer size.
 		 */
 
 		if (uio->uio_offset == np->n_size && n) {
 			/*
 			 * Get the buffer (in its pre-append state to maintain
 			 * B_CACHE if it was previously set).  Resize the
 			 * nfsnode after we have locked the buffer to prevent
 			 * readers from reading garbage.
 			 */
 			bcount = on;
 			bp = nfs_getcacheblk(vp, lbn, bcount, p);
 
 			if (bp != NULL) {
 				long save;
 
 				np->n_size = uio->uio_offset + n;
 				np->n_flag |= NMODIFIED;
 				vnode_pager_setsize(vp, np->n_size);
 
 				save = bp->b_flags & B_CACHE;
 				bcount += n;
 				allocbuf(bp, bcount);
 				bp->b_flags |= save;
+				bp->b_magic = B_MAGIC_NFS;
+				bp->b_op = &buf_ops_nfs;
 			}
 		} else {
 			/*
 			 * Obtain the locked cache block first, and then 
 			 * adjust the file's size as appropriate.
 			 */
 			bcount = on + n;
 			if ((off_t)lbn * biosize + bcount < np->n_size) {
 				if ((off_t)(lbn + 1) * biosize < np->n_size)
 					bcount = biosize;
 				else
 					bcount = np->n_size - (off_t)lbn * biosize;
 			}
 
 			bp = nfs_getcacheblk(vp, lbn, bcount, p);
 
 			if (uio->uio_offset + n > np->n_size) {
 				np->n_size = uio->uio_offset + n;
 				np->n_flag |= NMODIFIED;
 				vnode_pager_setsize(vp, np->n_size);
 			}
 		}
 
 		if (!bp) {
 			error = EINTR;
 			break;
 		}
 
 		/*
 		 * Issue a READ if B_CACHE is not set.  In special-append
 		 * mode, B_CACHE is based on the buffer prior to the write
 		 * op and is typically set, avoiding the read.  If a read
 		 * is required in special append mode, the server will
 		 * probably send us a short-read since we extended the file
 		 * on our end, resulting in b_resid == 0 and, thusly, 
 		 * B_CACHE getting set.
 		 *
 		 * We can also avoid issuing the read if the write covers
 		 * the entire buffer.  We have to make sure the buffer state
 		 * is reasonable in this case since we will not be initiating
 		 * I/O.  See the comments in kern/vfs_bio.c's getblk() for
 		 * more information.
 		 *
 		 * B_CACHE may also be set due to the buffer being cached
 		 * normally.
 		 */
 
 		if (on == 0 && n == bcount) {
 			bp->b_flags |= B_CACHE;
 			bp->b_flags &= ~B_INVAL;
 			bp->b_ioflags &= ~BIO_ERROR;
 		}
 
 		if ((bp->b_flags & B_CACHE) == 0) {
 			bp->b_iocmd = BIO_READ;
 			vfs_busy_pages(bp, 0);
 			error = nfs_doio(bp, cred, p);
 			if (error) {
 				brelse(bp);
 				break;
 			}
 		}
 		if (!bp) {
 			error = EINTR;
 			break;
 		}
 		if (bp->b_wcred == NOCRED) {
 			crhold(cred);
 			bp->b_wcred = cred;
 		}
 		np->n_flag |= NMODIFIED;
 
 		/*
 		 * If dirtyend exceeds file size, chop it down.  This should
 		 * not normally occur but there is an append race where it
 		 * might occur XXX, so we log it. 
 		 *
 		 * If the chopping creates a reverse-indexed or degenerate
 		 * situation with dirtyoff/end, we 0 both of them.
 		 */
 
 		if (bp->b_dirtyend > bcount) {
 			printf("NFS append race @%lx:%d\n", 
 			    (long)bp->b_blkno * DEV_BSIZE, 
 			    bp->b_dirtyend - bcount);
 			bp->b_dirtyend = bcount;
 		}
 
 		if (bp->b_dirtyoff >= bp->b_dirtyend)
 			bp->b_dirtyoff = bp->b_dirtyend = 0;
 
 		/*
 		 * If the new write will leave a contiguous dirty
 		 * area, just update the b_dirtyoff and b_dirtyend,
 		 * otherwise force a write rpc of the old dirty area.
 		 *
 		 * While it is possible to merge discontiguous writes due to 
 		 * our having a B_CACHE buffer ( and thus valid read data
 		 * for the hole), we don't because it could lead to 
 		 * significant cache coherency problems with multiple clients,
 		 * especially if locking is implemented later on.
 		 *
 		 * as an optimization we could theoretically maintain
 		 * a linked list of discontinuous areas, but we would still
 		 * have to commit them separately so there isn't much
 		 * advantage to it except perhaps a bit of asynchronization.
 		 */
 
 		if (bp->b_dirtyend > 0 &&
 		    (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
 			if (BUF_WRITE(bp) == EINTR)
 				return (EINTR);
 			goto again;
 		}
 
 		/*
 		 * Check for valid write lease and get one as required.
 		 * In case getblk() and/or bwrite() delayed us.
 		 */
 		if ((nmp->nm_flag & NFSMNT_NQNFS) &&
 		    NQNFS_CKINVALID(vp, np, ND_WRITE)) {
 			do {
 				error = nqnfs_getlease(vp, ND_WRITE, cred, p);
 			} while (error == NQNFS_EXPIRED);
 			if (error) {
 				brelse(bp);
 				break;
 			}
 			if (np->n_lrev != np->n_brev ||
 			    (np->n_flag & NQNFSNONCACHE)) {
 				brelse(bp);
 				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 				if (error)
 					break;
 				np->n_brev = np->n_lrev;
 				goto again;
 			}
 		}
 
 		error = uiomove((char *)bp->b_data + on, n, uio);
 
 		/*
 		 * Since this block is being modified, it must be written
 		 * again and not just committed.  Since write clustering does
 		 * not work for the stage 1 data write, only the stage 2
 		 * commit rpc, we have to clear B_CLUSTEROK as well.
 		 */
 		bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 
 		if (error) {
 			bp->b_ioflags |= BIO_ERROR;
 			brelse(bp);
 			break;
 		}
 
 		/*
 		 * Only update dirtyoff/dirtyend if not a degenerate 
 		 * condition.
 		 */
 		if (n) {
 			if (bp->b_dirtyend > 0) {
 				bp->b_dirtyoff = min(on, bp->b_dirtyoff);
 				bp->b_dirtyend = max((on + n), bp->b_dirtyend);
 			} else {
 				bp->b_dirtyoff = on;
 				bp->b_dirtyend = on + n;
 			}
 			vfs_bio_set_validclean(bp, on, n);
 		}
 
 		/*
 		 * If the lease is non-cachable or IO_SYNC do bwrite().
 		 *
 		 * IO_INVAL appears to be unused.  The idea appears to be
 		 * to turn off caching in this case.  Very odd.  XXX
 		 */
 		if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
 			if (ioflag & IO_INVAL)
 				bp->b_flags |= B_NOCACHE;
 			error = BUF_WRITE(bp);
 			if (error)
 				break;
 			if (np->n_flag & NQNFSNONCACHE) {
 				error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1);
 				if (error)
 					break;
 			}
 		} else if ((n + on) == biosize &&
 			(nmp->nm_flag & NFSMNT_NQNFS) == 0) {
 			bp->b_flags |= B_ASYNC;
 			(void)nfs_writebp(bp, 0, 0);
 		} else {
 			bdwrite(bp);
 		}
 	} while (uio->uio_resid > 0 && n > 0);
 
 	if (haverslock)
 		nfs_rsunlock(np, p);
 
 	return (error);
 }
 
 /*
  * Get an nfs cache block.
  *
  * Looks up, or allocates, the buffer for logical block bn of vp via
  * getblk() and returns it marked busy.
  *
  * On a mount with NFSMNT_INT set, the first lookup is made with PCATCH.
  * Each time getblk() returns no buffer, nfs_sigintr() is consulted: if it
  * reports an interruption NULL is returned, otherwise the lookup is
  * retried with a 2 * hz timeout.  Other mounts make one plain getblk()
  * call.
  *
  * For regular files b_blkno is set to bn converted to DEV_BSIZE units
  * using the mount's f_iosize.
  *
  * The caller must carefully deal with the possible B_INVAL state of
  * the buffer.  nfs_doio() clears B_INVAL (and nfs_asyncio() clears it
  * indirectly), so synchronous reads can be issued without worrying about
  * the B_INVAL state.  Writes that extend a file past its EOF need more
  * care (see the comments in nfs_write()).
  */
 static struct buf *
 nfs_getcacheblk(vp, bn, size, p)
 	struct vnode *vp;
 	daddr_t bn;
 	int size;
 	struct proc *p;
 {
 	struct mount *mp = vp->v_mount;
 	struct nfsmount *nmp = VFSTONFS(mp);
 	register struct buf *blk;
 
 	if ((nmp->nm_flag & NFSMNT_INT) == 0) {
 		blk = getblk(vp, bn, size, 0, 0);
 	} else {
 		/* Keep retrying until a buffer arrives or we are interrupted. */
 		for (blk = getblk(vp, bn, size, PCATCH, 0);
 		    blk == (struct buf *)0;
 		    blk = getblk(vp, bn, size, 0, 2 * hz)) {
 			if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
 				return ((struct buf *)0);
 		}
 	}
 
 	if (vp->v_type == VREG) {
 		int iosize = mp->mnt_stat.f_iosize;
 
 		blk->b_blkno = bn * (iosize / DEV_BSIZE);
 	}
 	return (blk);
 }
 
 /*
  * Flush and invalidate the buffers of 'vp' by calling vinvalbuf() with
  * the caller's 'flags'.  Only one flush runs per nfsnode at a time: the
  * NFLUSHINPROG bit marks a flush in progress and NFLUSHWANT records that
  * somebody is sleeping on &np->n_flag waiting for it to finish.
  *
  * 'intrflg' is honoured only when the mount has NFSMNT_INT set.
  *
  * Returns 0 immediately if VXLOCK is set on the vnode, EINTR when
  * nfs_sigintr() reports an interruption, and 0 once vinvalbuf() has
  * succeeded (in which case NMODIFIED is cleared as well).
  */
 int
 nfs_vinvalbuf(vp, flags, cred, p, intrflg)
 	struct vnode *vp;
 	int flags;
 	struct ucred *cred;
 	struct proc *p;
 	int intrflg;
 {
 	register struct nfsnode *np = VTONFS(vp);
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int rv, catchflag, timo;
 	int clearbits, result;
 
 	if (vp->v_flag & VXLOCK)
 		return (0);
 
 	if ((nmp->nm_flag & NFSMNT_INT) == 0)
 		intrflg = 0;
 	catchflag = intrflg ? PCATCH : 0;
 	timo = intrflg ? 2 * hz : 0;
 
 	/*
 	 * Let any flush that is already running finish first.
 	 */
 	while ((np->n_flag & NFLUSHINPROG) != 0) {
 		np->n_flag |= NFLUSHWANT;
 		rv = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
 			timo);
 		if (rv == 0 || !intrflg)
 			continue;
 		if (nfs_sigintr(nmp, (struct nfsreq *)0, p))
 			return (EINTR);
 	}
 
 	/*
 	 * Our turn.  The first attempt may catch signals; retries sleep
 	 * with a timeout instead.  On success both NMODIFIED and
 	 * NFLUSHINPROG are dropped, on interruption only NFLUSHINPROG.
 	 */
 	clearbits = NMODIFIED | NFLUSHINPROG;
 	result = 0;
 	np->n_flag |= NFLUSHINPROG;
 	for (rv = vinvalbuf(vp, flags, cred, p, catchflag, 0); rv != 0;
 	    rv = vinvalbuf(vp, flags, cred, p, 0, timo)) {
 		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
 			clearbits = NFLUSHINPROG;
 			result = EINTR;
 			break;
 		}
 	}
 
 	/*
 	 * Release the flush marker and wake anybody who queued up behind us.
 	 */
 	np->n_flag &= ~clearbits;
 	if (np->n_flag & NFLUSHWANT) {
 		np->n_flag &= ~NFLUSHWANT;
 		wakeup((caddr_t)&np->n_flag);
 	}
 	return (result);
 }
 
 /*
  * Initiate asynchronous I/O. Return an error if no nfsiods are available.
  * This is mainly to avoid queueing async I/O requests when the nfsiods
  * are all hung on a dead server.
  *
  * Note: nfs_asyncio() does not clear (BIO_ERROR|B_INVAL) but when the bp
  * is eventually dequeued by the async daemon, nfs_doio() *will*.
  *
  * Parameters:
  *	bp	buffer to queue on its mount's nm_bufq
  *	cred	credential attached to bp (b_rcred or b_wcred, depending on
  *		b_iocmd) if the buffer has none yet
  *	procp	process passed to nfs_sigintr() when a sleep is interrupted
  *
  * Returns:
  *	0	the buffer was queued for an nfsiod
  *	EIO	nothing was queued; the caller is expected to do the I/O
  *		synchronously
  *	EINTR	nfs_sigintr() reported an interruption while waiting for
  *		the queue to drain
  */
 int
 nfs_asyncio(bp, cred, procp)
 	register struct buf *bp;
 	struct ucred *cred;
 	struct proc *procp;
 {
 	struct nfsmount *nmp;
 	int i;
 	int gotiod;
 	int slpflag = 0;
 	int slptimeo = 0;
 	int error;
 
 	/*
 	 * If no async daemons then return EIO to force caller to run the rpc
 	 * synchronously.
 	 */
 	if (nfs_numasync == 0)
 		return (EIO);
 
 	nmp = VFSTONFS(bp->b_vp->v_mount);
 
 	/*
 	 * Commits are usually short and sweet so let's save some cpu and 
 	 * leave the async daemons for more important rpc's (such as reads
 	 * and writes).
 	 */
 	if (bp->b_iocmd == BIO_WRITE && (bp->b_flags & B_NEEDCOMMIT) &&
 	    (nmp->nm_bufqiods > nfs_numasync / 2)) {
 		return(EIO);
 	}
 
 again:
 	/*
 	 * Note that slpflag is set back to PCATCH here on every pass for an
 	 * interruptible mount, including passes reached via "goto again".
 	 */
 	if (nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
 	gotiod = FALSE;
 
 	/*
 	 * Find a free iod to process this request.
 	 */
 	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
 		if (nfs_iodwant[i]) {
 			/*
 			 * Found one, so wake it up and tell it which
 			 * mount to process.
 			 */
 			NFS_DPF(ASYNCIO,
 				("nfs_asyncio: waking iod %d for mount %p\n",
 				 i, nmp));
 			nfs_iodwant[i] = (struct proc *)0;
 			nfs_iodmount[i] = nmp;
 			nmp->nm_bufqiods++;
 			wakeup((caddr_t)&nfs_iodwant[i]);
 			gotiod = TRUE;
 			break;
 		}
 
 	/*
 	 * If none are free, we may already have an iod working on this mount
 	 * point.  If so, it will process our request.
 	 */
 	if (!gotiod) {
 		if (nmp->nm_bufqiods > 0) {
 			NFS_DPF(ASYNCIO,
 				("nfs_asyncio: %d iods are already processing mount %p\n",
 				 nmp->nm_bufqiods, nmp));
 			gotiod = TRUE;
 		}
 	}
 
 	/*
 	 * If we have an iod which can process the request, then queue
 	 * the buffer.
 	 */
 	if (gotiod) {
 		/*
 		 * Ensure that the queue never grows too large.  We still want
 		 * to asynchronize so we block rather than return EIO.
 		 */
 		while (nmp->nm_bufqlen >= 2*nfs_numasync) {
 			NFS_DPF(ASYNCIO,
 				("nfs_asyncio: waiting for mount %p queue to drain\n", nmp));
 			nmp->nm_bufqwant = TRUE;
 			error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO,
 				       "nfsaio", slptimeo);
 			if (error) {
 				if (nfs_sigintr(nmp, NULL, procp))
 					return (EINTR);
 				/*
 				 * The sleep failed but nfs_sigintr() did not
 				 * ask us to give up: stop catching signals
 				 * and sleep with a timeout from now on.
 				 */
 				if (slpflag == PCATCH) {
 					slpflag = 0;
 					slptimeo = 2 * hz;
 				}
 			}
 			/*
 			 * We might have lost our iod while sleeping,
 			 * so check and loop if necessary.
 			 */
 			if (nmp->nm_bufqiods == 0) {
 				NFS_DPF(ASYNCIO,
 					("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp));
 				goto again;
 			}
 		}
 
 		/*
 		 * Attach the caller's credential to the buffer if it does
 		 * not have one for this direction yet, taking a reference
 		 * with crhold().  Writes are also marked B_WRITEINPROG.
 		 */
 		if (bp->b_iocmd == BIO_READ) {
 			if (bp->b_rcred == NOCRED && cred != NOCRED) {
 				crhold(cred);
 				bp->b_rcred = cred;
 			}
 		} else {
 			bp->b_flags |= B_WRITEINPROG;
 			if (bp->b_wcred == NOCRED && cred != NOCRED) {
 				crhold(cred);
 				bp->b_wcred = cred;
 			}
 		}
 
 		/* Append the buffer to this mount's async queue. */
 		BUF_KERNPROC(bp);
 		TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist);
 		nmp->nm_bufqlen++;
 		return (0);
 	}
 
 	/*
 	 * All the iods are busy on other mounts, so return EIO to
 	 * force the caller to process the i/o synchronously.
 	 */
 	NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n"));
 	return (EIO);
 }
 
 /*
  * Do an I/O operation to/from a cache block. This may be called
  * synchronously or from an nfsiod.
  *
  * Parameters:
  *	bp	buffer describing the transfer; b_iocmd and b_flags select
  *		between the physio, read and write paths below
  *	cr	credential handed to the read/write/readlink/readdir rpcs
  *	p	process recorded in the uio; on the regular-file read path
  *		it is also the process that is sent SIGKILL when its text
  *		file is found to have been modified
  *
  * Returns the error from the rpc that was issued (0 on success).  On
  * every path bufdone() is called on bp before returning.
  */
 int
 nfs_doio(bp, cr, p)
 	struct buf *bp;
 	struct ucred *cr;
 	struct proc *p;
 {
 	struct uio *uiop;
 	struct vnode *vp;
 	struct nfsnode *np;
 	struct nfsmount *nmp;
 	int error = 0, iomode, must_commit = 0;
 	struct uio uio;
 	struct iovec io;
 
 	/* Build a single-iovec, kernel-space uio on the stack. */
 	vp = bp->b_vp;
 	np = VTONFS(vp);
 	nmp = VFSTONFS(vp->v_mount);
 	uiop = &uio;
 	uiop->uio_iov = &io;
 	uiop->uio_iovcnt = 1;
 	uiop->uio_segflg = UIO_SYSSPACE;
 	uiop->uio_procp = p;
 
 	/*
 	 * clear BIO_ERROR and B_INVAL state prior to initiating the I/O.  We
 	 * do this here so we do not have to do it in all the code that
 	 * calls us.
 	 */
 	bp->b_flags &= ~B_INVAL;
 	bp->b_ioflags &= ~BIO_ERROR;
 
 	KASSERT(!(bp->b_flags & B_DONE), ("nfs_doio: bp %p already marked done", bp));
 
 	/*
 	 * Historically, paging was done with physio, but no more.
 	 */
 	if (bp->b_flags & B_PHYS) {
 	    /*
 	     * ...though reading /dev/drum still gets us here.
 	     */
 	    io.iov_len = uiop->uio_resid = bp->b_bcount;
 	    /* mapping was done by vmapbuf() */
 	    io.iov_base = bp->b_data;
 	    uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
 	    if (bp->b_iocmd == BIO_READ) {
 		uiop->uio_rw = UIO_READ;
 		nfsstats.read_physios++;
 		error = nfs_readrpc(vp, uiop, cr);
 	    } else {
 		/* 'com' receives the rpc's last out-parameter; it is not examined here. */
 		int com;
 
 		iomode = NFSV3WRITE_DATASYNC;
 		uiop->uio_rw = UIO_WRITE;
 		nfsstats.write_physios++;
 		error = nfs_writerpc(vp, uiop, cr, &iomode, &com);
 	    }
 	    if (error) {
 		bp->b_ioflags |= BIO_ERROR;
 		bp->b_error = error;
 	    }
 	} else if (bp->b_iocmd == BIO_READ) {
 	    /* Buffered read: the rpc used depends on the vnode type. */
 	    io.iov_len = uiop->uio_resid = bp->b_bcount;
 	    io.iov_base = bp->b_data;
 	    uiop->uio_rw = UIO_READ;
 	    switch (vp->v_type) {
 	    case VREG:
 		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
 		nfsstats.read_bios++;
 		error = nfs_readrpc(vp, uiop, cr);
 		if (!error) {
 		    if (uiop->uio_resid) {
 			/*
 			 * If we had a short read with no error, we must have
 			 * hit a file hole.  We should zero-fill the remainder.
 			 * This can also occur if the server hits the file EOF.
 			 *
 			 * Holes used to be able to occur due to pending 
 			 * writes, but that is not possible any longer.
 			 */
 			int nread = bp->b_bcount - uiop->uio_resid;
 			int left  = bp->b_bcount - nread;
 
 			if (left > 0)
 				bzero((char *)bp->b_data + nread, left);
 			uiop->uio_resid = 0;
 		    }
 		}
 		/*
 		 * If the vnode is flagged VTEXT and either the NQNFS lease
 		 * check or the cached modification time says the file has
 		 * changed, kill the process doing the read.
 		 */
 		if (p && (vp->v_flag & VTEXT) &&
 			(((nmp->nm_flag & NFSMNT_NQNFS) &&
 			  NQNFS_CKINVALID(vp, np, ND_READ) &&
 			  np->n_lrev != np->n_brev) ||
 			 (!(nmp->nm_flag & NFSMNT_NQNFS) &&
 			  np->n_mtime != np->n_vattr.va_mtime.tv_sec))) {
 			uprintf("Process killed due to text file modification\n");
 			PROC_LOCK(p);
 			psignal(p, SIGKILL);
 			/* NOTE(review): no matching release is visible in this function. */
 			_PHOLD(p);
 			PROC_UNLOCK(p);
 		}
 		break;
 	    case VLNK:
 		uiop->uio_offset = (off_t)0;
 		nfsstats.readlink_bios++;
 		error = nfs_readlinkrpc(vp, uiop, cr);
 		break;
 	    case VDIR:
 		nfsstats.readdir_bios++;
 		uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ;
 		/*
 		 * Try readdirplus when the mount asks for it; if the server
 		 * answers NFSERR_NOTSUPP, turn the option off for this mount
 		 * and fall back to a plain readdir below.
 		 */
 		if (nmp->nm_flag & NFSMNT_RDIRPLUS) {
 			error = nfs_readdirplusrpc(vp, uiop, cr);
 			if (error == NFSERR_NOTSUPP)
 				nmp->nm_flag &= ~NFSMNT_RDIRPLUS;
 		}
 		if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0)
 			error = nfs_readdirrpc(vp, uiop, cr);
 		/*
 		 * end-of-directory sets B_INVAL but does not generate an
 		 * error.
 		 */
 		if (error == 0 && uiop->uio_resid == bp->b_bcount)
 			bp->b_flags |= B_INVAL;
 		break;
 	    default:
 		printf("nfs_doio:  type %x unexpected\n",vp->v_type);
 		break;
 	    };
 	    if (error) {
 		bp->b_ioflags |= BIO_ERROR;
 		bp->b_error = error;
 	    }
 	} else {
 	    /* 
 	     * If we only need to commit, try to commit
 	     */
 	    if (bp->b_flags & B_NEEDCOMMIT) {
 		    int retv;
 		    off_t off;
 
 		    off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff;
 		    bp->b_flags |= B_WRITEINPROG;
 		    retv = nfs_commit(
 				bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff,
 				bp->b_wcred, p);
 		    bp->b_flags &= ~B_WRITEINPROG;
 		    if (retv == 0) {
 			    /* Commit succeeded: the buffer is clean, we are done. */
 			    bp->b_dirtyoff = bp->b_dirtyend = 0;
 			    bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 			    bp->b_resid = 0;
 			    bufdone(bp);
 			    return (0);
 		    }
 		    if (retv == NFSERR_STALEWRITEVERF) {
 			    nfs_clearcommit(bp->b_vp->v_mount);
 		    }
 		    /* Any commit failure falls through to a real write. */
 	    }
 
 	    /*
 	     * Setup for actual write
 	     */
 
 	    /* Clip the dirty region so it does not extend past n_size. */
 	    if ((off_t)bp->b_blkno * DEV_BSIZE + bp->b_dirtyend > np->n_size)
 		bp->b_dirtyend = np->n_size - (off_t)bp->b_blkno * DEV_BSIZE;
 
 	    if (bp->b_dirtyend > bp->b_dirtyoff) {
 		io.iov_len = uiop->uio_resid = bp->b_dirtyend
 		    - bp->b_dirtyoff;
 		uiop->uio_offset = (off_t)bp->b_blkno * DEV_BSIZE
 		    + bp->b_dirtyoff;
 		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
 		uiop->uio_rw = UIO_WRITE;
 		nfsstats.write_bios++;
 
 		/*
 		 * Only a buffer that is B_ASYNC and has none of
 		 * B_NEEDCOMMIT, B_NOCACHE or B_CLUSTER set is written
 		 * unstable; everything else is written FILESYNC.
 		 */
 		if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC)
 		    iomode = NFSV3WRITE_UNSTABLE;
 		else
 		    iomode = NFSV3WRITE_FILESYNC;
 
 		bp->b_flags |= B_WRITEINPROG;
 		error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit);
 
 		/*
 		 * When setting B_NEEDCOMMIT also set B_CLUSTEROK to try
 		 * to cluster the buffers needing commit.  This will allow
 		 * the system to submit a single commit rpc for the whole
 		 * cluster.  We can do this even if the buffer is not 100% 
 		 * dirty (relative to the NFS blocksize), so we optimize the
 		 * append-to-file-case.
 		 *
 		 * (when clearing B_NEEDCOMMIT, B_CLUSTEROK must also be
 		 * cleared because write clustering only works for commit
 		 * rpc's, not for the data portion of the write).
 		 */
 
 		if (!error && iomode == NFSV3WRITE_UNSTABLE) {
 		    bp->b_flags |= B_NEEDCOMMIT;
 		    if (bp->b_dirtyoff == 0
 			&& bp->b_dirtyend == bp->b_bcount)
 			bp->b_flags |= B_CLUSTEROK;
 		} else {
 		    bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 		}
 		bp->b_flags &= ~B_WRITEINPROG;
 
 		/*
 		 * For an interrupted write, the buffer is still valid
 		 * and the write hasn't been pushed to the server yet,
 		 * so we can't set BIO_ERROR and report the interruption
 		 * by setting B_EINTR. For the B_ASYNC case, B_EINTR
 		 * is not relevant, so the rpc attempt is essentially
 		 * a noop.  For the case of a V3 write rpc not being
 		 * committed to stable storage, the block is still
 		 * dirty and requires either a commit rpc or another
 		 * write rpc with iomode == NFSV3WRITE_FILESYNC before
 		 * the block is reused. This is indicated by setting
 		 * the B_DELWRI and B_NEEDCOMMIT flags.
 		 *
 		 * If the buffer is marked B_PAGING, it does not reside on
 		 * the vp's paging queues so we cannot call bdirty().  The
 		 * bp in this case is not an NFS cache block so we should
 		 * be safe. XXX
 		 */
     		if (error == EINTR
 		    || (!error && (bp->b_flags & B_NEEDCOMMIT))) {
 			int s;
 
 			s = splbio();
 			bp->b_flags &= ~(B_INVAL|B_NOCACHE);
 			if ((bp->b_flags & B_PAGING) == 0) {
 			    bdirty(bp);
 			    bp->b_flags &= ~B_DONE;
 			}
 			if (error && (bp->b_flags & B_ASYNC) == 0)
 			    bp->b_flags |= B_EINTR;
 			splx(s);
 	    	} else {
 		    /*
 		     * Either a hard error or a fully stable write: record
 		     * any error on both the buffer and the nfsnode, then
 		     * mark the buffer as having no dirty region.
 		     */
 		    if (error) {
 			bp->b_ioflags |= BIO_ERROR;
 			bp->b_error = np->n_error = error;
 			np->n_flag |= NWRITEERR;
 		    }
 		    bp->b_dirtyoff = bp->b_dirtyend = 0;
 		}
 	    } else {
 		/* Empty dirty region: nothing to write. */
 		bp->b_resid = 0;
 		bufdone(bp);
 		return (0);
 	    }
 	}
 	/* Common exit: report the residual count and complete the buffer. */
 	bp->b_resid = uiop->uio_resid;
 	if (must_commit)
 	    nfs_clearcommit(vp->v_mount);
 	bufdone(bp);
 	return (error);
 }
Index: head/sys/nfsclient/nfs_vnops.c
===================================================================
--- head/sys/nfsclient/nfs_vnops.c	(revision 75579)
+++ head/sys/nfsclient/nfs_vnops.c	(revision 75580)
@@ -1,3400 +1,3385 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)nfs_vnops.c	8.16 (Berkeley) 5/27/95
  * $FreeBSD$
  */
 
 
 /*
  * vnode op calls for Sun NFS version 2 and 3
  */
 
 #include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/namei.h>
 #include <sys/socket.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/lockf.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #include <miscfs/fifofs/fifo.h>
 
 #include <nfs/rpcv2.h>
 #include <nfs/nfsproto.h>
 #include <nfs/nfs.h>
 #include <nfs/nfsnode.h>
 #include <nfs/nfsmount.h>
 #include <nfs/xdr_subs.h>
 #include <nfs/nfsm_subs.h>
 #include <nfs/nqnfs.h>
 
 #include <net/if.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 
 /* Defs */
 #define	TRUE	1
 #define	FALSE	0
 
 /*
  * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these
  * calls are not in getblk() and brelse() so that they would not be necessary
  * here.
  */
 #ifndef B_VMIO
 #define vfs_busy_pages(bp, f)
 #endif
 
 static int	nfsspec_read __P((struct vop_read_args *));
 static int	nfsspec_write __P((struct vop_write_args *));
 static int	nfsfifo_read __P((struct vop_read_args *));
 static int	nfsfifo_write __P((struct vop_write_args *));
 static int	nfsspec_close __P((struct vop_close_args *));
 static int	nfsfifo_close __P((struct vop_close_args *));
 #define nfs_poll vop_nopoll
 static int	nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int));
 static int	nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *));
 static	int	nfs_lookup __P((struct vop_lookup_args *));
 static	int	nfs_create __P((struct vop_create_args *));
 static	int	nfs_mknod __P((struct vop_mknod_args *));
 static	int	nfs_open __P((struct vop_open_args *));
 static	int	nfs_close __P((struct vop_close_args *));
 static	int	nfs_access __P((struct vop_access_args *));
 static	int	nfs_getattr __P((struct vop_getattr_args *));
 static	int	nfs_setattr __P((struct vop_setattr_args *));
 static	int	nfs_read __P((struct vop_read_args *));
 static	int	nfs_fsync __P((struct vop_fsync_args *));
 static	int	nfs_remove __P((struct vop_remove_args *));
 static	int	nfs_link __P((struct vop_link_args *));
 static	int	nfs_rename __P((struct vop_rename_args *));
 static	int	nfs_mkdir __P((struct vop_mkdir_args *));
 static	int	nfs_rmdir __P((struct vop_rmdir_args *));
 static	int	nfs_symlink __P((struct vop_symlink_args *));
 static	int	nfs_readdir __P((struct vop_readdir_args *));
 static	int	nfs_bmap __P((struct vop_bmap_args *));
 static	int	nfs_strategy __P((struct vop_strategy_args *));
 static	int	nfs_lookitup __P((struct vnode *, const char *, int,
 			struct ucred *, struct proc *, struct nfsnode **));
 static	int	nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *));
 static int	nfsspec_access __P((struct vop_access_args *));
 static int	nfs_readlink __P((struct vop_readlink_args *));
 static int	nfs_print __P((struct vop_print_args *));
 static int	nfs_advlock __P((struct vop_advlock_args *));
-static int	nfs_bwrite __P((struct vop_bwrite_args *));
 /*
  * Global vfs data structures for nfs
  *
  * nfsv2_vnodeop_entries pairs each vnode operation descriptor with the
  * routine that implements it for NFS vnodes; the table is terminated by
  * a { NULL, NULL } entry and registered through VNODEOP_SET() below.
  * The vop_default_desc entry names vop_defaultop (presumably the handler
  * for operations not listed here -- confirm against the vnode op
  * framework).
  */
 vop_t **nfsv2_vnodeop_p;
 static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) vop_defaultop },
 	{ &vop_access_desc,		(vop_t *) nfs_access },
 	{ &vop_advlock_desc,		(vop_t *) nfs_advlock },
 	{ &vop_bmap_desc,		(vop_t *) nfs_bmap },
-	{ &vop_bwrite_desc,		(vop_t *) nfs_bwrite },
 	{ &vop_close_desc,		(vop_t *) nfs_close },
 	{ &vop_create_desc,		(vop_t *) nfs_create },
 	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
 	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
 	{ &vop_getpages_desc,		(vop_t *) nfs_getpages },
 	{ &vop_putpages_desc,		(vop_t *) nfs_putpages },
 	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
 	{ &vop_lease_desc,		(vop_t *) vop_null },
 	{ &vop_link_desc,		(vop_t *) nfs_link },
 	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
 	{ &vop_lookup_desc,		(vop_t *) nfs_lookup },
 	{ &vop_mkdir_desc,		(vop_t *) nfs_mkdir },
 	{ &vop_mknod_desc,		(vop_t *) nfs_mknod },
 	{ &vop_open_desc,		(vop_t *) nfs_open },
 	{ &vop_poll_desc,		(vop_t *) nfs_poll },
 	{ &vop_print_desc,		(vop_t *) nfs_print },
 	{ &vop_read_desc,		(vop_t *) nfs_read },
 	{ &vop_readdir_desc,		(vop_t *) nfs_readdir },
 	{ &vop_readlink_desc,		(vop_t *) nfs_readlink },
 	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
 	{ &vop_remove_desc,		(vop_t *) nfs_remove },
 	{ &vop_rename_desc,		(vop_t *) nfs_rename },
 	{ &vop_rmdir_desc,		(vop_t *) nfs_rmdir },
 	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
 	{ &vop_strategy_desc,		(vop_t *) nfs_strategy },
 	{ &vop_symlink_desc,		(vop_t *) nfs_symlink },
 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
 	{ &vop_write_desc,		(vop_t *) nfs_write },
 	{ NULL, NULL }
 };
 /* Ties the table above to the nfsv2_vnodeop_p vector pointer. */
 static struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
 	{ &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
 VNODEOP_SET(nfsv2_vnodeop_opv_desc);
 
 /*
  * Special device vnode ops
  *
  * Same layout as the regular NFS table, but the vop_default_desc entry
  * names spec_vnoperate and only attribute, locking, reclaim and the
  * nfsspec_* read/write/close/access wrappers are supplied by NFS.
  */
 vop_t **spec_nfsv2nodeop_p;
 static struct vnodeopv_entry_desc nfsv2_specop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
 	{ &vop_access_desc,		(vop_t *) nfsspec_access },
 	{ &vop_close_desc,		(vop_t *) nfsspec_close },
 	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
 	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
 	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
 	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
 	{ &vop_print_desc,		(vop_t *) nfs_print },
 	{ &vop_read_desc,		(vop_t *) nfsspec_read },
 	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
 	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
 	{ &vop_write_desc,		(vop_t *) nfsspec_write },
 	{ NULL, NULL }
 };
 /* Ties the table above to the spec_nfsv2nodeop_p vector pointer. */
 static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
 	{ &spec_nfsv2nodeop_p, nfsv2_specop_entries };
 VNODEOP_SET(spec_nfsv2nodeop_opv_desc);
 
 /*
  * Fifo vnode ops
  *
  * The vop_default_desc entry names fifo_vnoperate; NFS supplies the
  * attribute, locking and reclaim routines plus the nfsfifo_* read, write
  * and close wrappers.  Access checks use nfsspec_access, the same routine
  * as the special device table.
  */
 vop_t **fifo_nfsv2nodeop_p;
 static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
 	{ &vop_access_desc,		(vop_t *) nfsspec_access },
 	{ &vop_close_desc,		(vop_t *) nfsfifo_close },
 	{ &vop_fsync_desc,		(vop_t *) nfs_fsync },
 	{ &vop_getattr_desc,		(vop_t *) nfs_getattr },
 	{ &vop_inactive_desc,		(vop_t *) nfs_inactive },
 	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
 	{ &vop_lock_desc,		(vop_t *) vop_sharedlock },
 	{ &vop_print_desc,		(vop_t *) nfs_print },
 	{ &vop_read_desc,		(vop_t *) nfsfifo_read },
 	{ &vop_reclaim_desc,		(vop_t *) nfs_reclaim },
 	{ &vop_setattr_desc,		(vop_t *) nfs_setattr },
 	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
 	{ &vop_write_desc,		(vop_t *) nfsfifo_write },
 	{ NULL, NULL }
 };
 /* Ties the table above to the fifo_nfsv2nodeop_p vector pointer. */
 static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
 	{ &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries };
 VNODEOP_SET(fifo_nfsv2nodeop_opv_desc);
 
 static int	nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp,
 				  struct componentname *cnp,
 				  struct vattr *vap));
 static int	nfs_removerpc __P((struct vnode *dvp, const char *name,
 				   int namelen,
 				   struct ucred *cred, struct proc *proc));
 static int	nfs_renamerpc __P((struct vnode *fdvp, const char *fnameptr,
 				   int fnamelen, struct vnode *tdvp,
 				   const char *tnameptr, int tnamelen,
 				   struct ucred *cred, struct proc *proc));
 static int	nfs_renameit __P((struct vnode *sdvp,
 				  struct componentname *scnp,
 				  struct sillyrename *sp));
 
 /*
  * Global variables
  */
 extern u_int32_t nfs_true, nfs_false;
 extern u_int32_t nfs_xdrneg1;
 extern struct nfsstats nfsstats;
 extern nfstype nfsv3_type[9];
 struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
 struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON];
 int nfs_numasync = 0;
 #define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
 
 SYSCTL_DECL(_vfs_nfs);
 
 /*
  * vfs.nfs.access_cache_timeout: how long a cached NFSv3 ACCESS result is
  * trusted.  nfs_access() compares time_second against n_modestamp plus
  * this value (so presumably seconds), and only asks the server about the
  * full set of access bits when it is greater than zero.
  */
 static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, 
 	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
 
 /*
  * vfs.nfs.nfsv3_commit_on_close: when non-zero, nfs_close() passes 1 as
  * the commit argument of nfs_flush() for NFSv3 files; otherwise 0.
  */
 static int	nfsv3_commit_on_close = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW, 
 	   &nfsv3_commit_on_close, 0, "write+commit on close, else only write");
 /* Disabled: read-only exports of the access cache hit/miss counters. */
 #if 0
 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, 
 	   &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count");
 
 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, 
 	   &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count");
 #endif
 
 /* Union of the six NFSv3 ACCESS bits: read, modify, extend, execute, delete, lookup. */
 #define	NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY		\
 			 | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE	\
 			 | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP)
 /*
  * Send an NFSv3 ACCESS request for 'vp' over the wire, asking about the
  * access bits in 'wmode' on behalf of 'cred'.
  *
  * When no error has been recorded after the post-op attributes are
  * processed, the access bits from the reply are stored in np->n_mode
  * together with the requesting uid (n_modeuid) and the current
  * time_second (n_modestamp), which is what nfs_access() consults as its
  * cache.
  *
  * Returns the value left in 'error' by the request macros (0 on success).
  *
  * NOTE(review): the nfsm_* macros are defined elsewhere; they appear to
  * rely on the local variable names declared here (tl, t1, t2, cp, bpos,
  * dpos, cp2, the mbuf pointers and 'error'), so do not rename them
  * without checking the macro definitions.
  */
 static int
 nfs3_access_otw(struct vnode *vp,
 		int wmode,
 		struct proc *p,
 		struct ucred *cred)
 {
 	const int v3 = 1;
 	u_int32_t *tl;
 	int error = 0, attrflag;
 	
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	caddr_t bpos, dpos, cp2;
 	register int32_t t1, t2;
 	register caddr_t cp;
 	u_int32_t rmode;
 	struct nfsnode *np = VTONFS(vp);
 
 	nfsstats.rpccnt[NFSPROC_ACCESS]++;
 	/* Request body: file handle followed by the requested access bits. */
 	nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
 	nfsm_fhtom(vp, v3);
 	nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 	*tl = txdr_unsigned(wmode); 
 	nfsm_request(vp, NFSPROC_ACCESS, p, cred);
 	nfsm_postop_attr(vp, attrflag);
 	if (!error) {
 		/* Cache the answer for nfs_access(). */
 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 		rmode = fxdr_unsigned(u_int32_t, *tl);
 		np->n_mode = rmode;
 		np->n_modeuid = cred->cr_uid;
 		np->n_modestamp = time_second;
 	}
 	nfsm_reqdone;
 	return error;
 }
 
 /*
  * nfs access vnode op.
  * For nfs version 2, just return ok. File accesses may fail later.
  * For nfs version 3, use the access rpc to check accessibility. If file modes
  * are changed on the server, accesses might still fail later.
  */
 static int
 nfs_access(ap)
 	struct vop_access_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	int error = 0;
 	u_int32_t mode, wmode;
 	int v3 = NFS_ISV3(vp);
 	struct nfsnode *np = VTONFS(vp);
 
 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
 	 */
 	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
 		default:
 			break;
 		}
 	}
 	/*
 	 * For nfs v3, check to see if we have done this recently, and if
 	 * so return our cached result instead of making an ACCESS call.
 	 * If not, do an access rpc, otherwise you are stuck emulating
 	 * ufs_access() locally using the vattr. This may not be correct,
 	 * since the server may apply other access criteria such as
 	 * client uid-->server uid mapping that we do not know about.
 	 */
 	if (v3) {
 		if (ap->a_mode & VREAD)
 			mode = NFSV3ACCESS_READ;
 		else
 			mode = 0;
 		if (vp->v_type != VDIR) {
 			if (ap->a_mode & VWRITE)
 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND);
 			if (ap->a_mode & VEXEC)
 				mode |= NFSV3ACCESS_EXECUTE;
 		} else {
 			if (ap->a_mode & VWRITE)
 				mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND |
 					 NFSV3ACCESS_DELETE);
 			if (ap->a_mode & VEXEC)
 				mode |= NFSV3ACCESS_LOOKUP;
 		}
 		/* XXX safety belt, only make blanket request if caching */
 		if (nfsaccess_cache_timeout > 0) {
 			wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | 
 				NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | 
 				NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP;
 		} else {
 			wmode = mode;
 		}
 
 		/*
 		 * Does our cached result allow us to give a definite yes to
 		 * this request?
 		 */
 		if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) &&
 		    (ap->a_cred->cr_uid == np->n_modeuid) &&
 		    ((np->n_mode & mode) == mode)) {
 			nfsstats.accesscache_hits++;
 		} else {
 			/*
 			 * Either a no, or a don't know.  Go to the wire.
 			 */
 			nfsstats.accesscache_misses++;
 		        error = nfs3_access_otw(vp, wmode, ap->a_p,ap->a_cred);
 			if (!error) {
 				if ((np->n_mode & mode) != mode) {
 					error = EACCES;
 				}
 			}
 		}
 		return (error);
 	} else {
 		if ((error = nfsspec_access(ap)) != 0)
 			return (error);
 
 		/*
 		 * Attempt to prevent a mapped root from accessing a file
 		 * which it shouldn't.  We try to read a byte from the file
 		 * if the user is root and the file is not zero length.
 		 * After calling nfsspec_access, we should have the correct
 		 * file size cached.
 		 */
 		if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
 		    && VTONFS(vp)->n_size > 0) {
 			struct iovec aiov;
 			struct uio auio;
 			char buf[1];
 
 			aiov.iov_base = buf;
 			aiov.iov_len = 1;
 			auio.uio_iov = &aiov;
 			auio.uio_iovcnt = 1;
 			auio.uio_offset = 0;
 			auio.uio_resid = 1;
 			auio.uio_segflg = UIO_SYSSPACE;
 			auio.uio_rw = UIO_READ;
 			auio.uio_procp = ap->a_p;
 
 			if (vp->v_type == VREG)
 				error = nfs_readrpc(vp, &auio, ap->a_cred);
 			else if (vp->v_type == VDIR) {
 				char* bp;
 				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
 				aiov.iov_base = bp;
 				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
 				error = nfs_readdirrpc(vp, &auio, ap->a_cred);
 				free(bp, M_TEMP);
 			} else if (vp->v_type == VLNK)
 				error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
 			else
 				error = EACCES;
 		}
 		return (error);
 	}
 }
 
 /*
  * nfs open vnode op
  * Check to see if the type is ok
  * and that deletion is not in progress.
  * For paged in text files, you will need to flush the page cache
  * if consistency is lost.
  */
 /* ARGSUSED */
 static int
 nfs_open(ap)
 	struct vop_open_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct vattr vattr;
 	int error;
 
 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) {
 #ifdef DIAGNOSTIC
 		printf("open eacces vtyp=%d\n",vp->v_type);
 #endif
 		return (EACCES);
 	}
 	/*
 	 * Get a valid lease. If cached data is stale, flush it.
 	 */
 	if (nmp->nm_flag & NFSMNT_NQNFS) {
 		if (NQNFS_CKINVALID(vp, np, ND_READ)) {
 		    do {
 			error = nqnfs_getlease(vp, ND_READ, ap->a_cred,
 			    ap->a_p);
 		    } while (error == NQNFS_EXPIRED);
 		    if (error)
 			return (error);
 		    if (np->n_lrev != np->n_brev ||
 			(np->n_flag & NQNFSNONCACHE)) {
 			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
 				ap->a_p, 1)) == EINTR)
 				return (error);
 			np->n_brev = np->n_lrev;
 		    }
 		}
 	} else {
 		if (np->n_flag & NMODIFIED) {
 			if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
 				ap->a_p, 1)) == EINTR)
 				return (error);
 			np->n_attrstamp = 0;
 			if (vp->v_type == VDIR)
 				np->n_direofoffset = 0;
 			error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
 			if (error)
 				return (error);
 			np->n_mtime = vattr.va_mtime.tv_sec;
 		} else {
 			error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p);
 			if (error)
 				return (error);
 			if (np->n_mtime != vattr.va_mtime.tv_sec) {
 				if (vp->v_type == VDIR)
 					np->n_direofoffset = 0;
 				if ((error = nfs_vinvalbuf(vp, V_SAVE,
 					ap->a_cred, ap->a_p, 1)) == EINTR)
 					return (error);
 				np->n_mtime = vattr.va_mtime.tv_sec;
 			}
 		}
 	}
 	if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
 		np->n_attrstamp = 0; /* For Open/Close consistency */
 	return (0);
 }
 
 /*
  * nfs close vnode op
  *
  * What an NFS client should do upon close after writing is a debatable
  * issue.  Most NFS clients push delayed writes to the server upon close,
  * basically for two reasons:
  * 1 - So that any write errors may be reported back to the client process
  *     doing the close system call. By far the two most likely errors are
  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
  * 2 - To put a worst case upper bound on cache inconsistency between
  *     multiple clients for the file.
  * There is also a consistency problem for Version 2 of the protocol w.r.t.
  * not being able to tell if other clients are writing a file concurrently,
  * since there is no way of knowing if the changed modify time in the reply
  * is only due to the write for this client.
  * (NFS Version 3 provides weak cache consistency data in the reply that
  *  should be sufficient to detect and handle this case.)
  *
  * What this routine does, for regular files only:
  * NFS Version 2 - play it safe and flush/invalidate all dirty buffers.
  * NFS Version 3 - flush dirty buffers to the server but don't invalidate
  *                 them; whether the flush also commits is selected by
  *                 nfsv3_commit_on_close.  Without the commit this
  *                 satisfies 1 and 2 except for the case where the server
  *                 crashes after this close but before the commit RPC,
  *                 which is felt to be "good enough".
  * NQNFS         - do nothing now, since 2 is dealt with via leases and
  *                 1 should be dealt with via an fsync() system call for
  *                 cases where write errors are important.
  *
  * A write error previously latched on the node (NWRITEERR) is returned
  * and cleared, overriding the result of the flush.
  */
 /* ARGSUSED */
 static int
 nfs_close(ap)
 	struct vop_close_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	int rv = 0;
 
 	if (vp->v_type != VREG)
 		return (rv);
 
 	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 &&
 	    (np->n_flag & NMODIFIED)) {
 		if (!NFS_ISV3(vp)) {
 			rv = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
 		} else {
 			/*
 			 * Under NFSv3 we have dirty buffers to dispose of.
 			 * We must flush them to the NFS server.  We have the
 			 * option of waiting all the way through the commit
 			 * rpc or just waiting for the initial write.  The
 			 * default is to only wait through the initial write
 			 * so the data is in the server's cache, which is
 			 * roughly similar to the state a standard disk
 			 * subsystem leaves the file in on close().
 			 *
 			 * We cannot clear the NMODIFIED bit in np->n_flag
 			 * due to potential races with other processes, and
 			 * certainly cannot clear it if we don't commit.
 			 */
 			rv = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p,
 			    nfsv3_commit_on_close != 0);
 		}
 		np->n_attrstamp = 0;
 	}
 
 	/* Report, once, any write error latched on the node. */
 	if (np->n_flag & NWRITEERR) {
 		np->n_flag &= ~NWRITEERR;
 		rv = np->n_error;
 	}
 	return (rv);
 }
 
 /*
  * nfs getattr call from vfs.
  *
  * Fills in *ap->a_vap for ap->a_vp.  The attribute cache is consulted
  * first; only on a miss is a GETATTR rpc sent to the server.
  * Returns 0 on success or the error left in `error' by the rpc macros.
  *
  * NOTE(review): the nfsm_* macros are defined outside this chunk.  The
  * locals cp, tl, t1, t2, bpos, dpos, mreq, mrep, md, mb and mb2 are never
  * referenced explicitly below, so they are presumably consumed by those
  * macros by name -- do not rename or remove them without checking.
  */
 static int
 nfs_getattr(ap)
 	struct vop_getattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	register caddr_t cp;
 	register u_int32_t *tl;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos;
 	int error = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(vp);
 	
 	/*
 	 * Update local times for special files.
 	 */
 	if (np->n_flag & (NACC | NUPD))
 		np->n_flag |= NCHG;
 	/*
 	 * First look in the cache.
 	 */
 	if (nfs_getattrcache(vp, ap->a_vap) == 0)
 		return (0);
 
 	/*
 	 * On a v3 mount with the access cache enabled, issue an ACCESS rpc
 	 * and then retry the attribute cache before falling back to GETATTR.
 	 * NOTE(review): this presumably relies on nfs3_access_otw() loading
 	 * fresh attributes as a side effect -- confirm against its definition.
 	 * Its return value is ignored here.
 	 */
 	if (v3 && nfsaccess_cache_timeout > 0) {
 		nfsstats.accesscache_misses++;
 		nfs3_access_otw(vp, NFSV3ACCESS_ALL, ap->a_p, ap->a_cred);
 		if (nfs_getattrcache(vp, ap->a_vap) == 0)
 			return (0);
 	}
 
 	/* Cache miss: build and send a GETATTR request for vp's file handle. */
 	nfsstats.rpccnt[NFSPROC_GETATTR]++;
 	nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3));
 	nfsm_fhtom(vp, v3);
 	nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
 	if (!error) {
 		/* Decode the returned attributes into the caller's vattr. */
 		nfsm_loadattr(vp, ap->a_vap);
 	}
 	nfsm_reqdone;
 	return (error);
 }
 
 /*
  * nfs setattr call.
  */
 static int
 nfs_setattr(ap)
 	struct vop_setattr_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	register struct vattr *vap = ap->a_vap;
 	int error = 0;
 	u_quad_t tsize;
 
 #ifndef nolint
 	tsize = (u_quad_t)0;
 #endif
 
 	/*
 	 * Setting of flags is not supported.
 	 */
 	if (vap->va_flags != VNOVAL)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Disallow write attempts if the filesystem is mounted read-only.
 	 */
   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
 	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
 		return (EROFS);
 	if (vap->va_size != VNOVAL) {
  		switch (vp->v_type) {
  		case VDIR:
  			return (EISDIR);
  		case VCHR:
  		case VBLK:
  		case VSOCK:
  		case VFIFO:
 			if (vap->va_mtime.tv_sec == VNOVAL &&
 			    vap->va_atime.tv_sec == VNOVAL &&
 			    vap->va_mode == (mode_t)VNOVAL &&
 			    vap->va_uid == (uid_t)VNOVAL &&
 			    vap->va_gid == (gid_t)VNOVAL)
 				return (0);
  			vap->va_size = VNOVAL;
  			break;
  		default:
 			/*
 			 * Disallow write attempts if the filesystem is
 			 * mounted read-only.
 			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			vnode_pager_setsize(vp, vap->va_size);
  			if (np->n_flag & NMODIFIED) {
  			    if (vap->va_size == 0)
  				error = nfs_vinvalbuf(vp, 0,
  					ap->a_cred, ap->a_p, 1);
  			    else
  				error = nfs_vinvalbuf(vp, V_SAVE,
  					ap->a_cred, ap->a_p, 1);
  			    if (error) {
 				vnode_pager_setsize(vp, np->n_size);
  				return (error);
 			    }
  			}
  			tsize = np->n_size;
  			np->n_size = np->n_vattr.va_size = vap->va_size;
   		};
   	} else if ((vap->va_mtime.tv_sec != VNOVAL ||
 		vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) &&
 		vp->v_type == VREG &&
   		(error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
 		 ap->a_p, 1)) == EINTR)
 		return (error);
 	error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p);
 	if (error && vap->va_size != VNOVAL) {
 		np->n_size = np->n_vattr.va_size = tsize;
 		vnode_pager_setsize(vp, np->n_size);
 	}
 	return (error);
 }
 
 /*
  * Do an nfs setattr rpc.
  *
  * Sends the attributes in *vap for vnode vp to the server on behalf of
  * cred/procp and returns whatever ends up in `error' (0 on success).
  *
  * NOTE(review): the nfsm_* macros are defined outside this chunk; the
  * locals cp, t1, t2, bpos, dpos, cp2, tl, mreq, mrep, md, mb and mb2 are
  * presumably consumed by them by name.
  */
 static int
 nfs_setattrrpc(vp, vap, cred, procp)
 	register struct vnode *vp;
 	register struct vattr *vap;
 	struct ucred *cred;
 	struct proc *procp;
 {
 	register struct nfsv2_sattr *sp;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	u_int32_t *tl;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(vp);
 
 	nfsstats.rpccnt[NFSPROC_SETATTR]++;
 	nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3));
 	nfsm_fhtom(vp, v3);
 	if (v3) {
 		/* v3: attribute block followed by one word set to nfs_false. */
 		nfsm_v3attrbuild(vap, TRUE);
 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl = nfs_false;
 	} else {
 		/*
 		 * v2: fill the fixed-layout sattr by hand.  mode, uid and gid
 		 * that are VNOVAL are sent as nfs_xdrneg1.
 		 */
 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		if (vap->va_mode == (mode_t)VNOVAL)
 			sp->sa_mode = nfs_xdrneg1;
 		else
 			sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode);
 		if (vap->va_uid == (uid_t)VNOVAL)
 			sp->sa_uid = nfs_xdrneg1;
 		else
 			sp->sa_uid = txdr_unsigned(vap->va_uid);
 		if (vap->va_gid == (gid_t)VNOVAL)
 			sp->sa_gid = nfs_xdrneg1;
 		else
 			sp->sa_gid = txdr_unsigned(vap->va_gid);
 		/* Size and times are passed through without a VNOVAL check. */
 		sp->sa_size = txdr_unsigned(vap->va_size);
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
 	nfsm_request(vp, NFSPROC_SETATTR, procp, cred);
 	if (v3) {
 		/* v3 reply carries wcc data for the file. */
 		nfsm_wcc_data(vp, wccflag);
 	} else
 		/* v2 reply carries the new attributes; load them for vp. */
 		nfsm_loadattr(vp, (struct vattr *)0);
 	nfsm_reqdone;
 	return (error);
 }
 
 /*
  * nfs lookup call, one step at a time...
  * First look in cache
  * If not found, unlock the directory nfsnode and do the rpc
  *
  * Looks up the component described by ap->a_cnp in directory ap->a_dvp
  * and, on success, returns the resulting vnode through *ap->a_vpp.
  * PDIRUNLOCK is set in cnp->cn_flags whenever dvp is left unlocked on
  * return.  For CREATE/RENAME of a missing last component the result is
  * EJUSTRETURN (or EROFS on a read-only mount) instead of ENOENT.
  *
  * Every return taken after nfsm_request() has succeeded must dispose of
  * the reply mbuf chain (mrep), either through nfsm_reqdone or with an
  * explicit m_freem().
  *
  * NOTE(review): the nfsm_* macros are defined outside this chunk; the
  * locals tl, cp, t1, t2, bpos, dpos, cp2, mreq, mrep, md, mb and mb2 are
  * presumably consumed by them by name, and nfsm_reqdone presumably
  * provides the nfsmout label targeted by the goto below.
  */
 static int
 nfs_lookup(ap)
 	struct vop_lookup_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	int flags = cnp->cn_flags;
 	struct vnode *newvp;
 	u_int32_t *tl;
 	caddr_t cp;
 	int32_t t1, t2;
 	struct nfsmount *nmp;
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	long len;
 	nfsfh_t *fhp;
 	struct nfsnode *np;
 	int lockparent, wantparent, error = 0, attrflag, fhsize;
 	int v3 = NFS_ISV3(dvp);
 	struct proc *p = cnp->cn_proc;
 
 	*vpp = NULLVP;
 	cnp->cn_flags &= ~PDIRUNLOCK;
 	/* Deleting or renaming the last component needs a writable mount. */
 	if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 	lockparent = flags & LOCKPARENT;
 	wantparent = flags & (LOCKPARENT|WANTPARENT);
 	nmp = VFSTONFS(dvp->v_mount);
 	np = VTONFS(dvp);
 	/*
 	 * A nonzero return other than ENOENT from cache_lookup() is treated
 	 * as a name cache hit with *vpp filled in.  The hit is only used if
 	 * the vnode is still the same one (v_id unchanged) and its ctime
 	 * still matches the value recorded in the nfsnode.
 	 */
 	if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
 		struct vattr vattr;
 		int vpid;
 
 		if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) != 0) {
 			*vpp = NULLVP;
 			return (error);
 		}
 
 		newvp = *vpp;
 		vpid = newvp->v_id;
 		/*
 		 * See the comment starting `Step through' in ufs/ufs_lookup.c
 		 * for an explanation of the locking protocol
 		 */
 		if (dvp == newvp) {
 			VREF(newvp);
 			error = 0;
 		} else if (flags & ISDOTDOT) {
 			VOP_UNLOCK(dvp, 0, p);
 			cnp->cn_flags |= PDIRUNLOCK;
 			error = vget(newvp, LK_EXCLUSIVE, p);
 			if (!error && lockparent && (flags & ISLASTCN)) {
 				error = vn_lock(dvp, LK_EXCLUSIVE, p);
 				if (error == 0)
 					cnp->cn_flags &= ~PDIRUNLOCK;
 			}
 		} else {
 			error = vget(newvp, LK_EXCLUSIVE, p);
 			if (!lockparent || error || !(flags & ISLASTCN)) {
 				VOP_UNLOCK(dvp, 0, p);
 				cnp->cn_flags |= PDIRUNLOCK;
 			}
 		}
 		if (!error) {
 			if (vpid == newvp->v_id) {
 				if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p)
 				    && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) {
 					nfsstats.lookupcache_hits++;
 					if (cnp->cn_nameiop != LOOKUP &&
 					    (flags & ISLASTCN))
 						cnp->cn_flags |= SAVENAME;
 					return (0);
 				}
 				/* Stale entry: drop it from the name cache. */
 				cache_purge(newvp);
 			}
 			vput(newvp);
 			if (lockparent && dvp != newvp && (flags & ISLASTCN))
 				VOP_UNLOCK(dvp, 0, p);
 		}
 		/* Cache hit was unusable: relock dvp and go to the server. */
 		error = vn_lock(dvp, LK_EXCLUSIVE, p);
 		*vpp = NULLVP;
 		if (error) {
 			cnp->cn_flags |= PDIRUNLOCK;
 			return (error);
 		}
 		cnp->cn_flags &= ~PDIRUNLOCK;
 	}
 	error = 0;
 	newvp = NULLVP;
 	nfsstats.lookupcache_misses++;
 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
 	len = cnp->cn_namelen;
 	nfsm_reqhead(dvp, NFSPROC_LOOKUP,
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
 	nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
 	if (error) {
 		nfsm_postop_attr(dvp, attrflag);
 		m_freem(mrep);
 		goto nfsmout;
 	}
 	nfsm_getfh(fhp, fhsize, v3);
 
 	/*
 	 * Handle RENAME case...
 	 */
 	if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
 		/* The name resolves to the directory itself. */
 		if (NFS_CMPFH(np, fhp, fhsize)) {
 			m_freem(mrep);
 			return (EISDIR);
 		}
 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
 		if (error) {
 			m_freem(mrep);
 			return (error);
 		}
 		newvp = NFSTOV(np);
 		if (v3) {
 			nfsm_postop_attr(newvp, attrflag);
 			nfsm_postop_attr(dvp, attrflag);
 		} else
 			nfsm_loadattr(newvp, (struct vattr *)0);
 		*vpp = newvp;
 		m_freem(mrep);
 		cnp->cn_flags |= SAVENAME;
 		if (!lockparent) {
 			VOP_UNLOCK(dvp, 0, p);
 			cnp->cn_flags |= PDIRUNLOCK;
 		}
 		return (0);
 	}
 
 	if (flags & ISDOTDOT) {
 		/* dvp is unlocked while the parent's node is obtained. */
 		VOP_UNLOCK(dvp, 0, p);
 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
 		if (error) {
 			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
 			/* Free the reply here; we bypass nfsm_reqdone. */
 			m_freem(mrep);
 			return (error);
 		}
 		newvp = NFSTOV(np);
 		if (lockparent && (flags & ISLASTCN)) {
 			error = vn_lock(dvp, LK_EXCLUSIVE, p);
 			if (error) {
 				cnp->cn_flags |= PDIRUNLOCK;
 				vput(newvp);
 				/* Free the reply here; we bypass nfsm_reqdone. */
 				m_freem(mrep);
 				return (error);
 			}
 		} else
 			cnp->cn_flags |= PDIRUNLOCK;
 	} else if (NFS_CMPFH(np, fhp, fhsize)) {
 		/* Lookup of the directory itself: just take another reference. */
 		VREF(dvp);
 		newvp = dvp;
 	} else {
 		error = nfs_nget(dvp->v_mount, fhp, fhsize, &np);
 		if (error) {
 			m_freem(mrep);
 			return (error);
 		}
 		if (!lockparent || !(flags & ISLASTCN)) {
 			cnp->cn_flags |= PDIRUNLOCK;
 			VOP_UNLOCK(dvp, 0, p);
 		}
 		newvp = NFSTOV(np);
 	}
 	if (v3) {
 		nfsm_postop_attr(newvp, attrflag);
 		nfsm_postop_attr(dvp, attrflag);
 	} else
 		nfsm_loadattr(newvp, (struct vattr *)0);
 	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 		cnp->cn_flags |= SAVENAME;
 	/*
 	 * Enter the result in the name cache, except for the last component
 	 * of a DELETE.  n_ctime is recorded so that a later cache hit can be
 	 * validated against the file's ctime (see the cache path above).
 	 */
 	if ((cnp->cn_flags & MAKEENTRY) &&
 	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
 		np->n_ctime = np->n_vattr.va_ctime.tv_sec;
 		cache_enter(dvp, newvp, cnp);
 	}
 	*vpp = newvp;
 	nfsm_reqdone;
 	if (error) {
 		if (newvp != NULLVP) {
 			vrele(newvp);
 			*vpp = NULLVP;
 		}
 		/*
 		 * A missing last component is not an error for CREATE and
 		 * RENAME: tell the caller to go ahead, unless the mount is
 		 * read-only.
 		 */
 		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
 		    (flags & ISLASTCN) && error == ENOENT) {
 			if (!lockparent) {
 				VOP_UNLOCK(dvp, 0, p);
 				cnp->cn_flags |= PDIRUNLOCK;
 			}
 			if (dvp->v_mount->mnt_flag & MNT_RDONLY)
 				error = EROFS;
 			else
 				error = EJUSTRETURN;
 		}
 		if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 			cnp->cn_flags |= SAVENAME;
 	}
 	return (error);
 }
 
 /*
  * nfs read vnode op.
  * Only regular files can be read this way; the actual work is delegated
  * to nfs_bioread().  Any other vnode type yields EPERM.
  */
 static int
 nfs_read(ap)
 	struct vop_read_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 
 	if (vp->v_type == VREG)
 		return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag,
 		    ap->a_cred));
 	return (EPERM);
 }
 
 /*
  * nfs readlink vnode op.
  * Valid for symbolic links only (EINVAL otherwise); the link contents are
  * fetched through nfs_bioread() with no I/O flags.
  */
 static int
 nfs_readlink(ap)
 	struct vop_readlink_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 
 	if (vp->v_type == VLNK)
 		return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
 	return (EINVAL);
 }
 
 /*
  * Do a readlink rpc.
  * Called by nfs_doio() from below the buffer cache.
  *
  * Copies the link contents of vp into uiop and returns the value left
  * in `error' (0 on success).
  *
  * NOTE(review): the nfsm_* macros are defined outside this chunk; the
  * locals tl, cp, t1, t2, bpos, dpos, cp2, mreq, mrep, md, mb and mb2 are
  * presumably consumed by them by name.
  */
 int
 nfs_readlinkrpc(vp, uiop, cred)
 	register struct vnode *vp;
 	struct uio *uiop;
 	struct ucred *cred;
 {
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, len, attrflag;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(vp);
 
 	nfsstats.rpccnt[NFSPROC_READLINK]++;
 	nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3));
 	nfsm_fhtom(vp, v3);
 	nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
 	/* v3 replies carry post-op attributes, processed even on error. */
 	if (v3)
 		nfsm_postop_attr(vp, attrflag);
 	if (!error) {
 		nfsm_strsiz(len, NFS_MAXPATHLEN);
 		/*
 		 * The reply claims the maximum path length.  If the node has
 		 * a smaller, nonzero size recorded, use that length instead.
 		 */
 		if (len == NFS_MAXPATHLEN) {
 			struct nfsnode *np = VTONFS(vp);
 			if (np->n_size && np->n_size < NFS_MAXPATHLEN)
 				len = np->n_size;
 		}
 		/* Copy len bytes of link text into the caller's uio. */
 		nfsm_mtouio(uiop, len);
 	}
 	nfsm_reqdone;
 	return (error);
 }
 
 /*
  * nfs read rpc call
  * Ditto above
  *
  * Reads uiop->uio_resid bytes starting at uiop->uio_offset from vp into
  * uiop, issuing as many READ rpcs of at most nm_rsize bytes as needed.
  * Returns EFBIG if the range extends past nm_maxfilesize, otherwise the
  * value left in `error' (0 on success).
  *
  * NOTE(review): the nfsm_* macros are defined outside this chunk; the
  * locals tl, cp, t1, t2, bpos, dpos, cp2, mreq, mrep, md, mb and mb2 are
  * presumably consumed by them by name, and on failure they presumably
  * jump to the nfsmout label below.
  */
 int
 nfs_readrpc(vp, uiop, cred)
 	register struct vnode *vp;
 	struct uio *uiop;
 	struct ucred *cred;
 {
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct nfsmount *nmp;
 	int error = 0, len, retlen, tsiz, eof, attrflag;
 	int v3 = NFS_ISV3(vp);
 
 #ifndef nolint
 	eof = 0;
 #endif
 	nmp = VFSTONFS(vp->v_mount);
 	tsiz = uiop->uio_resid;
 	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
 		return (EFBIG);
 	/* tsiz counts the bytes still to be requested. */
 	while (tsiz > 0) {
 		nfsstats.rpccnt[NFSPROC_READ]++;
 		/* Each rpc asks for at most nm_rsize bytes. */
 		len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
 		nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3);
 		nfsm_fhtom(vp, v3);
 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED * 3);
 		if (v3) {
 			/* v3: offset via txdr_hyper, then the count. */
 			txdr_hyper(uiop->uio_offset, tl);
 			*(tl + 2) = txdr_unsigned(len);
 		} else {
 			/* v2: offset, count, and a third word sent as zero. */
 			*tl++ = txdr_unsigned(uiop->uio_offset);
 			*tl++ = txdr_unsigned(len);
 			*tl = 0;
 		}
 		nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
 		if (v3) {
 			nfsm_postop_attr(vp, attrflag);
 			if (error) {
 				m_freem(mrep);
 				goto nfsmout;
 			}
 			/* Second of the two words dissected is the eof flag. */
 			nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			eof = fxdr_unsigned(int, *(tl + 1));
 		} else
 			nfsm_loadattr(vp, (struct vattr *)0);
 		/* Length of the returned data, then copy it into uiop. */
 		nfsm_strsiz(retlen, nmp->nm_rsize);
 		nfsm_mtouio(uiop, retlen);
 		m_freem(mrep);
 		tsiz -= retlen;
 		/*
 		 * Stop early at end of file: v3 reports it explicitly (or
 		 * returns no data); for v2 a short read is taken as eof.
 		 */
 		if (v3) {
 			if (eof || retlen == 0)
 				tsiz = 0;
 		} else if (retlen < len)
 			tsiz = 0;
 	}
 nfsmout:
 	return (error);
 }
 
 /*
  * nfs write call
  *
  * Writes uiop->uio_resid bytes from uiop to vp at uiop->uio_offset using
  * WRITE rpcs of at most nm_wsize bytes each.
  *
  * iomode      - in: value sent in each v3 request; out: set to the lowest
  *               commitment level seen in the replies (forced to
  *               NFSV3WRITE_FILESYNC on MNT_ASYNC mounts).
  * must_commit - out: set to 1 when a v3 reply carries a write verifier
  *               that differs from the one saved in the mount; else 0.
  *
  * Returns EFBIG if the range extends past nm_maxfilesize, otherwise the
  * value left in `error'.  On error uio_resid is set to the number of
  * bytes not yet accounted for.
  *
  * NOTE(review): the nfsm_* macros are defined outside this chunk; the
  * locals tl, cp, t1, t2, bpos, dpos, cp2, mreq, mrep, md, mb and mb2 are
  * presumably consumed by them by name, and on failure they presumably
  * jump to the nfsmout label below.
  */
 int
 nfs_writerpc(vp, uiop, cred, iomode, must_commit)
 	register struct vnode *vp;
 	register struct uio *uiop;
 	struct ucred *cred;
 	int *iomode, *must_commit;
 {
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2, backup;
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit;
 	int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC;
 
 	/*
 	 * NOTE(review): this sanity check is compiled in when DIAGNOSTIC is
 	 * NOT defined; confirm that is intended.
 	 */
 #ifndef DIAGNOSTIC
 	if (uiop->uio_iovcnt != 1)
 		panic("nfs: writerpc iovcnt > 1");
 #endif
 	*must_commit = 0;
 	tsiz = uiop->uio_resid;
 	if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize)
 		return (EFBIG);
 	/* tsiz counts the bytes still to be written. */
 	while (tsiz > 0) {
 		nfsstats.rpccnt[NFSPROC_WRITE]++;
 		/* Each rpc carries at most nm_wsize bytes. */
 		len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
 		nfsm_reqhead(vp, NFSPROC_WRITE,
 			NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len));
 		nfsm_fhtom(vp, v3);
 		if (v3) {
 			/* v3: offset (two words), len, *iomode, len again. */
 			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			txdr_hyper(uiop->uio_offset, tl);
 			tl += 2;
 			*tl++ = txdr_unsigned(len);
 			*tl++ = txdr_unsigned(*iomode);
 			*tl = txdr_unsigned(len);
 		} else {
 			register u_int32_t x;
 
 			nfsm_build(tl, u_int32_t *, 4 * NFSX_UNSIGNED);
 			/* Set both "begin" and "current" to non-garbage. */
 			x = txdr_unsigned((u_int32_t)uiop->uio_offset);
 			*tl++ = x;	/* "begin offset" */
 			*tl++ = x;	/* "current offset" */
 			x = txdr_unsigned(len);
 			*tl++ = x;	/* total to this offset */
 			*tl = x;	/* size of this write */
 		}
 		/* Append len bytes of data taken from the caller's uio. */
 		nfsm_uiotom(uiop, len);
 		nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
 		if (v3) {
 			wccflag = NFSV3_WCCCHK;
 			nfsm_wcc_data(vp, wccflag);
 			if (!error) {
 				nfsm_dissect(tl, u_int32_t *, 2 * NFSX_UNSIGNED
 					+ NFSX_V3WRITEVERF);
 				/* Number of bytes the server reports written. */
 				rlen = fxdr_unsigned(int, *tl++);
 				if (rlen == 0) {
 					error = NFSERR_IO;
 					m_freem(mrep);
 					break;
 				} else if (rlen < len) {
 					/*
 					 * Short write: rewind the uio by the
 					 * unwritten amount so that those bytes
 					 * are sent again by the next rpc.
 					 */
 					backup = len - rlen;
 					uiop->uio_iov->iov_base -= backup;
 					uiop->uio_iov->iov_len += backup;
 					uiop->uio_offset -= backup;
 					uiop->uio_resid += backup;
 					len = rlen;
 				}
 				commit = fxdr_unsigned(int, *tl++);
 
 				/*
 				 * Return the lowest commitment level
 				 * obtained by any of the RPCs.
 				 */
 				if (committed == NFSV3WRITE_FILESYNC)
 					committed = commit;
 				else if (committed == NFSV3WRITE_DATASYNC &&
 					commit == NFSV3WRITE_UNSTABLE)
 					committed = commit;
 				/*
 				 * Save the first write verifier seen for this
 				 * mount; if a later one differs, remember the
 				 * new one and tell the caller to commit.
 				 */
 				if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){
 				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 					NFSX_V3WRITEVERF);
 				    nmp->nm_state |= NFSSTA_HASWRITEVERF;
 				} else if (bcmp((caddr_t)tl,
 				    (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) {
 				    *must_commit = 1;
 				    bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 					NFSX_V3WRITEVERF);
 				}
 			}
 		} else
 		    nfsm_loadattr(vp, (struct vattr *)0);
 		/*
 		 * NOTE(review): wccflag is presumably updated by
 		 * nfsm_wcc_data(); when it is nonzero the cached mtime is
 		 * adopted as our own n_mtime -- confirm the macro's contract.
 		 */
 		if (wccflag)
 		    VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec;
 		m_freem(mrep);
 		if (error)
 			break;
 		tsiz -= len;
 	}
 nfsmout:
 	/* Async mounts always report the data as fully committed. */
 	if (vp->v_mount->mnt_flag & MNT_ASYNC)
 		committed = NFSV3WRITE_FILESYNC;
 	*iomode = committed;
 	if (error)
 		uiop->uio_resid = tsiz;
 	return (error);
 }
 
 /*
  * nfs mknod rpc
  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
  * mode set to specify the file type and the size field for rdev.
  *
  * Creates the special file named by cnp in directory dvp with attributes
  * vap and, on success, returns its vnode through *vpp.  Only VCHR, VBLK,
  * VFIFO and VSOCK are accepted; anything else yields EOPNOTSUPP.
  *
  * NOTE(review): the nfsm_* macros are defined outside this chunk; the
  * locals tl, cp, t1, t2, cp2, bpos, dpos, mreq, mrep, md, mb and mb2 are
  * presumably consumed by them by name.
  */
 static int
 nfs_mknodrpc(dvp, vpp, cnp, vap)
 	register struct vnode *dvp;
 	register struct vnode **vpp;
 	register struct componentname *cnp;
 	register struct vattr *vap;
 {
 	register struct nfsv2_sattr *sp;
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	struct vnode *newvp = (struct vnode *)0;
 	struct nfsnode *np = (struct nfsnode *)0;
 	struct vattr vattr;
 	char *cp2;
 	caddr_t bpos, dpos;
 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	u_int32_t rdev;
 	int v3 = NFS_ISV3(dvp);
 
 	/* rdev is only used by the v2 request, where it rides in sa_size. */
 	if (vap->va_type == VCHR || vap->va_type == VBLK)
 		rdev = txdr_unsigned(vap->va_rdev);
 	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
 		rdev = nfs_xdrneg1;
 	else {
 		return (EOPNOTSUPP);
 	}
 	/* The result in vattr is not used; only the error return is checked. */
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
 		return (error);
 	}
 	nfsstats.rpccnt[NFSPROC_MKNOD]++;
 	nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED +
 		+ nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 	if (v3) {
 		/* v3: file type, attributes, then major/minor for devices. */
 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 		*tl++ = vtonfsv3_type(vap->va_type);
 		nfsm_v3attrbuild(vap, FALSE);
 		if (vap->va_type == VCHR || vap->va_type == VBLK) {
 			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = txdr_unsigned(umajor(vap->va_rdev));
 			*tl = txdr_unsigned(uminor(vap->va_rdev));
 		}
 	} else {
 		/* v2: type goes in the mode bits, rdev in the size field. */
 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
 		sp->sa_uid = nfs_xdrneg1;
 		sp->sa_gid = nfs_xdrneg1;
 		sp->sa_size = rdev;
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
 	nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred);
 	if (!error) {
 		nfsm_mtofh(dvp, newvp, v3, gotvp);
 		/*
 		 * The reply did not yield a vnode (gotvp clear): look the
 		 * new name up explicitly instead.
 		 */
 		if (!gotvp) {
 			if (newvp) {
 				vput(newvp);
 				newvp = (struct vnode *)0;
 			}
 			error = nfs_lookitup(dvp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
 			if (!error)
 				newvp = NFSTOV(np);
 		}
 	}
 	if (v3)
 		nfsm_wcc_data(dvp, wccflag);
 	nfsm_reqdone;
 	if (error) {
 		if (newvp)
 			vput(newvp);
 	} else {
 		if (cnp->cn_flags & MAKEENTRY)
 			cache_enter(dvp, newvp, cnp);
 		*vpp = newvp;
 	}
 	/* The directory changed; invalidate its cached attributes if wccflag is clear. */
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs mknod vnode op.
  * Thin wrapper: unpack the vop arguments and let nfs_mknodrpc() do the
  * work, returning its result unchanged.
  */
 /* ARGSUSED */
 static int
 nfs_mknod(ap)
 	struct vop_mknod_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct componentname *cnp = ap->a_cnp;
 	struct vattr *vap = ap->a_vap;
 
 	return (nfs_mknodrpc(dvp, vpp, cnp, vap));
 }
 
 /* Counter used to build the verifier sent with exclusive v3 creates. */
 static u_long create_verf;
 /*
  * nfs file create call
  *
  * Creates the file named by ap->a_cnp in directory ap->a_dvp with the
  * attributes in ap->a_vap and, on success, returns its vnode through
  * *ap->a_vpp.  Sockets are handed to nfs_mknodrpc() instead.
  *
  * NOTE(review): the nfsm_* macros are defined outside this chunk; the
  * locals tl, cp, t1, t2, bpos, dpos, cp2, mreq, mrep, md, mb and mb2 are
  * presumably consumed by them by name.
  */
 static int
 nfs_create(ap)
 	struct vop_create_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	register struct vnode *dvp = ap->a_dvp;
 	register struct vattr *vap = ap->a_vap;
 	register struct componentname *cnp = ap->a_cnp;
 	register struct nfsv2_sattr *sp;
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	struct nfsnode *np = (struct nfsnode *)0;
 	struct vnode *newvp = (struct vnode *)0;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct vattr vattr;
 	int v3 = NFS_ISV3(dvp);
 
 	/*
 	 * Oops, not for me..
 	 */
 	if (vap->va_type == VSOCK)
 		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
 
 	/* The result in vattr is not used; only the error return is checked. */
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
 		return (error);
 	}
 	if (vap->va_vaflags & VA_EXCLUSIVE)
 		fmode |= O_EXCL;
 	/* Re-entered with O_EXCL cleared when the server rejects exclusive create. */
 again:
 	nfsstats.rpccnt[NFSPROC_CREATE]++;
 	nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED +
 		nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 	if (v3) {
 		nfsm_build(tl, u_int32_t *, NFSX_UNSIGNED);
 		if (fmode & O_EXCL) {
 			/*
 			 * Exclusive create: send a two-word verifier instead
 			 * of attributes.  The first word is the address of
 			 * the first configured interface when there is one,
 			 * else the current counter; the second word is the
 			 * incremented counter.
 			 */
 			*tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE);
 			nfsm_build(tl, u_int32_t *, NFSX_V3CREATEVERF);
 #ifdef INET
 			if (!TAILQ_EMPTY(&in_ifaddrhead))
 				*tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr;
 			else
 #endif
 				*tl++ = create_verf;
 			*tl = ++create_verf;
 		} else {
 			*tl = txdr_unsigned(NFSV3CREATE_UNCHECKED);
 			nfsm_v3attrbuild(vap, FALSE);
 		}
 	} else {
 		/* v2: fixed-layout sattr with size 0 and uid/gid left unset. */
 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode);
 		sp->sa_uid = nfs_xdrneg1;
 		sp->sa_gid = nfs_xdrneg1;
 		sp->sa_size = 0;
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
 	nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
 	if (!error) {
 		nfsm_mtofh(dvp, newvp, v3, gotvp);
 		/*
 		 * The reply did not yield a vnode (gotvp clear): look the
 		 * new name up explicitly instead.
 		 */
 		if (!gotvp) {
 			if (newvp) {
 				vput(newvp);
 				newvp = (struct vnode *)0;
 			}
 			error = nfs_lookitup(dvp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np);
 			if (!error)
 				newvp = NFSTOV(np);
 		}
 	}
 	if (v3)
 		nfsm_wcc_data(dvp, wccflag);
 	nfsm_reqdone;
 	if (error) {
 		/* Server lacks exclusive create: retry as an unchecked create. */
 		if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) {
 			fmode &= ~O_EXCL;
 			goto again;
 		}
 		if (newvp)
 			vput(newvp);
 	} else if (v3 && (fmode & O_EXCL)) {
 		/*
 		 * We are normally called with only a partially initialized
 		 * VAP.  Since the NFSv3 spec says that server may use the
 		 * file attributes to store the verifier, the spec requires
 		 * us to do a SETATTR RPC. FreeBSD servers store the verifier
 		 * in atime, but we can't really assume that all servers will
 		 * so we ensure that our SETATTR sets both atime and mtime.
 		 */
 		if (vap->va_mtime.tv_sec == VNOVAL)
 			vfs_timestamp(&vap->va_mtime);
 		if (vap->va_atime.tv_sec == VNOVAL)
 			vap->va_atime = vap->va_mtime;
 		error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc);
 	}
 	if (!error) {
 		if (cnp->cn_flags & MAKEENTRY)
 			cache_enter(dvp, newvp, cnp);
 		*ap->a_vpp = newvp;
 	}
 	/* The directory changed; invalidate its cached attributes if wccflag is clear. */
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs file remove call
  * To try and make nfs semantics closer to ufs semantics, a file that has
  * other processes using the vnode is renamed instead of removed and then
  * removed later on the last close.
  * - If v_usecount > 1
  *	  If a rename is not already in the works
  *	     call nfs_sillyrename() to set it up
  *     else
  *	  do the remove rpc
  */
 static int
 nfs_remove(ap)
 	struct vop_remove_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode * a_dvp;
 		struct vnode * a_vp;
 		struct componentname * a_cnp;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct vnode *dvp = ap->a_dvp;
 	register struct componentname *cnp = ap->a_cnp;
 	register struct nfsnode *np = VTONFS(vp);
 	int error = 0;
 	struct vattr vattr;
 
 #ifndef DIAGNOSTIC
 	if ((cnp->cn_flags & HASBUF) == 0)
 		panic("nfs_remove: no name");
 	if (vp->v_usecount < 1)
 		panic("nfs_remove: bad v_usecount");
 #endif
 	if (vp->v_type == VDIR)
 		error = EPERM;
 	else if (vp->v_usecount == 1 || (np->n_sillyrename &&
 	    VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
 	    vattr.va_nlink > 1)) {
 		/*
 		 * Purge the name cache so that the chance of a lookup for
 		 * the name succeeding while the remove is in progress is
 		 * minimized. Without node locking it can still happen, such
 		 * that an I/O op returns ESTALE, but since you get this if
 		 * another host removes the file..
 		 */
 		cache_purge(vp);
 		/*
 		 * throw away biocache buffers, mainly to avoid
 		 * unnecessary delayed writes later.
 		 */
 		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
 		/* Do the rpc */
 		if (error != EINTR)
 			error = nfs_removerpc(dvp, cnp->cn_nameptr,
 				cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc);
 		/*
 		 * Kludge City: If the first reply to the remove rpc is lost..
 		 *   the reply to the retransmitted request will be ENOENT
 		 *   since the file was in fact removed
 		 *   Therefore, we cheat and return success.
 		 */
 		if (error == ENOENT)
 			error = 0;
 	} else if (!np->n_sillyrename)
 		error = nfs_sillyrename(dvp, vp, cnp);
 	np->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs file remove rpc called from nfs_inactive
  * Removes the entry recorded in the sillyrename structure sp by issuing
  * the remove rpc with a null process pointer.
  */
 int
 nfs_removeit(sp)
 	register struct sillyrename *sp;
 {
 	struct proc *noproc = (struct proc *)0;
 
 	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen,
 	    sp->s_cred, noproc));
 }
 
 /*
  * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
  *
  * Asks the server to remove the entry `name' (namelen bytes) from the
  * directory dvp, on behalf of cred/proc.  Returns the value left in
  * `error' (0 on success).
  *
  * NOTE(review): the nfsm_* macros are defined outside this chunk; the
  * locals tl, cp, t1, t2, bpos, dpos, cp2, mreq, mrep, md, mb and mb2 are
  * presumably consumed by them by name.
  */
 static int
 nfs_removerpc(dvp, name, namelen, cred, proc)
 	register struct vnode *dvp;
 	const char *name;
 	int namelen;
 	struct ucred *cred;
 	struct proc *proc;
 {
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(dvp);
 
 	nfsstats.rpccnt[NFSPROC_REMOVE]++;
 	/* Request body: directory file handle followed by the name. */
 	nfsm_reqhead(dvp, NFSPROC_REMOVE,
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(name, namelen, NFS_MAXNAMLEN);
 	nfsm_request(dvp, NFSPROC_REMOVE, proc, cred);
 	if (v3)
 		nfsm_wcc_data(dvp, wccflag);
 	nfsm_reqdone;
 	/* The directory changed; invalidate its cached attributes if wccflag is clear. */
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs file rename call
  */
 static int
 nfs_rename(ap)
 	struct vop_rename_args  /* {
 		struct vnode *a_fdvp;
 		struct vnode *a_fvp;
 		struct componentname *a_fcnp;
 		struct vnode *a_tdvp;
 		struct vnode *a_tvp;
 		struct componentname *a_tcnp;
 	} */ *ap;
 {
 	register struct vnode *fvp = ap->a_fvp;
 	register struct vnode *tvp = ap->a_tvp;
 	register struct vnode *fdvp = ap->a_fdvp;
 	register struct vnode *tdvp = ap->a_tdvp;
 	register struct componentname *tcnp = ap->a_tcnp;
 	register struct componentname *fcnp = ap->a_fcnp;
 	int error;
 
 #ifndef DIAGNOSTIC
 	if ((tcnp->cn_flags & HASBUF) == 0 ||
 	    (fcnp->cn_flags & HASBUF) == 0)
 		panic("nfs_rename: no name");
 #endif
 	/* Check for cross-device rename */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
 		goto out;
 	}
 
 	/*
 	 * We have to flush B_DELWRI data prior to renaming
 	 * the file.  If we don't, the delayed-write buffers
 	 * can be flushed out later after the file has gone stale
 	 * under NFSV3.  NFSV2 does not have this problem because
 	 * ( as far as I can tell ) it flushes dirty buffers more
 	 * often.
 	 */
 
 	VOP_FSYNC(fvp, fcnp->cn_cred, MNT_WAIT, fcnp->cn_proc);
 	if (tvp)
 	    VOP_FSYNC(tvp, tcnp->cn_cred, MNT_WAIT, tcnp->cn_proc);
 
 	/*
 	 * If the tvp exists and is in use, sillyrename it before doing the
 	 * rename of the new file over it.
 	 * XXX Can't sillyrename a directory.
 	 */
 	if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename &&
 		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
 		vput(tvp);
 		tvp = NULL;
 	}
 
 	error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen,
 		tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
 		tcnp->cn_proc);
 
 	if (fvp->v_type == VDIR) {
 		if (tvp != NULL && tvp->v_type == VDIR)
 			cache_purge(tdvp);
 		cache_purge(fdvp);
 	}
 
 out:
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp)
 		vput(tvp);
 	vrele(fdvp);
 	vrele(fvp);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
 	 */
 	if (error == ENOENT)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs file rename rpc called from nfs_remove() above
  */
 static int
 nfs_renameit(sdvp, scnp, sp)
 	struct vnode *sdvp;
 	struct componentname *scnp;
 	register struct sillyrename *sp;
 {
 	return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen,
 		sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc));
 }
 
 /*
  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
  *
  * Asks the server to rename entry fnameptr/fnamelen in directory fdvp to
  * tnameptr/tnamelen in directory tdvp, on behalf of cred/proc.  Returns
  * the value left in `error' (0 on success).
  *
  * NOTE(review): the nfsm_* macros are defined outside this chunk; the
  * locals tl, cp, t1, t2, bpos, dpos, cp2, mreq, mrep, md, mb and mb2 are
  * presumably consumed by them by name.
  */
 static int
 nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc)
 	register struct vnode *fdvp;
 	const char *fnameptr;
 	int fnamelen;
 	register struct vnode *tdvp;
 	const char *tnameptr;
 	int tnamelen;
 	struct ucred *cred;
 	struct proc *proc;
 {
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(fdvp);
 
 	nfsstats.rpccnt[NFSPROC_RENAME]++;
 	/* Request body: (file handle, name) for the source, then the target. */
 	nfsm_reqhead(fdvp, NFSPROC_RENAME,
 		(NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) +
 		nfsm_rndup(tnamelen));
 	nfsm_fhtom(fdvp, v3);
 	nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN);
 	nfsm_fhtom(tdvp, v3);
 	nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN);
 	nfsm_request(fdvp, NFSPROC_RENAME, proc, cred);
 	if (v3) {
 		/* v3 reply carries wcc data for both directories, source first. */
 		nfsm_wcc_data(fdvp, fwccflag);
 		nfsm_wcc_data(tdvp, twccflag);
 	}
 	nfsm_reqdone;
 	/* Both directories changed; invalidate cached attributes where the wcc flag is clear. */
 	VTONFS(fdvp)->n_flag |= NMODIFIED;
 	VTONFS(tdvp)->n_flag |= NMODIFIED;
 	if (!fwccflag)
 		VTONFS(fdvp)->n_attrstamp = 0;
 	if (!twccflag)
 		VTONFS(tdvp)->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs hard link create call
  *
  * Creates a new name (ap->a_cnp) in directory ap->a_tdvp for the existing
  * file ap->a_vp by sending an NFSPROC_LINK request.
  *
  * Returns EXDEV when the file and the target directory are on different
  * mounts; otherwise the value left in "error" by the nfsm_* macros, with
  * EEXIST mapped to 0.
  */
 static int
 nfs_link(ap)
 	struct vop_link_args /* {
 		struct vnode *a_tdvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct vnode *tdvp = ap->a_tdvp;
 	register struct componentname *cnp = ap->a_cnp;
 	/*
 	 * NOTE(review): tl, cp, t1, t2, bpos, dpos, cp2 and the mbuf pointers
 	 * are presumably used by name inside the nfsm_* macros -- confirm.
 	 */
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3;
 
 	/* Links may not cross mount points. */
 	if (vp->v_mount != tdvp->v_mount) {
 		return (EXDEV);
 	}
 
 	/*
 	 * Push all writes to the server, so that the attribute cache
 	 * doesn't get "out of sync" with the server.
 	 * XXX There should be a better way!
 	 *
 	 * The return value of VOP_FSYNC() is not examined.
 	 */
 	VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc);
 
 	v3 = NFS_ISV3(vp);
 	nfsstats.rpccnt[NFSPROC_LINK]++;
 	nfsm_reqhead(vp, NFSPROC_LINK,
 		NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
 	/* Request body order: file handle, target directory handle, new name. */
 	nfsm_fhtom(vp, v3);
 	nfsm_fhtom(tdvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 	nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
 	/* Only the v3 reply is parsed: file attributes, then directory wcc data. */
 	if (v3) {
 		nfsm_postop_attr(vp, attrflag);
 		nfsm_wcc_data(tdvp, wccflag);
 	}
 	nfsm_reqdone;
 	VTONFS(tdvp)->n_flag |= NMODIFIED;
 	/*
 	 * attrflag starts at 0, so on a v2 mount (where the block above is
 	 * skipped) the file's n_attrstamp is always zeroed here.
 	 */
 	if (!attrflag)
 		VTONFS(vp)->n_attrstamp = 0;
 	if (!wccflag)
 		VTONFS(tdvp)->n_attrstamp = 0;
 	/*
 	 * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
 	 */
 	if (error == EEXIST)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs symbolic link create call
  *
  * Creates the symbolic link named by ap->a_cnp in directory ap->a_dvp,
  * pointing at the string ap->a_target, with the attributes in ap->a_vap.
  * On success the new vnode is stored in *ap->a_vpp.
  *
  * Returns 0 on success, otherwise the error left by the nfsm_* macros or
  * by nfs_lookitup().  EEXIST from the server is converted to 0.
  */
 static int
 nfs_symlink(ap)
 	struct vop_symlink_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 		char *a_target;
 	} */ *ap;
 {
 	register struct vnode *dvp = ap->a_dvp;
 	register struct vattr *vap = ap->a_vap;
 	register struct componentname *cnp = ap->a_cnp;
 	register struct nfsv2_sattr *sp;
 	/*
 	 * NOTE(review): tl, cp, t1, t2, bpos, dpos, cp2 and the mbuf pointers
 	 * are presumably used by name inside the nfsm_* macros -- confirm.
 	 */
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	/* gotvp is only handed to nfsm_mtofh(); it is not read in this function. */
 	int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct vnode *newvp = (struct vnode *)0;
 	int v3 = NFS_ISV3(dvp);
 
 	nfsstats.rpccnt[NFSPROC_SYMLINK]++;
 	slen = strlen(ap->a_target);
 	nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED +
 	    nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 	/*
 	 * The attributes go before the target string for v3 and after it
 	 * for v2.
 	 */
 	if (v3) {
 		nfsm_v3attrbuild(vap, FALSE);
 	}
 	nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
 	if (!v3) {
 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode);
 		sp->sa_uid = nfs_xdrneg1;
 		sp->sa_gid = nfs_xdrneg1;
 		sp->sa_size = nfs_xdrneg1;
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
 
 	/*
 	 * Issue the NFS request and get the rpc response.
 	 *
 	 * Only NFSv3 responses returning an error of 0 actually return
 	 * a file handle that can be converted into newvp without having
 	 * to do an extra lookup rpc.
 	 */
 	nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
 	if (v3) {
 		if (error == 0)
 			nfsm_mtofh(dvp, newvp, v3, gotvp);
 		nfsm_wcc_data(dvp, wccflag);
 	}
 
 	/*
 	 * out code jumps -> here, mrep is also freed.
 	 */
 
 	nfsm_reqdone;
 
 	/*
 	 * If we get an EEXIST error, silently convert it to no-error
 	 * in case of an NFS retry.
 	 */
 	if (error == EEXIST)
 		error = 0;
 
 	/*
 	 * If we do not have (or no longer have) an error, and we could
 	 * not extract the newvp from the response due to the request being
 	 * NFSv2 or the error being EEXIST.  We have to do a lookup in order
 	 * to obtain a newvp to return.  
 	 */
 	if (error == 0 && newvp == NULL) {
 		struct nfsnode *np = NULL;
 
 		/* np starts out NULL, which asks nfs_lookitup() for a new nfsnode. */
 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 		    cnp->cn_cred, cnp->cn_proc, &np);
 		if (!error)
 			newvp = NFSTOV(np);
 	}
 	if (error) {
 		/* Failure: drop any vnode we obtained instead of returning it. */
 		if (newvp)
 			vput(newvp);
 	} else {
 		*ap->a_vpp = newvp;
 	}
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	return (error);
 }
 
 /*
  * nfs make dir call
  *
  * Sends an NFSPROC_MKDIR request creating the directory named by
  * ap->a_cnp under ap->a_dvp with the attributes in ap->a_vap.  On success
  * the new directory's vnode is stored in *ap->a_vpp.
  *
  * If the server answers EEXIST, or succeeds without giving us a usable
  * vnode, the name is looked up instead; an existing entry that is not a
  * directory yields EEXIST.
  *
  * Vnodes obtained here (via nfsm_mtofh() or nfs_lookitup()) are released
  * with vput(), matching how nfs_symlink() and nfs_lookitup() release
  * vnodes obtained the same way; using vrele() would leave them locked.
  *
  * Returns 0 on success or an errno / NFS error value.
  */
 static int
 nfs_mkdir(ap)
 	struct vop_mkdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	register struct vnode *dvp = ap->a_dvp;
 	register struct vattr *vap = ap->a_vap;
 	register struct componentname *cnp = ap->a_cnp;
 	register struct nfsv2_sattr *sp;
 	/* tl, cp, t1, t2, bpos, dpos, cp2 and the mbufs serve the nfsm_* macros. */
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	register int len;
 	struct nfsnode *np = (struct nfsnode *)0;
 	struct vnode *newvp = (struct vnode *)0;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	int gotvp = 0;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	struct vattr vattr;
 	int v3 = NFS_ISV3(dvp);
 
 	/* Only the status of this call is used; vattr itself is not read. */
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) != 0) {
 		return (error);
 	}
 	len = cnp->cn_namelen;
 	nfsstats.rpccnt[NFSPROC_MKDIR]++;
 	nfsm_reqhead(dvp, NFSPROC_MKDIR,
 	  NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
 	if (v3) {
 		nfsm_v3attrbuild(vap, FALSE);
 	} else {
 		nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR);
 		sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode);
 		sp->sa_uid = nfs_xdrneg1;
 		sp->sa_gid = nfs_xdrneg1;
 		sp->sa_size = nfs_xdrneg1;
 		txdr_nfsv2time(&vap->va_atime, &sp->sa_atime);
 		txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime);
 	}
 	nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
 	if (!error)
 		nfsm_mtofh(dvp, newvp, v3, gotvp);
 	if (v3)
 		nfsm_wcc_data(dvp, wccflag);
 	nfsm_reqdone;
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	/*
 	 * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
 	 * if we can succeed in looking up the directory.
 	 */
 	if (error == EEXIST || (!error && !gotvp)) {
 		if (newvp) {
 			/*
 			 * Release with vput() so the vnode is unlocked before
 			 * the lookup below may try to obtain it again.
 			 */
 			vput(newvp);
 			newvp = (struct vnode *)0;
 		}
 		error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred,
 			cnp->cn_proc, &np);
 		if (!error) {
 			newvp = NFSTOV(np);
 			if (newvp->v_type != VDIR)
 				error = EEXIST;
 		}
 	}
 	if (error) {
 		if (newvp) {
 			/*
 			 * Same release convention as nfs_lookitup(): the
 			 * directory itself only carries an extra reference,
 			 * any other vnode is also unlocked.
 			 */
 			if (newvp == dvp)
 				vrele(newvp);
 			else
 				vput(newvp);
 		}
 	} else
 		*ap->a_vpp = newvp;
 	return (error);
 }
 
 /*
  * nfs remove directory call
  *
  * Sends an NFSPROC_RMDIR request removing the entry named by ap->a_cnp
  * from directory ap->a_dvp; ap->a_vp is the vnode of the directory being
  * removed.
  *
  * Returns EINVAL when asked to remove a directory from itself, otherwise
  * the value left in "error" by the nfsm_* macros, with ENOENT mapped to 0.
  */
 static int
 nfs_rmdir(ap)
 	struct vop_rmdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct vnode *dvp = ap->a_dvp;
 	register struct componentname *cnp = ap->a_cnp;
 	/*
 	 * NOTE(review): tl, cp, t1, t2, bpos, dpos, cp2 and the mbuf pointers
 	 * are presumably used by name inside the nfsm_* macros -- confirm.
 	 */
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	int v3 = NFS_ISV3(dvp);
 
 	if (dvp == vp)
 		return (EINVAL);
 	nfsstats.rpccnt[NFSPROC_RMDIR]++;
 	nfsm_reqhead(dvp, NFSPROC_RMDIR,
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen));
 	/* Request body: parent directory handle followed by the name. */
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
 	nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
 	if (v3)
 		nfsm_wcc_data(dvp, wccflag);
 	nfsm_reqdone;
 	VTONFS(dvp)->n_flag |= NMODIFIED;
 	if (!wccflag)
 		VTONFS(dvp)->n_attrstamp = 0;
 	/* Name cache entries for both vnodes are purged regardless of error. */
 	cache_purge(dvp);
 	cache_purge(vp);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
 	 */
 	if (error == ENOENT)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs readdir call
  */
 static int
 nfs_readdir(ap)
 	struct vop_readdir_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	register struct uio *uio = ap->a_uio;
 	int tresid, error;
 	struct vattr vattr;
 
 	if (vp->v_type != VDIR)
 		return (EPERM);
 	/*
 	 * First, check for hit on the EOF offset cache
 	 */
 	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
 	    (np->n_flag & NMODIFIED) == 0) {
 		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) {
 			if (NQNFS_CKCACHABLE(vp, ND_READ)) {
 				nfsstats.direofcache_hits++;
 				return (0);
 			}
 		} else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 &&
 			np->n_mtime == vattr.va_mtime.tv_sec) {
 			nfsstats.direofcache_hits++;
 			return (0);
 		}
 	}
 
 	/*
 	 * Call nfs_bioread() to do the real work.
 	 */
 	tresid = uio->uio_resid;
 	error = nfs_bioread(vp, uio, 0, ap->a_cred);
 
 	if (!error && uio->uio_resid == tresid)
 		nfsstats.direofcache_misses++;
 	return (error);
 }
 
 /*
  * Readdir rpc call.
  * Called from below the buffer cache by nfs_doio().
  *
  * Reads directory entries for vp starting at uiop->uio_offset and stores
  * them in the single iovec of uiop as struct dirent records.  Records are
  * laid out so that none crosses a DIRBLKSIZ boundary: the last record of
  * each block has its d_reclen stretched to the end of the block.
  *
  * Returns 0 on success, NFSERR_BAD_COOKIE when no directory cookie is
  * known for the starting offset, EBADRPC when the server sends a name
  * length outside 1..NFS_MAXNAMLEN, or the error left by the nfsm_* macros.
  */
 int
 nfs_readdirrpc(vp, uiop, cred)
 	struct vnode *vp;
 	register struct uio *uiop;
 	struct ucred *cred;
 
 {
 	register int len, left;
 	register struct dirent *dp = NULL;
 	/*
 	 * NOTE(review): t1, t2, bpos, dpos, cp2 and the mbuf pointers are
 	 * presumably used by name inside the nfsm_* macros -- confirm.
 	 */
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	register nfsuint64 *cookiep;
 	caddr_t bpos, dpos, cp2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	nfsuint64 cookie;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct nfsnode *dnp = VTONFS(vp);
 	u_quad_t fileno;
 	/*
 	 * more_dirs: more entries/replies are expected.
 	 * blksiz:    bytes already used in the current DIRBLKSIZ block.
 	 * bigenough: cleared once the next record no longer fits in uiop.
 	 */
 	int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1;
 	int attrflag;
 	int v3 = NFS_ISV3(vp);
 
 	/*
 	 * NOTE(review): as written this sanity check is compiled in only
 	 * when DIAGNOSTIC is NOT defined -- confirm that sense is intended.
 	 */
 #ifndef DIAGNOSTIC
 	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
 		(uiop->uio_resid & (DIRBLKSIZ - 1)))
 		panic("nfs readdirrpc bad uio");
 #endif
 
 	/*
 	 * If there is no cookie, assume directory was stale.
 	 */
 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
 	if (cookiep)
 		cookie = *cookiep;
 	else
 		return (NFSERR_BAD_COOKIE);
 	/*
 	 * Loop around doing readdir rpc's of size nm_readdirsize
 	 * truncated to a multiple of DIRBLKSIZ.
 	 * The stopping criteria is EOF or buffer full.
 	 */
 	while (more_dirs && bigenough) {
 		nfsstats.rpccnt[NFSPROC_READDIR]++;
 		nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) +
 			NFSX_READDIR(v3));
 		nfsm_fhtom(vp, v3);
 		/*
 		 * v3 sends a two-word cookie and the two-word cookie verifier;
 		 * v2 sends a one-word cookie.  Both end with the byte count.
 		 */
 		if (v3) {
 			nfsm_build(tl, u_int32_t *, 5 * NFSX_UNSIGNED);
 			*tl++ = cookie.nfsuquad[0];
 			*tl++ = cookie.nfsuquad[1];
 			*tl++ = dnp->n_cookieverf.nfsuquad[0];
 			*tl++ = dnp->n_cookieverf.nfsuquad[1];
 		} else {
 			nfsm_build(tl, u_int32_t *, 2 * NFSX_UNSIGNED);
 			*tl++ = cookie.nfsuquad[0];
 		}
 		*tl = txdr_unsigned(nmp->nm_readdirsize);
 		nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
 		if (v3) {
 			nfsm_postop_attr(vp, attrflag);
 			if (!error) {
 				/* Remember the verifier the server returned. */
 				nfsm_dissect(tl, u_int32_t *,
 				    2 * NFSX_UNSIGNED);
 				dnp->n_cookieverf.nfsuquad[0] = *tl++;
 				dnp->n_cookieverf.nfsuquad[1] = *tl;
 			} else {
 				m_freem(mrep);
 				goto nfsmout;
 			}
 		}
 		/* "Entry follows" flag for the first entry of this reply. */
 		nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 		more_dirs = fxdr_unsigned(int, *tl);
 	
 		/* loop thru the dir entries, doctoring them to 4bsd form */
 		while (more_dirs && bigenough) {
 			/* File id (two words for v3, one for v2), then name length. */
 			if (v3) {
 				nfsm_dissect(tl, u_int32_t *,
 				    3 * NFSX_UNSIGNED);
 				fileno = fxdr_hyper(tl);
 				len = fxdr_unsigned(int, *(tl + 2));
 			} else {
 				nfsm_dissect(tl, u_int32_t *,
 				    2 * NFSX_UNSIGNED);
 				fileno = fxdr_unsigned(u_quad_t, *tl++);
 				len = fxdr_unsigned(int, *tl);
 			}
 			if (len <= 0 || len > NFS_MAXNAMLEN) {
 				error = EBADRPC;
 				m_freem(mrep);
 				goto nfsmout;
 			}
 			tlen = nfsm_rndup(len);
 			if (tlen == len)
 				tlen += 4;	/* To ensure null termination */
 			left = DIRBLKSIZ - blksiz;
 			/*
 			 * The record does not fit in what is left of this
 			 * DIRBLKSIZ block: stretch the previous record over
 			 * the remainder and start a new block.
 			 */
 			if ((tlen + DIRHDSIZ) > left) {
 				dp->d_reclen += left;
 				uiop->uio_iov->iov_base += left;
 				uiop->uio_iov->iov_len -= left;
 				uiop->uio_offset += left;
 				uiop->uio_resid -= left;
 				blksiz = 0;
 			}
 			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
 				bigenough = 0;
 			if (bigenough) {
 				/* Build the dirent header in place in the caller's buffer. */
 				dp = (struct dirent *)uiop->uio_iov->iov_base;
 				dp->d_fileno = (int)fileno;
 				dp->d_namlen = len;
 				dp->d_reclen = tlen + DIRHDSIZ;
 				dp->d_type = DT_UNKNOWN;
 				blksiz += dp->d_reclen;
 				if (blksiz == DIRBLKSIZ)
 					blksiz = 0;
 				uiop->uio_offset += DIRHDSIZ;
 				uiop->uio_resid -= DIRHDSIZ;
 				uiop->uio_iov->iov_base += DIRHDSIZ;
 				uiop->uio_iov->iov_len -= DIRHDSIZ;
 				/* Copy the name out of the reply, then account for padding. */
 				nfsm_mtouio(uiop, len);
 				cp = uiop->uio_iov->iov_base;
 				tlen -= len;
 				*cp = '\0';	/* null terminate */
 				uiop->uio_iov->iov_base += tlen;
 				uiop->uio_iov->iov_len -= tlen;
 				uiop->uio_offset += tlen;
 				uiop->uio_resid -= tlen;
 			} else
 				nfsm_adv(nfsm_rndup(len));
 			/* Cookie (two words for v3, one for v2) plus the next "entry follows" flag. */
 			if (v3) {
 				nfsm_dissect(tl, u_int32_t *,
 				    3 * NFSX_UNSIGNED);
 			} else {
 				nfsm_dissect(tl, u_int32_t *,
 				    2 * NFSX_UNSIGNED);
 			}
 			/* The cookie is only remembered for entries actually stored. */
 			if (bigenough) {
 				cookie.nfsuquad[0] = *tl++;
 				if (v3)
 					cookie.nfsuquad[1] = *tl++;
 			} else if (v3)
 				tl += 2;
 			else
 				tl++;
 			more_dirs = fxdr_unsigned(int, *tl);
 		}
 		/*
 		 * If at end of rpc data, get the eof boolean
 		 */
 		if (!more_dirs) {
 			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 			/* Keep issuing rpcs unless the server flagged eof. */
 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
 		}
 		m_freem(mrep);
 	}
 	/*
 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
 	 * by increasing d_reclen for the last record.
 	 */
 	if (blksiz > 0) {
 		left = DIRBLKSIZ - blksiz;
 		dp->d_reclen += left;
 		uiop->uio_iov->iov_base += left;
 		uiop->uio_iov->iov_len -= left;
 		uiop->uio_offset += left;
 		uiop->uio_resid -= left;
 	}
 
 	/*
 	 * We are now either at the end of the directory or have filled the
 	 * block.
 	 */
 	if (bigenough)
 		/* Loop ended on eof: remember the end-of-directory offset. */
 		dnp->n_direofoffset = uiop->uio_offset;
 	else {
 		if (uiop->uio_resid > 0)
 			printf("EEK! readdirrpc resid > 0\n");
 		/* Buffer full: save the cookie for the offset reached so far. */
 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
 		*cookiep = cookie;
 	}
 nfsmout:
 	return (error);
 }
 
 /*
  * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc().
  *
  * Like nfs_readdirrpc(), fills the single iovec of uiop with struct
  * dirent records for directory vp, padded so that no record crosses a
  * DIRBLKSIZ boundary.  In addition, for entries that come with both
  * attributes and a file handle, the attributes are loaded into the
  * entry's vnode, d_type is set from them, and the name is entered into
  * the name cache.
  *
  * Returns 0 on success, NFSERR_BAD_COOKIE when no cookie is known for the
  * starting offset, EBADRPC for a name length outside 1..NFS_MAXNAMLEN, or
  * the error left by the nfsm_* macros.
  */
 int
 nfs_readdirplusrpc(vp, uiop, cred)
 	struct vnode *vp;
 	register struct uio *uiop;
 	struct ucred *cred;
 {
 	register int len, left;
 	register struct dirent *dp;
 	/*
 	 * NOTE(review): t1, t2, bpos, cp2 and the mbuf pointers are
 	 * presumably used by name inside the nfsm_* macros -- confirm.
 	 */
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	register struct vnode *newvp;
 	register nfsuint64 *cookiep;
 	/* dpossav1/2 and mdsav1/2 save and restore the reply parse position. */
 	caddr_t bpos, dpos, cp2, dpossav1, dpossav2;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2;
 	/* Scratch nameidata; only ni_dvp, ni_vp and ni_cnd are filled in. */
 	struct nameidata nami, *ndp = &nami;
 	struct componentname *cnp = &ndp->ni_cnd;
 	nfsuint64 cookie;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	struct nfsnode *dnp = VTONFS(vp), *np;
 	nfsfh_t *fhp;
 	u_quad_t fileno;
 	int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i;
 	int attrflag, fhsize;
 
 #ifndef nolint
 	dp = (struct dirent *)0;
 #endif
 	/*
 	 * NOTE(review): as written this sanity check is compiled in only
 	 * when DIAGNOSTIC is NOT defined -- confirm that sense is intended.
 	 */
 #ifndef DIAGNOSTIC
 	if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) ||
 		(uiop->uio_resid & (DIRBLKSIZ - 1)))
 		panic("nfs readdirplusrpc bad uio");
 #endif
 	ndp->ni_dvp = vp;
 	newvp = NULLVP;
 
 	/*
 	 * If there is no cookie, assume directory was stale.
 	 */
 	cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0);
 	if (cookiep)
 		cookie = *cookiep;
 	else
 		return (NFSERR_BAD_COOKIE);
 	/*
 	 * Loop around doing readdir rpc's of size nm_readdirsize
 	 * truncated to a multiple of DIRBLKSIZ.
 	 * The stopping criteria is EOF or buffer full.
 	 */
 	while (more_dirs && bigenough) {
 		nfsstats.rpccnt[NFSPROC_READDIRPLUS]++;
 		nfsm_reqhead(vp, NFSPROC_READDIRPLUS,
 			NFSX_FH(1) + 6 * NFSX_UNSIGNED);
 		nfsm_fhtom(vp, 1);
 		/* Cookie, cookie verifier, then nm_readdirsize and nm_rsize. */
  		nfsm_build(tl, u_int32_t *, 6 * NFSX_UNSIGNED);
 		*tl++ = cookie.nfsuquad[0];
 		*tl++ = cookie.nfsuquad[1];
 		*tl++ = dnp->n_cookieverf.nfsuquad[0];
 		*tl++ = dnp->n_cookieverf.nfsuquad[1];
 		*tl++ = txdr_unsigned(nmp->nm_readdirsize);
 		*tl = txdr_unsigned(nmp->nm_rsize);
 		nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred);
 		nfsm_postop_attr(vp, attrflag);
 		if (error) {
 			m_freem(mrep);
 			goto nfsmout;
 		}
 		/* New cookie verifier followed by the first "entry follows" flag. */
 		nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 		dnp->n_cookieverf.nfsuquad[0] = *tl++;
 		dnp->n_cookieverf.nfsuquad[1] = *tl++;
 		more_dirs = fxdr_unsigned(int, *tl);
 
 		/* loop thru the dir entries, doctoring them to 4bsd form */
 		while (more_dirs && bigenough) {
 			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			fileno = fxdr_hyper(tl);
 			len = fxdr_unsigned(int, *(tl + 2));
 			if (len <= 0 || len > NFS_MAXNAMLEN) {
 				error = EBADRPC;
 				m_freem(mrep);
 				goto nfsmout;
 			}
 			tlen = nfsm_rndup(len);
 			if (tlen == len)
 				tlen += 4;	/* To ensure null termination*/
 			left = DIRBLKSIZ - blksiz;
 			/*
 			 * Record would cross the DIRBLKSIZ boundary: stretch
 			 * the previous record to the end of the block.
 			 */
 			if ((tlen + DIRHDSIZ) > left) {
 				dp->d_reclen += left;
 				uiop->uio_iov->iov_base += left;
 				uiop->uio_iov->iov_len -= left;
 				uiop->uio_offset += left;
 				uiop->uio_resid -= left;
 				blksiz = 0;
 			}
 			if ((tlen + DIRHDSIZ) > uiop->uio_resid)
 				bigenough = 0;
 			if (bigenough) {
 				dp = (struct dirent *)uiop->uio_iov->iov_base;
 				dp->d_fileno = (int)fileno;
 				dp->d_namlen = len;
 				dp->d_reclen = tlen + DIRHDSIZ;
 				dp->d_type = DT_UNKNOWN;
 				blksiz += dp->d_reclen;
 				if (blksiz == DIRBLKSIZ)
 					blksiz = 0;
 				uiop->uio_offset += DIRHDSIZ;
 				uiop->uio_resid -= DIRHDSIZ;
 				uiop->uio_iov->iov_base += DIRHDSIZ;
 				uiop->uio_iov->iov_len -= DIRHDSIZ;
 				/*
 				 * The componentname points at the name inside
 				 * the caller's buffer, for cache_enter() below.
 				 */
 				cnp->cn_nameptr = uiop->uio_iov->iov_base;
 				cnp->cn_namelen = len;
 				nfsm_mtouio(uiop, len);
 				cp = uiop->uio_iov->iov_base;
 				tlen -= len;
 				*cp = '\0';
 				uiop->uio_iov->iov_base += tlen;
 				uiop->uio_iov->iov_len -= tlen;
 				uiop->uio_offset += tlen;
 				uiop->uio_resid -= tlen;
 			} else
 				nfsm_adv(nfsm_rndup(len));
 			/* Two cookie words plus the "attributes follow" flag. */
 			nfsm_dissect(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 			if (bigenough) {
 				cookie.nfsuquad[0] = *tl++;
 				cookie.nfsuquad[1] = *tl++;
 			} else
 				tl += 2;
 
 			/*
 			 * Since the attributes are before the file handle
 			 * (sigh), we must skip over the attributes and then
 			 * come back and get them.
 			 */
 			attrflag = fxdr_unsigned(int, *tl);
 			if (attrflag) {
 			    /* Remember where the attributes start, then skip them. */
 			    dpossav1 = dpos;
 			    mdsav1 = md;
 			    nfsm_adv(NFSX_V3FATTR);
 			    /* doit: the reply says a file handle follows. */
 			    nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 			    doit = fxdr_unsigned(int, *tl);
 			    if (doit) {
 				nfsm_getfh(fhp, fhsize, 1);
 				if (NFS_CMPFH(dnp, fhp, fhsize)) {
 				    /* Entry is the directory itself: just add a reference. */
 				    VREF(vp);
 				    newvp = vp;
 				    np = dnp;
 				} else {
 				    /* A failed nfs_nget() only disables the attribute load. */
 				    error = nfs_nget(vp->v_mount, fhp,
 					fhsize, &np);
 				    if (error)
 					doit = 0;
 				    else
 					newvp = NFSTOV(np);
 				}
 			    }
 			    if (doit && bigenough) {
 				/*
 				 * Rewind to the saved attribute position, load
 				 * the attributes into newvp, then restore the
 				 * position just past the file handle.
 				 */
 				dpossav2 = dpos;
 				dpos = dpossav1;
 				mdsav2 = md;
 				md = mdsav1;
 				nfsm_loadattr(newvp, (struct vattr *)0);
 				dpos = dpossav2;
 				md = mdsav2;
 				dp->d_type =
 				    IFTODT(VTTOIF(np->n_vattr.va_type));
 				ndp->ni_vp = newvp;
 			        cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
 			    }
 			} else {
 			    /* Just skip over the file handle */
 			    nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 			    i = fxdr_unsigned(int, *tl);
 			    nfsm_adv(nfsm_rndup(i));
 			}
 			/* Done with this entry's vnode: drop it again. */
 			if (newvp != NULLVP) {
 			    if (newvp == vp)
 				vrele(newvp);
 			    else
 				vput(newvp);
 			    newvp = NULLVP;
 			}
 			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 			more_dirs = fxdr_unsigned(int, *tl);
 		}
 		/*
 		 * If at end of rpc data, get the eof boolean
 		 */
 		if (!more_dirs) {
 			nfsm_dissect(tl, u_int32_t *, NFSX_UNSIGNED);
 			/* Keep issuing rpcs unless the server flagged eof. */
 			more_dirs = (fxdr_unsigned(int, *tl) == 0);
 		}
 		m_freem(mrep);
 	}
 	/*
 	 * Fill last record, iff any, out to a multiple of DIRBLKSIZ
 	 * by increasing d_reclen for the last record.
 	 */
 	if (blksiz > 0) {
 		left = DIRBLKSIZ - blksiz;
 		dp->d_reclen += left;
 		uiop->uio_iov->iov_base += left;
 		uiop->uio_iov->iov_len -= left;
 		uiop->uio_offset += left;
 		uiop->uio_resid -= left;
 	}
 
 	/*
 	 * We are now either at the end of the directory or have filled the
 	 * block.
 	 */
 	if (bigenough)
 		/* Loop ended on eof: remember the end-of-directory offset. */
 		dnp->n_direofoffset = uiop->uio_offset;
 	else {
 		if (uiop->uio_resid > 0)
 			printf("EEK! readdirplusrpc resid > 0\n");
 		/* Buffer full: save the cookie for the offset reached so far. */
 		cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1);
 		*cookiep = cookie;
 	}
 nfsmout:
 	/* Error exits can arrive here while still holding an entry's vnode. */
 	if (newvp != NULLVP) {
 	        if (newvp == vp)
 			vrele(newvp);
 		else
 			vput(newvp);
 		newvp = NULLVP;
 	}
 	return (error);
 }
 
 /*
  * Silly rename. To make the stateless NFS filesystem look a little more
  * like "ufs", a remove of an active vnode is translated to a rename to a
  * funny looking filename that is removed by nfs_inactive on the nfsnode.
  * There is the potential for another process on a different client to
  * create the same funny name between the time nfs_lookitup() fails and
  * the time the rename completes, but...
  *
  * dvp is the directory, vp the file being "removed" and cnp its current
  * name.  On success the sillyrename record is attached to vp's nfsnode
  * (n_sillyrename) and 0 is returned.  On failure the record, the extra
  * reference on dvp and the duplicated credential are released and the
  * error is returned (EINVAL when no unused name could be found).
  */
 static int
 nfs_sillyrename(dvp, vp, cnp)
 	struct vnode *dvp, *vp;
 	struct componentname *cnp;
 {
 	register struct sillyrename *sp;
 	struct nfsnode *np;
 	int error;
 	short pid;
 
 	cache_purge(dvp);
 	np = VTONFS(vp);
 	/*
 	 * NOTE(review): as written this check is compiled in only when
 	 * DIAGNOSTIC is NOT defined -- confirm that sense is intended.
 	 */
 #ifndef DIAGNOSTIC
 	if (vp->v_type == VDIR)
 		panic("nfs: sillyrename dir");
 #endif
 	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
 		M_NFSREQ, M_WAITOK);
 	/* The record keeps its own credential copy and a reference on dvp. */
 	sp->s_cred = crdup(cnp->cn_cred);
 	sp->s_dvp = dvp;
 	VREF(dvp);
 
 	/* Fudge together a funny name */
 	/* NOTE(review): p_pid is narrowed to a short here -- confirm that is safe. */
 	pid = cnp->cn_proc->p_pid;
 	sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid);
 
 	/* Try lookitups until we get one that isn't there */
 	while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 		cnp->cn_proc, (struct nfsnode **)0) == 0) {
 		/* Name taken: bump the letter at index 4 ('A' initially), give up past 'z'. */
 		sp->s_name[4]++;
 		if (sp->s_name[4] > 'z') {
 			error = EINVAL;
 			goto bad;
 		}
 	}
 	error = nfs_renameit(dvp, cnp, sp);
 	if (error)
 		goto bad;
 	/*
 	 * np is non-NULL here, so this lookup takes the "update the file
 	 * handle" path of nfs_lookitup().
 	 * NOTE(review): its result is stored in error but 0 is returned
 	 * regardless -- confirm that ignoring a failed lookup is intended.
 	 */
 	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 		cnp->cn_proc, &np);
 	np->n_sillyrename = sp;
 	return (0);
 bad:
 	vrele(sp->s_dvp);
 	crfree(sp->s_cred);
 	free((caddr_t)sp, M_NFSREQ);
 	return (error);
 }
 
 /*
  * Look up a file name and optionally either update the file handle or
  * allocate an nfsnode, depending on the value of npp.
  * npp == NULL	--> just do the lookup
  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
  *			handled too
  * *npp != NULL --> update the file handle in the vnode
  *
  * dvp is the directory to search, name/len the component to look up,
  * and cred/procp identify the caller for the rpc.
  *
  * In the "*npp == NULL" case a successful return stores the nfsnode in
  * *npp.  When the name resolves to dvp itself, *npp is dvp's own nfsnode
  * and dvp has gained a reference through VREF().
  *
  * Returns 0 on success, ENOENT when a v3 reply carries no attributes for
  * a newly obtained node, or the error from the rpc / nfs_nget().
  */
 static int
 nfs_lookitup(dvp, name, len, cred, procp, npp)
 	register struct vnode *dvp;
 	const char *name;
 	int len;
 	struct ucred *cred;
 	struct proc *procp;
 	struct nfsnode **npp;
 {
 	/* tl, cp, t1, t2, bpos, dpos, cp2 and the mbufs serve the nfsm_* macros. */
 	register u_int32_t *tl;
 	register caddr_t cp;
 	register int32_t t1, t2;
 	struct vnode *newvp = (struct vnode *)0;
 	struct nfsnode *np, *dnp = VTONFS(dvp);
 	caddr_t bpos, dpos, cp2;
 	int error = 0, fhlen, attrflag;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	nfsfh_t *nfhp;
 	int v3 = NFS_ISV3(dvp);
 
 	nfsstats.rpccnt[NFSPROC_LOOKUP]++;
 	nfsm_reqhead(dvp, NFSPROC_LOOKUP,
 		NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len));
 	nfsm_fhtom(dvp, v3);
 	nfsm_strtom(name, len, NFS_MAXNAMLEN);
 	nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred);
 	if (npp && !error) {
 		nfsm_getfh(nfhp, fhlen, v3);
 		if (*npp) {
 		    /*
 		     * Existing node: replace its file handle, switching
 		     * between the inline n_fh storage and a separately
 		     * allocated M_NFSBIGFH buffer when the size class changes.
 		     */
 		    np = *npp;
 		    if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) {
 			free((caddr_t)np->n_fhp, M_NFSBIGFH);
 			np->n_fhp = &np->n_fh;
 		    } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH)
 			np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK);
 		    bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen);
 		    np->n_fhsize = fhlen;
 		    newvp = NFSTOV(np);
 		} else if (NFS_CMPFH(dnp, nfhp, fhlen)) {
 		    /*
 		     * The name resolves to the directory itself.  np must
 		     * be set as well, since it is what gets stored in *npp
 		     * below; leaving it unassigned would hand the caller
 		     * an uninitialized pointer.
 		     */
 		    VREF(dvp);
 		    newvp = dvp;
 		    np = dnp;
 		} else {
 		    error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np);
 		    if (error) {
 			m_freem(mrep);
 			return (error);
 		    }
 		    newvp = NFSTOV(np);
 		}
 		if (v3) {
 			nfsm_postop_attr(newvp, attrflag);
 			/* A new node without attributes is treated as not found. */
 			if (!attrflag && *npp == NULL) {
 				m_freem(mrep);
 				if (newvp == dvp)
 					vrele(newvp);
 				else
 					vput(newvp);
 				return (ENOENT);
 			}
 		} else
 			nfsm_loadattr(newvp, (struct vattr *)0);
 	}
 	nfsm_reqdone;
 	if (npp && *npp == NULL) {
 		if (error) {
 			/* Failure after obtaining a vnode: release it again. */
 			if (newvp) {
 				if (newvp == dvp)
 					vrele(newvp);
 				else
 					vput(newvp);
 			}
 		} else
 			*npp = np;
 	}
 	return (error);
 }
 
 /*
  * Nfs Version 3 commit rpc
  *
  * Sends an NFSPROC_COMMIT request for cnt bytes starting at offset in
  * the file vp, using cred on behalf of procp.
  *
  * Returns 0 without sending anything when the mount has no write verifier
  * yet (NFSSTA_HASWRITEVERF clear).  Returns NFSERR_STALEWRITEVERF, after
  * recording the new verifier in the mount, when the verifier in the reply
  * differs from the stored one.  Otherwise returns the value left in
  * "error" by the nfsm_* macros.
  */
 int
 nfs_commit(vp, offset, cnt, cred, procp)
 	struct vnode *vp;
 	u_quad_t offset;
 	int cnt;
 	struct ucred *cred;
 	struct proc *procp;
 {
 	/*
 	 * NOTE(review): cp, t1, t2, bpos, dpos, cp2 and the mbuf pointers are
 	 * presumably used by name inside the nfsm_* macros -- confirm.
 	 */
 	register caddr_t cp;
 	register u_int32_t *tl;
 	register int32_t t1, t2;
 	register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	caddr_t bpos, dpos, cp2;
 	int error = 0, wccflag = NFSV3_WCCRATTR;
 	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
 	
 	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0)
 		return (0);
 	nfsstats.rpccnt[NFSPROC_COMMIT]++;
 	/* The v3 flag is hard-coded to 1 in the handle macros below. */
 	nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1));
 	nfsm_fhtom(vp, 1);
 	/* Arguments: offset (two words) followed by the byte count. */
 	nfsm_build(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
 	txdr_hyper(offset, tl);
 	tl += 2;
 	*tl = txdr_unsigned(cnt);
 	nfsm_request(vp, NFSPROC_COMMIT, procp, cred);
 	nfsm_wcc_data(vp, wccflag);
 	if (!error) {
 		/* Compare the verifier in the reply with the one on the mount. */
 		nfsm_dissect(tl, u_int32_t *, NFSX_V3WRITEVERF);
 		if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl,
 			NFSX_V3WRITEVERF)) {
 			bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf,
 				NFSX_V3WRITEVERF);
 			error = NFSERR_STALEWRITEVERF;
 		}
 	}
 	nfsm_reqdone;
 	return (error);
 }
 
 /*
  * Kludge City..
  * - nfs_bmap() is essentially a no-op that does no translation
  * - nfs_strategy() does its I/O with nfs_readrpc/nfs_writerpc
  *   (Maybe the process's page mapping could be used, but the concern was
  *    that Kernel Write might not be enabled; copyout() would also do a
  *    lot more work than bcopy(), and it currently happens in the context
  *    of the swapper process (2).)
  *
  * Every output pointer in the argument block is optional.  The vnode is
  * returned as given, the block number is scaled by the mount's I/O size
  * expressed in device blocks, and both run lengths are reported as zero.
  * Always returns 0.
  */
 static int
 nfs_bmap(ap)
 	struct vop_bmap_args /* {
 		struct vnode *a_vp;
 		daddr_t  a_bn;
 		struct vnode **a_vpp;
 		daddr_t *a_bnp;
 		int *a_runp;
 		int *a_runb;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 
 	/* Both run lengths are reported as zero. */
 	if (ap->a_runb != NULL)
 		*ap->a_runb = 0;
 	if (ap->a_runp != NULL)
 		*ap->a_runp = 0;
 
 	/* Logical block -> device-block units of the mount's I/O size. */
 	if (ap->a_bnp != NULL)
 		*ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize);
 
 	/* The file's own vnode is handed back unchanged. */
 	if (ap->a_vpp != NULL)
 		*ap->a_vpp = vp;
 
 	return (0);
 }
 
 /*
  * Strategy routine.
  * Asynchronous buffers are first offered to nfs_asyncio(); if that
  * accepts the buffer (returns 0) we are done.  Synchronous buffers, and
  * asynchronous ones that nfs_asyncio() turned down, are handled directly
  * by calling nfs_doio().
  */
 static int
 nfs_strategy(ap)
 	struct vop_strategy_args *ap;
 {
 	register struct buf *bp = ap->a_bp;
 	struct ucred *cr;
 	struct proc *p;
 	int async;
 
 	KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
 	KASSERT(BUF_REFCNT(bp) > 0, ("nfs_strategy: buffer %p not locked", bp));
 
 	if (bp->b_flags & B_PHYS)
 		panic("nfs physio");
 
 	async = (bp->b_flags & B_ASYNC) != 0;
 
 	/* Asynchronous requests carry no process context. */
 	p = async ? (struct proc *)0 : curproc;	/* XXX */
 
 	/* Pick the credential matching the direction of the transfer. */
 	cr = (bp->b_iocmd == BIO_READ) ? bp->b_rcred : bp->b_wcred;
 
 	/*
 	 * An asynchronous request that was successfully queued needs
 	 * nothing more from us; everything else is done right here.
 	 */
 	if (async && nfs_asyncio(bp, NOCRED, p) == 0)
 		return (0);
 	return (nfs_doio(bp, cr, p));
 }
 
 /*
  * fsync vnode op. Just call nfs_flush() with commit == 1.
  */
 /* ARGSUSED */
 static int
 nfs_fsync(ap)
 	struct vop_fsync_args /* {
 		struct vnodeop_desc *a_desc;
 		struct vnode * a_vp;
 		struct ucred * a_cred;
 		int  a_waitfor;
 		struct proc * a_p;
 	} */ *ap;
 {
 
 	return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1));
 }
 
 /*
  * Flush all the blocks associated with a vnode.
  * 	Walk through the buffer pool and push any dirty pages
  *	associated with the vnode.
  */
 static int
 nfs_flush(vp, cred, waitfor, p, commit)
 	register struct vnode *vp;
 	struct ucred *cred;
 	int waitfor;
 	struct proc *p;
 	int commit;
 {
 	register struct nfsnode *np = VTONFS(vp);
 	register struct buf *bp;
 	register int i;
 	struct buf *nbp;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 	int passone = 1;
 	u_quad_t off, endoff, toff;
 	struct ucred* wcred = NULL;
 	struct buf **bvec = NULL;
 #ifndef NFS_COMMITBVECSIZ
 #define NFS_COMMITBVECSIZ	20
 #endif
 	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
 	int bvecsize = 0, bveccount;
 
 	if (nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
 	if (!commit)
 		passone = 0;
 	/*
 	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
 	 * server, but has not been committed to stable storage on the server
 	 * yet. On the first pass, the byte range is worked out and the commit
 	 * rpc is done. On the second pass, nfs_writebp() is called to do the
 	 * job.
 	 */
 again:
 	off = (u_quad_t)-1;
 	endoff = 0;
 	bvecpos = 0;
 	if (NFS_ISV3(vp) && commit) {
 		s = splbio();
 		/*
 		 * Count up how many buffers waiting for a commit.
 		 */
 		bveccount = 0;
 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if (BUF_REFCNT(bp) == 0 &&
 			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 				== (B_DELWRI | B_NEEDCOMMIT))
 				bveccount++;
 		}
 		/*
 		 * Allocate space to remember the list of bufs to commit.  It is
 		 * important to use M_NOWAIT here to avoid a race with nfs_write.
 		 * If we can't get memory (for whatever reason), we will end up
 		 * committing the buffers one-by-one in the loop below.
 		 */
 		if (bveccount > NFS_COMMITBVECSIZ) {
 			if (bvec != NULL && bvec != bvec_on_stack)
 				free(bvec, M_TEMP);
 			bvec = (struct buf **)
 				malloc(bveccount * sizeof(struct buf *),
 				       M_TEMP, M_NOWAIT);
 			if (bvec == NULL) {
 				bvec = bvec_on_stack;
 				bvecsize = NFS_COMMITBVECSIZ;
 			} else
 				bvecsize = bveccount;
 		} else {
 			bvec = bvec_on_stack;
 			bvecsize = NFS_COMMITBVECSIZ;
 		}
 		for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 			if (bvecpos >= bvecsize)
 				break;
 			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
 			    (B_DELWRI | B_NEEDCOMMIT) ||
 			    BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT))
 				continue;
 			bremfree(bp);
 			/*
 			 * Work out if all buffers are using the same cred
 			 * so we can deal with them all with one commit.
 			 *
 			 * NOTE: we are not clearing B_DONE here, so we have
 			 * to do it later on in this routine if we intend to 
 			 * initiate I/O on the bp.
 			 *
 			 * Note: to avoid loopback deadlocks, we do not
 			 * assign b_runningbufspace.
 			 */
 			if (wcred == NULL)
 				wcred = bp->b_wcred;
 			else if (wcred != bp->b_wcred)
 				wcred = NOCRED;
 			bp->b_flags |= B_WRITEINPROG;
 			vfs_busy_pages(bp, 1);
 
 			/*
 			 * bp is protected by being locked, but nbp is not
 			 * and vfs_busy_pages() may sleep.  We have to
 			 * recalculate nbp.
 			 */
 			nbp = TAILQ_NEXT(bp, b_vnbufs);
 
 			/*
 			 * A list of these buffers is kept so that the
 			 * second loop knows which buffers have actually
 			 * been committed. This is necessary, since there
 			 * may be a race between the commit rpc and new
 			 * uncommitted writes on the file.
 			 */
 			bvec[bvecpos++] = bp;
 			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 				bp->b_dirtyoff;
 			if (toff < off)
 				off = toff;
 			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
 			if (toff > endoff)
 				endoff = toff;
 		}
 		splx(s);
 	}
 	if (bvecpos > 0) {
 		/*
 		 * Commit data on the server, as required.
 		 * If all bufs are using the same wcred, then use that with
 		 * one call for all of them, otherwise commit each one
 		 * separately.
 		 */
 		if (wcred != NOCRED)
 			retv = nfs_commit(vp, off, (int)(endoff - off),
 					  wcred, p);
 		else {
 			retv = 0;
 			for (i = 0; i < bvecpos; i++) {
 				off_t off, size;
 				bp = bvec[i];
 				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 					bp->b_dirtyoff;
 				size = (u_quad_t)(bp->b_dirtyend
 						  - bp->b_dirtyoff);
 				retv = nfs_commit(vp, off, (int)size,
 						  bp->b_wcred, p);
 				if (retv) break;
 			}
 		}
 
 		if (retv == NFSERR_STALEWRITEVERF)
 			nfs_clearcommit(vp->v_mount);
 
 		/*
 		 * Now, either mark the blocks I/O done or mark the
 		 * blocks dirty, depending on whether the commit
 		 * succeeded.
 		 */
 		for (i = 0; i < bvecpos; i++) {
 			bp = bvec[i];
 			bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG | B_CLUSTEROK);
 			if (retv) {
 				/*
 				 * Error, leave B_DELWRI intact
 				 */
 				vfs_unbusy_pages(bp);
 				brelse(bp);
 			} else {
 				/*
 				 * Success, remove B_DELWRI ( bundirty() ).
 				 *
 				 * b_dirtyoff/b_dirtyend seem to be NFS 
 				 * specific.  We should probably move that
 				 * into bundirty(). XXX
 				 */
 				s = splbio();
 				vp->v_numoutput++;
 				bp->b_flags |= B_ASYNC;
 				bundirty(bp);
 				bp->b_flags &= ~B_DONE;
 				bp->b_ioflags &= ~BIO_ERROR;
 				bp->b_dirtyoff = bp->b_dirtyend = 0;
 				splx(s);
 				bufdone(bp);
 			}
 		}
 	}
 
 	/*
 	 * Start/do any write(s) that are required.
 	 */
 loop:
 	s = splbio();
 	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
 		nbp = TAILQ_NEXT(bp, b_vnbufs);
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
 			if (waitfor != MNT_WAIT || passone)
 				continue;
 			error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL,
 			    "nfsfsync", slpflag, slptimeo);
 			splx(s);
 			if (error == 0)
 				panic("nfs_fsync: inconsistent lock");
 			if (error == ENOLCK)
 				goto loop;
 			if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
 				error = EINTR;
 				goto done;
 			}
 			if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			}
 			goto loop;
 		}
 		if ((bp->b_flags & B_DELWRI) == 0)
 			panic("nfs_fsync: not dirty");
 		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
 			BUF_UNLOCK(bp);
 			continue;
 		}
 		bremfree(bp);
 		if (passone || !commit)
 		    bp->b_flags |= B_ASYNC;
 		else
 		    bp->b_flags |= B_ASYNC | B_WRITEINPROG;
 		splx(s);
 		BUF_WRITE(bp);
 		goto loop;
 	}
 	splx(s);
 	if (passone) {
 		passone = 0;
 		goto again;
 	}
 	if (waitfor == MNT_WAIT) {
 		while (vp->v_numoutput) {
 			vp->v_flag |= VBWAIT;
 			error = tsleep((caddr_t)&vp->v_numoutput,
 				slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
 			if (error) {
 			    if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
 				error = EINTR;
 				goto done;
 			    }
 			    if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			    }
 			}
 		}
 		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd) && commit) {
 			goto loop;
 		}
 	}
 	if (np->n_flag & NWRITEERR) {
 		error = np->n_error;
 		np->n_flag &= ~NWRITEERR;
 	}
 done:
 	if (bvec != NULL && bvec != bvec_on_stack)
 		free(bvec, M_TEMP);
 	return (error);
 }
 
 /*
  * Advisory byte-range locking on an NFS vnode.
  *
  * There is no lock daemon support here: the request is simply passed to
  * lf_advlock() together with the lock list and size kept in the nfsnode,
  * i.e. it is treated as a local lock.
  */
 static int
 nfs_advlock(ap)
 	struct vop_advlock_args /* {
 		struct vnode *a_vp;
 		caddr_t  a_id;
 		int  a_op;
 		struct flock *a_fl;
 		int  a_flags;
 	} */ *ap;
 {
 	struct nfsnode *node;
 
 	node = VTONFS(ap->a_vp);
 
 	/*
 	 * Kludge that keeps diskless operation working until a real NFS
 	 * lockd exists: handle the request as if the file were local.
 	 */
 	return (lf_advlock(ap, &node->n_lockf, node->n_size));
 }
 
 /*
  * Debug helper: print the file id and filesystem id cached in the
  * nfsnode's attributes, plus fifo details when the vnode is a fifo.
  * Always returns 0.
  */
 static int
 nfs_print(ap)
 	struct vop_print_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	struct vnode *vn;
 	struct nfsnode *node;
 
 	vn = ap->a_vp;
 	node = VTONFS(vn);
 
 	printf("tag VT_NFS, fileid %ld fsid 0x%x",
 	    node->n_vattr.va_fileid, node->n_vattr.va_fsid);
 	/* Fifos get their extra state appended on the same output line. */
 	if (vn->v_type == VFIFO)
 		fifo_printinfo(vn);
 	printf("\n");
 
 	return (0);
 }
 
 /*
- * Just call nfs_writebp() with the force argument set to 1.
- *
- * NOTE: B_DONE may or may not be set in a_bp on call.
- */
-static int
-nfs_bwrite(ap)
-	struct vop_bwrite_args /* {
-		struct vnode *a_bp;
-	} */ *ap;
-{
-	return (nfs_writebp(ap->a_bp, 1, curproc));
-}
-
-/*
- * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless
- * the force flag is one and it also handles the B_NEEDCOMMIT flag.  We set
- * B_CACHE if this is a VMIO buffer.
+ * This is the "real" nfs::bwrite(struct buf*).
+ * B_WRITEINPROG isn't set unless the force flag is one and it 
+ * handles the B_NEEDCOMMIT flag.
+ * We set B_CACHE if this is a VMIO buffer.
  */
 int
 nfs_writebp(bp, force, procp)
 	register struct buf *bp;
 	int force;
 	struct proc *procp;
 {
 	/*
 	 * bp:    buffer to write; it must already be locked (checked below).
 	 * force: when non-zero, B_WRITEINPROG is set before the strategy call.
 	 * procp: not referenced anywhere in this body; the output-block
 	 *        accounting below is charged to curproc instead.
 	 *
 	 * Returns 0 for B_INVAL buffers and for buffers that had B_ASYNC set
 	 * on entry; otherwise returns whatever bufwait() reports.
 	 */
 	int s;
 	int oldflags = bp->b_flags;	/* flags as they were on entry */
 #if 0
 	/* Compiled out; nothing below refers to these. */
 	int retv = 1;
 	off_t off;
 #endif
 
 	if (BUF_REFCNT(bp) == 0)
 		panic("bwrite: buffer is not locked???");
 
 	/* An invalidated buffer has nothing worth writing; just release it. */
 	if (bp->b_flags & B_INVAL) {
 		brelse(bp);
 		return(0);
 	}
 
 	bp->b_flags |= B_CACHE;
 
 	/*
 	 * Undirty the bp.  We will redirty it later if the I/O fails.
 	 */
 
 	s = splbio();
 	bundirty(bp);
 	bp->b_flags &= ~B_DONE;
 	bp->b_ioflags &= ~BIO_ERROR;
 	bp->b_iocmd = BIO_WRITE;
 
 	/* Account for the write on the vnode and in the process statistics. */
 	bp->b_vp->v_numoutput++;
 	curproc->p_stats->p_ru.ru_oublock++;
 	splx(s);
 
 	/*
 	 * Note: to avoid loopback deadlocks, we do not
 	 * assign b_runningbufspace.
 	 */
 	vfs_busy_pages(bp, 1);
 
 	if (force)
 		bp->b_flags |= B_WRITEINPROG;
 	/*
 	 * NOTE(review): BUF_KERNPROC() presumably hands the buffer lock to
 	 * the kernel so the completion path can release it -- confirm.
 	 */
 	BUF_KERNPROC(bp);
 	BUF_STRATEGY(bp);
 
 	/*
 	 * The decision is made on the flags saved at entry, not on the
 	 * current bp->b_flags.  Without B_ASYNC we wait for the I/O here
 	 * and release the buffer ourselves.
 	 */
 	if( (oldflags & B_ASYNC) == 0) {
 		int rtval = bufwait(bp);
 
 		/* Only buffers that were delayed writes on entry are reassigned. */
 		if (oldflags & B_DELWRI) {
 			s = splbio();
 			reassignbuf(bp, bp->b_vp);
 			splx(s);
 		}
 
 		brelse(bp);
 		return (rtval);
 	} 
 
 	/* Async case: the buffer is not waited for or released here. */
 	return (0);
 }
 
 /*
  * nfs special file access vnode op.
  * Essentially just get vattr and then imitate iaccess() since the device is
  * local to the client.
  */
 static int
 nfsspec_access(ap)
 	struct vop_access_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vattr *vap;
 	register gid_t *gp;
 	register struct ucred *cred = ap->a_cred;
 	struct vnode *vp = ap->a_vp;
 	mode_t mode = ap->a_mode;
 	struct vattr vattr;
 	register int i;
 	int error;
 
 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
 	 */
 	if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
 		default:
 			break;
 		}
 	}
 	/*
 	 * If you're the super-user,
 	 * you always get access.
 	 */
 	if (cred->cr_uid == 0)
 		return (0);
 	vap = &vattr;
 	error = VOP_GETATTR(vp, vap, cred, ap->a_p);
 	if (error)
 		return (error);
 	/*
 	 * Access check is based on only one of owner, group, public.
 	 * If not owner, then check group. If not a member of the
 	 * group, then check public access.
 	 */
 	if (cred->cr_uid != vap->va_uid) {
 		mode >>= 3;
 		gp = cred->cr_groups;
 		for (i = 0; i < cred->cr_ngroups; i++, gp++)
 			if (vap->va_gid == *gp)
 				goto found;
 		mode >>= 3;
 found:
 		;
 	}
 	error = (vap->va_mode & mode) == mode ? 0 : EACCES;
 	return (error);
 }
 
 /*
  * Read on a special device: flag the nfsnode as accessed, stamp the
  * access time, and let the spec layer perform the read.
  */
 static int
 nfsspec_read(ap)
 	struct vop_read_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct nfsnode *node;
 
 	node = VTONFS(ap->a_vp);
 	node->n_flag |= NACC;		/* access time is now pending */
 	getnanotime(&node->n_atim);
 
 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
 }
 
 /*
  * Write on a special device: flag the nfsnode as updated, stamp the
  * modification time, and let the spec layer perform the write.
  */
 static int
 nfsspec_write(ap)
 	struct vop_write_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct nfsnode *node;
 
 	node = VTONFS(ap->a_vp);
 	node->n_flag |= NUPD;		/* modification time is now pending */
 	getnanotime(&node->n_mtim);
 
 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
 }
 
 /*
  * Close wrapper for special devices.
  *
  * Update the times on the nfsnode then do device close.
  */
 static int
 nfsspec_close(ap)
 	struct vop_close_args /* {
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	struct vattr vattr;
 
 	if (np->n_flag & (NACC | NUPD)) {
 		np->n_flag |= NCHG;
 		if (vp->v_usecount == 1 &&
 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			VATTR_NULL(&vattr);
 			if (np->n_flag & NACC)
 				vattr.va_atime = np->n_atim;
 			if (np->n_flag & NUPD)
 				vattr.va_mtime = np->n_mtim;
 			(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
 		}
 	}
 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
 }
 
 /*
  * Read on a fifo: flag the nfsnode as accessed, stamp the access time,
  * and let the fifo layer perform the read.
  */
 static int
 nfsfifo_read(ap)
 	struct vop_read_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct nfsnode *node;
 
 	node = VTONFS(ap->a_vp);
 	node->n_flag |= NACC;		/* access time is now pending */
 	getnanotime(&node->n_atim);
 
 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
 }
 
 /*
  * Write on a fifo: flag the nfsnode as updated, stamp the modification
  * time, and let the fifo layer perform the write.
  */
 static int
 nfsfifo_write(ap)
 	struct vop_write_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct nfsnode *node;
 
 	node = VTONFS(ap->a_vp);
 	node->n_flag |= NUPD;		/* modification time is now pending */
 	getnanotime(&node->n_mtim);
 
 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
 }
 
 /*
  * Close wrapper for fifos.
  *
  * Update the times on the nfsnode then do fifo close.
  */
 static int
 nfsfifo_close(ap)
 	struct vop_close_args /* {
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct nfsnode *np = VTONFS(vp);
 	struct vattr vattr;
 	struct timespec ts;
 
 	if (np->n_flag & (NACC | NUPD)) {
 		getnanotime(&ts);
 		if (np->n_flag & NACC)
 			np->n_atim = ts;
 		if (np->n_flag & NUPD)
 			np->n_mtim = ts;
 		np->n_flag |= NCHG;
 		if (vp->v_usecount == 1 &&
 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			VATTR_NULL(&vattr);
 			if (np->n_flag & NACC)
 				vattr.va_atime = np->n_atim;
 			if (np->n_flag & NUPD)
 				vattr.va_mtime = np->n_mtim;
 			(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
 		}
 	}
 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
 }
Index: head/sys/ntfs/ntfs_vnops.c
===================================================================
--- head/sys/ntfs/ntfs_vnops.c	(revision 75579)
+++ head/sys/ntfs/ntfs_vnops.c	(revision 75580)
@@ -1,943 +1,942 @@
 /*	$NetBSD: ntfs_vnops.c,v 1.23 1999/10/31 19:45:27 jdolecek Exp $	*/
 
 /*
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * John Heidemann of the UCLA Ficus project.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  * $FreeBSD$
  *
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/time.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/vnode.h>
 #include <sys/mount.h>
 #include <sys/namei.h>
 #include <sys/malloc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/dirent.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #if defined(__NetBSD__)
 #include <vm/vm_prot.h>
 #endif
 #include <vm/vm_page.h>
 #include <vm/vm_object.h>
 #include <vm/vm_pager.h>
 #if defined(__FreeBSD__)
 #include <vm/vnode_pager.h>
 #endif
 #include <vm/vm_extern.h>
 
 #include <sys/sysctl.h>
 
 /*#define NTFS_DEBUG 1*/
 #include <ntfs/ntfs.h>
 #include <ntfs/ntfs_inode.h>
 #include <ntfs/ntfs_subr.h>
 #if defined(__NetBSD__)
 #include <miscfs/specfs/specdev.h>
 #include <miscfs/genfs/genfs.h>
 #endif
 
 #include <sys/unistd.h> /* for pathconf(2) constants */
 
 static int	ntfs_read __P((struct vop_read_args *));
 static int	ntfs_write __P((struct vop_write_args *ap));
 static int	ntfs_getattr __P((struct vop_getattr_args *ap));
 static int	ntfs_inactive __P((struct vop_inactive_args *ap));
 static int	ntfs_print __P((struct vop_print_args *ap));
 static int	ntfs_reclaim __P((struct vop_reclaim_args *ap));
 static int	ntfs_strategy __P((struct vop_strategy_args *ap));
 static int	ntfs_access __P((struct vop_access_args *ap));
 static int	ntfs_open __P((struct vop_open_args *ap));
 static int	ntfs_close __P((struct vop_close_args *ap));
 static int	ntfs_readdir __P((struct vop_readdir_args *ap));
 static int	ntfs_lookup __P((struct vop_lookup_args *ap));
 static int	ntfs_bmap __P((struct vop_bmap_args *ap));
 #if defined(__FreeBSD__)
 static int	ntfs_getpages __P((struct vop_getpages_args *ap));
 static int	ntfs_putpages __P((struct vop_putpages_args *));
 static int	ntfs_fsync __P((struct vop_fsync_args *ap));
 #else
 static int	ntfs_bypass __P((struct vop_generic_args *ap));
 #endif
 static int	ntfs_pathconf __P((void *));
 
 int	ntfs_prtactive = 1;	/* 1 => print out reclaim of active vnodes */
 
 #if defined(__FreeBSD__)
 int
 ntfs_getpages(ap)
 	struct vop_getpages_args *ap;
 {
 	int rv;
 
 	/* No NTFS-specific paging: defer to the generic vnode pager. */
 	rv = vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
 	    ap->a_reqpage);
 	return (rv);
 }
 
 int
 ntfs_putpages(ap)
 	struct vop_putpages_args *ap;
 {
 	int rv;
 
 	/* No NTFS-specific paging: defer to the generic vnode pager. */
 	rv = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
 	    ap->a_sync, ap->a_rtvals);
 	return (rv);
 }
 #endif
 
 /*
  * Identity block mapping: the vnode and block number handed in are
  * returned unchanged through whichever output pointers are non-NULL, and
  * the run lengths are reported as zero.  Always returns 0.
  */
 int
 ntfs_bmap(ap)
 	struct vop_bmap_args /* {
 		struct vnode *a_vp;
 		daddr_t  a_bn;
 		struct vnode **a_vpp;
 		daddr_t *a_bnp;
 		int *a_runp;
 		int *a_runb;
 	} */ *ap;
 {
 	dprintf(("ntfs_bmap: vn: %p, blk: %d\n", ap->a_vp,(u_int32_t)ap->a_bn));
 
 	/* Every output is optional. */
 	if (ap->a_vpp)
 		*ap->a_vpp = ap->a_vp;
 	if (ap->a_bnp)
 		*ap->a_bnp = ap->a_bn;
 	if (ap->a_runp)
 		*ap->a_runp = 0;
 #if !defined(__NetBSD__)
 	/* a_runb is only touched when not building for NetBSD. */
 	if (ap->a_runb)
 		*ap->a_runb = 0;
 #endif
 
 	return (0);
 }
 
 static int
 ntfs_read(ap)
 	struct vop_read_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	struct uio *uio = ap->a_uio;
 	struct ntfsmount *ntmp = ip->i_mp;
 	u_int64_t toread;
 	int error;
 
 	dprintf(("ntfs_read: ino: %d, off: %d resid: %d, segflg: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid,uio->uio_segflg));
 
 	dprintf(("ntfs_read: filesize: %d",(u_int32_t)fp->f_size));
 
 	/* don't allow reading after end of file */
 	if (uio->uio_offset > fp->f_size)
 		toread = 0;
 	else
 		toread = min( uio->uio_resid, fp->f_size - uio->uio_offset );
 
 	dprintf((", toread: %d\n",(u_int32_t)toread));
 
 	if (toread == 0)
 		return (0);
 
 	error = ntfs_readattr(ntmp, ip, fp->f_attrtype,
 		fp->f_attrname, uio->uio_offset, toread, NULL, uio);
 	if (error) {
 		printf("ntfs_read: ntfs_readattr failed: %d\n",error);
 		return (error);
 	}
 
 	return (0);
 }
 
 #if !defined(__FreeBSD__)
 
 /*
  * Default vnode operation when not building for FreeBSD: log the name of
  * the requested operation (debug builds) and fail it with ENOTTY.
  */
 static int
 ntfs_bypass(ap)
 	struct vop_generic_args /* {
 		struct vnodeop_desc *a_desc;
 		<other random data follows, presumably>
 	} */ *ap;
 {
 	dprintf(("ntfs_bypass: %s\n", ap->a_desc->vdesc_name));
 
 	return (ENOTTY);
 }
 
 #endif
 
 static int
 ntfs_getattr(ap)
 	struct vop_getattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	register struct vattr *vap = ap->a_vap;
 
 	dprintf(("ntfs_getattr: %d, flags: %d\n",ip->i_number,ip->i_flag));
 
 #if defined(__FreeBSD__)
 	vap->va_fsid = dev2udev(ip->i_dev);
 #else /* NetBSD */
 	vap->va_fsid = ip->i_dev;
 #endif
 	vap->va_fileid = ip->i_number;
 	vap->va_mode = ip->i_mp->ntm_mode;
 	vap->va_nlink = ip->i_nlink;
 	vap->va_uid = ip->i_mp->ntm_uid;
 	vap->va_gid = ip->i_mp->ntm_gid;
 	vap->va_rdev = 0;				/* XXX UNODEV ? */
 	vap->va_size = fp->f_size;
 	vap->va_bytes = fp->f_allocated;
 	vap->va_atime = ntfs_nttimetounix(fp->f_times.t_access);
 	vap->va_mtime = ntfs_nttimetounix(fp->f_times.t_write);
 	vap->va_ctime = ntfs_nttimetounix(fp->f_times.t_create);
 	vap->va_flags = ip->i_flag;
 	vap->va_gen = 0;
 	vap->va_blocksize = ip->i_mp->ntm_spc * ip->i_mp->ntm_bps;
 	vap->va_type = vp->v_type;
 	vap->va_filerev = 0;
 	return (0);
 }
 
 
 /*
  * Last reference to an ntnode.
  *
  * Nothing is written or deleted here: the routine optionally reports a
  * vnode that still has users, unlocks the vnode and returns 0.
  */
 int
 ntfs_inactive(ap)
 	struct vop_inactive_args /* {
 		struct vnode *a_vp;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 #ifdef NTFS_DEBUG
 	/* Only the debug printout below refers to ip. */
 	register struct ntnode *ip = VTONT(vp);
 #endif
 
 	dprintf(("ntfs_inactive: vnode: %p, ntnode: %d\n", vp, ip->i_number));
 
 	/* Diagnostic only; controlled by the ntfs_prtactive global. */
 	if (ntfs_prtactive && vp->v_usecount != 0)
 		vprint("ntfs_inactive: pushing active", vp);
 
 	VOP__UNLOCK(vp, 0, ap->a_p);
 
 	/* XXX since we don't support any filesystem changes
 	 * right now, nothing more needs to be done
 	 */
 	return (0);
 }
 
 /*
  * Reclaim an fnode/ntnode so that it can be used for other purposes.
  *
  * Returns the error from ntfs_ntget() if the ntnode cannot be obtained,
  * in which case nothing has been released; otherwise returns 0 with
  * vp->v_data cleared.
  */
 int
 ntfs_reclaim(ap)
 	struct vop_reclaim_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	int error;
 
 	dprintf(("ntfs_reclaim: vnode: %p, ntnode: %d\n", vp, ip->i_number));
 
 	/* Diagnostic only; controlled by the ntfs_prtactive global. */
 	if (ntfs_prtactive && vp->v_usecount != 0)
 		vprint("ntfs_reclaim: pushing active", vp);
 
 	/* Paired with the ntfs_ntput() further down. */
 	if ((error = ntfs_ntget(ip)) != 0)
 		return (error);
 	
 	/* Purge old data structures associated with the inode. */
 	cache_purge(vp);
 	if (ip->i_devvp) {
 		vrele(ip->i_devvp);
 		ip->i_devvp = NULL;
 	}
 
 	/* Drop the fnode first, then the ntnode obtained above. */
 	ntfs_frele(fp);
 	ntfs_ntput(ip);
 	vp->v_data = NULL;
 
 	return (0);
 }
 
 /*
  * Print vnode operation.  Nothing is printed for NTFS vnodes; the
  * routine just reports success.
  */
 static int
 ntfs_print(ap)
 	struct vop_print_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	return (0);
 }
 
 /*
  * Calculate the logical to physical mapping if not done already,
  * then call the device strategy routine.
  */
 int
 ntfs_strategy(ap)
 	struct vop_strategy_args /* {
 		struct buf *a_bp;
 	} */ *ap;
 {
 	register struct buf *bp = ap->a_bp;
 	register struct vnode *vp = bp->b_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	struct ntfsmount *ntmp = ip->i_mp;
 	int error;
 
 #ifdef __FreeBSD__
 	dprintf(("ntfs_strategy: offset: %d, blkno: %d, lblkno: %d\n",
 		(u_int32_t)bp->b_offset,(u_int32_t)bp->b_blkno,
 		(u_int32_t)bp->b_lblkno));
 #else
 	dprintf(("ntfs_strategy: blkno: %d, lblkno: %d\n",
 		(u_int32_t)bp->b_blkno,
 		(u_int32_t)bp->b_lblkno));
 #endif
 
 	dprintf(("strategy: bcount: %d flags: 0x%lx\n", 
 		(u_int32_t)bp->b_bcount,bp->b_flags));
 
 	if (bp->b_iocmd == BIO_READ) {
 		u_int32_t toread;
 
 		if (ntfs_cntob(bp->b_blkno) >= fp->f_size) {
 			clrbuf(bp);
 			error = 0;
 		} else {
 			toread = min(bp->b_bcount,
 				 fp->f_size-ntfs_cntob(bp->b_blkno));
 			dprintf(("ntfs_strategy: toread: %d, fsize: %d\n",
 				toread,(u_int32_t)fp->f_size));
 
 			error = ntfs_readattr(ntmp, ip, fp->f_attrtype,
 				fp->f_attrname, ntfs_cntob(bp->b_blkno),
 				toread, bp->b_data, NULL);
 
 			if (error) {
 				printf("ntfs_strategy: ntfs_readattr failed\n");
 				bp->b_error = error;
 				bp->b_ioflags |= BIO_ERROR;
 			}
 
 			bzero(bp->b_data + toread, bp->b_bcount - toread);
 		}
 	} else {
 		size_t tmp;
 		u_int32_t towrite;
 
 		if (ntfs_cntob(bp->b_blkno) + bp->b_bcount >= fp->f_size) {
 			printf("ntfs_strategy: CAN'T EXTEND FILE\n");
 			bp->b_error = error = EFBIG;
 			bp->b_ioflags |= BIO_ERROR;
 		} else {
 			towrite = min(bp->b_bcount,
 				fp->f_size-ntfs_cntob(bp->b_blkno));
 			dprintf(("ntfs_strategy: towrite: %d, fsize: %d\n",
 				towrite,(u_int32_t)fp->f_size));
 
 			error = ntfs_writeattr_plain(ntmp, ip, fp->f_attrtype,	
 				fp->f_attrname, ntfs_cntob(bp->b_blkno),towrite,
 				bp->b_data, &tmp, NULL);
 
 			if (error) {
 				printf("ntfs_strategy: ntfs_writeattr fail\n");
 				bp->b_error = error;
 				bp->b_ioflags |= BIO_ERROR;
 			}
 		}
 	}
 	bufdone(bp);
 	return (error);
 }
 
 static int
 ntfs_write(ap)
 	struct vop_write_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		int  a_ioflag;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	struct uio *uio = ap->a_uio;
 	struct ntfsmount *ntmp = ip->i_mp;
 	u_int64_t towrite;
 	size_t written;
 	int error;
 
 	dprintf(("ntfs_write: ino: %d, off: %d resid: %d, segflg: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid,uio->uio_segflg));
 	dprintf(("ntfs_write: filesize: %d",(u_int32_t)fp->f_size));
 
 	if (uio->uio_resid + uio->uio_offset > fp->f_size) {
 		printf("ntfs_write: CAN'T WRITE BEYOND END OF FILE\n");
 		return (EFBIG);
 	}
 
 	towrite = min(uio->uio_resid, fp->f_size - uio->uio_offset);
 
 	dprintf((", towrite: %d\n",(u_int32_t)towrite));
 
 	error = ntfs_writeattr_plain(ntmp, ip, fp->f_attrtype,
 		fp->f_attrname, uio->uio_offset, towrite, NULL, &written, uio);
 #ifdef NTFS_DEBUG
 	if (error)
 		printf("ntfs_write: ntfs_writeattr failed: %d\n", error);
 #endif
 
 	return (error);
 }
 
 int
 ntfs_access(ap)
 	struct vop_access_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct ntnode *ip = VTONT(vp);
 	mode_t mode = ap->a_mode;
 #ifdef QUOTA
 	int error;
 #endif
 
 	dprintf(("ntfs_access: %d\n",ip->i_number));
 
 	/*
 	 * Disallow write attempts on read-only file systems;
 	 * unless the file is a socket, fifo, or a block or
 	 * character device resident on the file system.
 	 */
 	if (mode & VWRITE) {
 		switch ((int)vp->v_type) {
 		case VDIR:
 		case VLNK:
 		case VREG:
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 #ifdef QUOTA
 			if (error = getinoquota(ip))
 				return (error);
 #endif
 			break;
 		}
 	}
 
 	return (vaccess(vp->v_type, ip->i_mp->ntm_mode, ip->i_mp->ntm_uid,
 	    ip->i_mp->ntm_gid, ap->a_mode, ap->a_cred, NULL));
 } 
 
 /*
  * Open called.
  *
  * Nothing to do: apart from a debug printout the routine performs no
  * checks and always returns 0.
  */
 /* ARGSUSED */
 static int
 ntfs_open(ap)
 	struct vop_open_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 #if NTFS_DEBUG
 	register struct vnode *vp = ap->a_vp;
 	register struct ntnode *ip = VTONT(vp);
 
 	printf("ntfs_open: %d\n",ip->i_number);
 #endif
 
 	/*
 	 * Files marked append-only must be opened for appending.
 	 * NOTE(review): no code here enforces that rule.
 	 */
 
 	return (0);
 }
 
 /*
  * Close called.
  *
  * No times are updated here: apart from a debug printout the routine
  * does nothing and always returns 0.
  */
 /* ARGSUSED */
 static int
 ntfs_close(ap)
 	struct vop_close_args /* {
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 #if NTFS_DEBUG
 	register struct vnode *vp = ap->a_vp;
 	register struct ntnode *ip = VTONT(vp);
 
 	printf("ntfs_close: %d\n",ip->i_number);
 #endif
 
 	return (0);
 }
 
 /*
  * Read directory entries into the caller's uio as fixed-size
  * struct dirent records.
  *
  * "." (except in the root directory) and ".." are synthesized; the
  * remaining records come from ntfs_ntreaddir().  The directory offset is
  * therefore always a multiple of sizeof(struct dirent).  When
  * ap->a_ncookies is non-NULL an array with one cookie per record copied
  * out is allocated with M_TEMP and handed back through ap->a_cookies.
  */
 int
 ntfs_readdir(ap)
 	struct vop_readdir_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 		int *a_ncookies;
 		u_long **a_cookies;	(off_t ** when not on FreeBSD, see below)
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct fnode *fp = VTOF(vp);
 	register struct ntnode *ip = FTONT(fp);
 	struct uio *uio = ap->a_uio;
 	struct ntfsmount *ntmp = ip->i_mp;
 	int i, error = 0;
 	u_int32_t faked = 0, num;
 	int ncookies = 0;	/* records copied out by this call */
 	struct dirent cde;
 	off_t off;
 
 	dprintf(("ntfs_readdir %d off: %d resid: %d\n",ip->i_number,(u_int32_t)uio->uio_offset,uio->uio_resid));
 
 	/* Remember where we started; the cookie code below needs it. */
 	off = uio->uio_offset;
 
 	/* Simulate . in every dir except ROOT */
 	if( ip->i_number != NTFS_ROOTINO ) {
 		struct dirent dot = { NTFS_ROOTINO,
 				sizeof(struct dirent), DT_DIR, 1, "." };
 
 		if( uio->uio_offset < sizeof(struct dirent) ) {
 			dot.d_fileno = ip->i_number;
 			error = uiomove((char *)&dot,sizeof(struct dirent),uio);
 			if(error)
 				return (error);
 
 			ncookies ++;
 		}
 	}
 
 	/*
 	 * Simulate .. in every dir including ROOT
 	 * NOTE(review): d_fileno of ".." is left at NTFS_ROOTINO, and the
 	 * offset test uses 2 records even for ROOT, which fakes only one --
 	 * confirm both are intended.
 	 */
 	if( uio->uio_offset < 2 * sizeof(struct dirent) ) {
 		struct dirent dotdot = { NTFS_ROOTINO,
 				sizeof(struct dirent), DT_DIR, 2, ".." };
 
 		error = uiomove((char *)&dotdot,sizeof(struct dirent),uio);
 		if(error)
 			return (error);
 
 		ncookies ++;
 	}
 
 	/* Translate the offset into an index among the real entries. */
 	faked = (ip->i_number == NTFS_ROOTINO) ? 1 : 2;
 	num = uio->uio_offset / sizeof(struct dirent) - faked;
 
 	while( uio->uio_resid >= sizeof(struct dirent) ) {
 		struct attr_indexentry *iep;
 
 		error = ntfs_ntreaddir(ntmp, fp, num, &iep);
 
 		if(error)
 			return (error);
 
 		/* A NULL entry ends the directory. */
 		if( NULL == iep )
 			break;
 
 		/* Walk the entries that follow until the LAST flag or no room. */
 		for(; !(iep->ie_flag & NTFS_IEFLAG_LAST) && (uio->uio_resid >= sizeof(struct dirent));
 			iep = NTFS_NEXTREC(iep, struct attr_indexentry *))
 		{
 			/* Entries rejected here do not advance num. */
 			if(!ntfs_isnamepermitted(ntmp,iep))
 				continue;
 
 			/* Convert the name one character at a time via ntfs_u28(). */
 			for(i=0; i<iep->ie_fnamelen; i++) {
 				cde.d_name[i] = ntfs_u28(iep->ie_fname[i]);
 			}
 			cde.d_name[i] = '\0';
 			dprintf(("ntfs_readdir: elem: %d, fname:[%s] type: %d, flag: %d, ",
 				num, cde.d_name, iep->ie_fnametype,
 				iep->ie_flag));
 			cde.d_namlen = iep->ie_fnamelen;
 			cde.d_fileno = iep->ie_number;
 			cde.d_type = (iep->ie_fflag & NTFS_FFLAG_DIR) ? DT_DIR : DT_REG;
 			cde.d_reclen = sizeof(struct dirent);
 			dprintf(("%s\n", (cde.d_type == DT_DIR) ? "dir":"reg"));
 
 			error = uiomove((char *)&cde, sizeof(struct dirent), uio);
 			if(error)
 				return (error);
 
 			ncookies++;
 			num++;
 		}
 	}
 
 	dprintf(("ntfs_readdir: %d entries (%d bytes) read\n",
 		ncookies,(u_int)(uio->uio_offset - off)));
 	dprintf(("ntfs_readdir: off: %d resid: %d\n",
 		(u_int32_t)uio->uio_offset,uio->uio_resid));
 
 	/*
 	 * Cookies were requested: re-walk the records just copied out and
 	 * record the offset following each one.
 	 */
 	if (!error && ap->a_ncookies != NULL) {
 		struct dirent* dpStart;
 		struct dirent* dp;
 #if defined(__FreeBSD__)
 		u_long *cookies;
 		u_long *cookiep;
 #else /* defined(__NetBSD__) */
 		off_t *cookies;
 		off_t *cookiep;
 #endif
 
 		ddprintf(("ntfs_readdir: %d cookies\n",ncookies));
 		/* The walk below reads the destination buffer directly. */
 		if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1)
 			panic("ntfs_readdir: unexpected uio from NFS server");
 		/* Step back by the bytes produced to find the first record. */
 		dpStart = (struct dirent *)
 		     ((caddr_t)uio->uio_iov->iov_base -
 			 (uio->uio_offset - off));
 #if defined(__FreeBSD__)
 		MALLOC(cookies, u_long *, ncookies * sizeof(u_long),
 		       M_TEMP, M_WAITOK);
 #else /* defined(__NetBSD__) */
 		MALLOC(cookies, off_t *, ncookies * sizeof(off_t),
 		       M_TEMP, M_WAITOK);
 #endif
 		for (dp = dpStart, cookiep = cookies, i=0;
 		     i < ncookies;
 		     dp = (struct dirent *)((caddr_t) dp + dp->d_reclen), i++) {
 			off += dp->d_reclen;
 			*cookiep++ = (u_int) off;
 		}
 		/* The array is handed to the caller; it is not freed here. */
 		*ap->a_ncookies = ncookies;
 		*ap->a_cookies = cookies;
 	}
 /*
 	if (ap->a_eofflag)
 	    *ap->a_eofflag = VTONT(ap->a_vp)->i_size <= uio->uio_offset;
 */
 	return (error);
 }
 
 /*
  * Look up one pathname component (ap->a_cnp) in directory ap->a_dvp and
  * return the resulting vnode through ap->a_vpp.
  *
  * Three cases are handled: "." (the directory itself), ".." (parent
  * found through the NTFS_A_NAME attribute) and ordinary names (via
  * ntfs_ntlookupfile()).  Returns 0 or an errno; EROFS is returned for a
  * DELETE or RENAME of the last component on a read-only mount.
  */
 int
 ntfs_lookup(ap)
 	struct vop_lookup_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	register struct vnode *dvp = ap->a_dvp;
 	register struct ntnode *dip = VTONT(dvp);
 	struct ntfsmount *ntmp = dip->i_mp;
 	struct componentname *cnp = ap->a_cnp;
 	struct ucred *cred = cnp->cn_cred;
 	int error;
 	int lockparent = cnp->cn_flags & LOCKPARENT;
 #if NTFS_DEBUG
 	/* Only the debug printout below refers to wantparent. */
 	int wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT);
 #endif
 	dprintf(("ntfs_lookup: \"%.*s\" (%ld bytes) in %d, lp: %d, wp: %d \n",
 		(int)cnp->cn_namelen, cnp->cn_nameptr, cnp->cn_namelen,
 		dip->i_number, lockparent, wantparent));
 
 	/* Searching a directory requires execute permission on it. */
 	error = VOP_ACCESS(dvp, VEXEC, cred, cnp->cn_proc);
 	if(error)
 		return (error);
 
 	if ((cnp->cn_flags & ISLASTCN) &&
 	    (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 
 #ifdef __NetBSD__
 	/*
 	 * We now have a segment name to search for, and a directory
 	 * to search.
 	 *
 	 * Before tediously performing a linear scan of the directory,
 	 * check the name cache to see if the directory/name pair
 	 * we are looking for is known already.
 	 */
 	if ((error = cache_lookup(ap->a_dvp, ap->a_vpp, cnp)) >= 0)
 		return (error);
 #endif
 
 	if(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
 		dprintf(("ntfs_lookup: faking . directory in %d\n",
 			dip->i_number));
 
 		/* "." is the directory itself; hand it back with a new reference. */
 		VREF(dvp);
 		*ap->a_vpp = dvp;
 		error = 0;
 	} else if (cnp->cn_flags & ISDOTDOT) {
 		struct ntvattr *vap;
 
 		dprintf(("ntfs_lookup: faking .. directory in %d\n",
 			 dip->i_number));
 
 		/* The name attribute carries the parent's number. */
 		error = ntfs_ntvattrget(ntmp, dip, NTFS_A_NAME, NULL, 0, &vap);
 		if(error)
 			return (error);
 
 		/* The directory is unlocked before the parent is fetched. */
 		VOP__UNLOCK(dvp,0,cnp->cn_proc);
 		cnp->cn_flags |= PDIRUNLOCK;
 
 		dprintf(("ntfs_lookup: parentdir: %d\n",
 			 vap->va_a_name->n_pnumber));
 		error = VFS_VGET(ntmp->ntm_mountp,
 				 vap->va_a_name->n_pnumber,ap->a_vpp); 
 		ntfs_ntvattrrele(vap);
 		if (error) {
 			/* Failed: try to give the caller its lock back. */
 			if (VN_LOCK(dvp,LK_EXCLUSIVE|LK_RETRY,cnp->cn_proc)==0)
 				cnp->cn_flags &= ~PDIRUNLOCK;
 			return (error);
 		}
 
 		/* Relock the directory only if the caller wants it locked. */
 		if (lockparent && (cnp->cn_flags & ISLASTCN)) {
 			error = VN_LOCK(dvp, LK_EXCLUSIVE, cnp->cn_proc);
 			if (error) {
 				vput( *(ap->a_vpp) );
 				return (error);
 			}
 			cnp->cn_flags &= ~PDIRUNLOCK;
 		}
 	} else {
 		error = ntfs_ntlookupfile(ntmp, dvp, cnp, ap->a_vpp);
 		if (error) {
 			dprintf(("ntfs_ntlookupfile: returned %d\n", error));
 			return (error);
 		}
 
 		dprintf(("ntfs_lookup: found ino: %d\n", 
 			VTONT(*ap->a_vpp)->i_number));
 
 		/*
 		 * The directory stays locked only for the last component
 		 * with LOCKPARENT set.  PDIRUNLOCK is not set on this path.
 		 */
 		if(!lockparent || !(cnp->cn_flags & ISLASTCN))
 			VOP__UNLOCK(dvp, 0, cnp->cn_proc);
 	}
 
 	/* Successful lookups are entered in the name cache on request. */
 	if (cnp->cn_flags & MAKEENTRY)
 		cache_enter(dvp, *ap->a_vpp, cnp);
 
 	return (error);
 }
 
 #if defined(__FreeBSD__)
 /*
  * Fsync vnode operation.
  *
  * Nothing is flushed here: the routine ignores its arguments and
  * always reports success.
  */
 static int
 ntfs_fsync(ap)
 	struct vop_fsync_args /* {
 		struct vnode *a_vp;
 		struct ucred *a_cred;
 		int a_waitfor;
 		struct proc *a_p;
 	} */ *ap;
 {
 	return (0);
 }
 #endif
 
 /*
  * Answer pathconf(2) queries for an NTFS filesystem.
  *
  * The value for a recognized name is stored through ap->a_retval and 0
  * is returned; any other name yields EINVAL and leaves *a_retval alone.
  */
 int
 ntfs_pathconf(v)
 	void *v;
 {
 	struct vop_pathconf_args /* {
 		struct vnode *a_vp;
 		int a_name;
 		register_t *a_retval;
 	} */ *ap = v;
 
 	switch (ap->a_name) {
 	case _PC_LINK_MAX:
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = 1;
 		break;
 	case _PC_NAME_MAX:
 		*ap->a_retval = NTFS_MAXFILENAME;
 		break;
 	case _PC_PATH_MAX:
 		*ap->a_retval = PATH_MAX;
 		break;
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 0;
 		break;
 #if defined(__NetBSD__)
 	case _PC_SYNC_IO:
 		*ap->a_retval = 1;
 		break;
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		break;
 #endif
 	default:
 		return (EINVAL);
 	}
 
 	return (0);
 }
 
 /*
  * Global vfs data structures
  */
 vop_t **ntfs_vnodeop_p;
 #if defined(__FreeBSD__)
 static
 struct vnodeopv_entry_desc ntfs_vnodeop_entries[] = {
 	{ &vop_default_desc, (vop_t *)vop_defaultop },
 
 	{ &vop_getattr_desc, (vop_t *)ntfs_getattr },
 	{ &vop_inactive_desc, (vop_t *)ntfs_inactive },
 	{ &vop_reclaim_desc, (vop_t *)ntfs_reclaim },
 	{ &vop_print_desc, (vop_t *)ntfs_print },
 	{ &vop_pathconf_desc, ntfs_pathconf },
 
 	{ &vop_islocked_desc, (vop_t *)vop_stdislocked },
 	{ &vop_unlock_desc, (vop_t *)vop_stdunlock },
 	{ &vop_lock_desc, (vop_t *)vop_stdlock },
 	{ &vop_cachedlookup_desc, (vop_t *)ntfs_lookup },
 	{ &vop_lookup_desc, (vop_t *)vfs_cache_lookup },
 
 	{ &vop_access_desc, (vop_t *)ntfs_access },
 	{ &vop_close_desc, (vop_t *)ntfs_close },
 	{ &vop_open_desc, (vop_t *)ntfs_open },
 	{ &vop_readdir_desc, (vop_t *)ntfs_readdir },
 	{ &vop_fsync_desc, (vop_t *)ntfs_fsync },
 
 	{ &vop_bmap_desc, (vop_t *)ntfs_bmap },
 	{ &vop_getpages_desc, (vop_t *) ntfs_getpages },
 	{ &vop_putpages_desc, (vop_t *) ntfs_putpages },
 	{ &vop_strategy_desc, (vop_t *)ntfs_strategy },
-	{ &vop_bwrite_desc, (vop_t *)vop_stdbwrite },
 	{ &vop_read_desc, (vop_t *)ntfs_read },
 	{ &vop_write_desc, (vop_t *)ntfs_write },
 
 	{ NULL, NULL }
 };
 
 static
 struct vnodeopv_desc ntfs_vnodeop_opv_desc =
 	{ &ntfs_vnodeop_p, ntfs_vnodeop_entries };
 
 VNODEOP_SET(ntfs_vnodeop_opv_desc);
 
 #else /* !FreeBSD */
 
 struct vnodeopv_entry_desc ntfs_vnodeop_entries[] = {
 	{ &vop_default_desc, (vop_t *) ntfs_bypass },
 	{ &vop_lookup_desc, (vop_t *) ntfs_lookup },	/* lookup */
 	{ &vop_create_desc, genfs_eopnotsupp },		/* create */
 	{ &vop_mknod_desc, genfs_eopnotsupp },		/* mknod */
 	{ &vop_open_desc, (vop_t *) ntfs_open },	/* open */
 	{ &vop_close_desc,(vop_t *)  ntfs_close },	/* close */
 	{ &vop_access_desc, (vop_t *) ntfs_access },	/* access */
 	{ &vop_getattr_desc, (vop_t *) ntfs_getattr },	/* getattr */
 	{ &vop_setattr_desc, genfs_eopnotsupp },	/* setattr */
 	{ &vop_read_desc, (vop_t *) ntfs_read },	/* read */
 	{ &vop_write_desc, (vop_t *) ntfs_write },	/* write */
 	{ &vop_lease_desc, genfs_lease_check },		/* lease */
 	{ &vop_fcntl_desc, genfs_fcntl },		/* fcntl */
 	{ &vop_ioctl_desc, genfs_enoioctl },		/* ioctl */
 	{ &vop_poll_desc, genfs_poll },			/* poll */
 	{ &vop_revoke_desc, genfs_revoke },		/* revoke */
 	{ &vop_fsync_desc, genfs_fsync },		/* fsync */
 	{ &vop_seek_desc, genfs_seek },			/* seek */
 	{ &vop_remove_desc, genfs_eopnotsupp },		/* remove */
 	{ &vop_link_desc, genfs_eopnotsupp },		/* link */
 	{ &vop_rename_desc, genfs_eopnotsupp },		/* rename */
 	{ &vop_mkdir_desc, genfs_eopnotsupp },		/* mkdir */
 	{ &vop_rmdir_desc, genfs_eopnotsupp },		/* rmdir */
 	{ &vop_symlink_desc, genfs_eopnotsupp },	/* symlink */
 	{ &vop_readdir_desc, (vop_t *) ntfs_readdir },	/* readdir */
 	{ &vop_readlink_desc, genfs_eopnotsupp },	/* readlink */
 	{ &vop_abortop_desc, genfs_abortop },		/* abortop */
 	{ &vop_inactive_desc, (vop_t *) ntfs_inactive },	/* inactive */
 	{ &vop_reclaim_desc, (vop_t *) ntfs_reclaim },	/* reclaim */
 	{ &vop_lock_desc, genfs_lock },			/* lock */
 	{ &vop_unlock_desc, genfs_unlock },		/* unlock */
 	{ &vop_bmap_desc, (vop_t *) ntfs_bmap },	/* bmap */
 	{ &vop_strategy_desc, (vop_t *) ntfs_strategy },	/* strategy */
 	{ &vop_print_desc, (vop_t *) ntfs_print },	/* print */
 	{ &vop_islocked_desc, genfs_islocked },		/* islocked */
 	{ &vop_pathconf_desc, ntfs_pathconf },		/* pathconf */
 	{ &vop_advlock_desc, genfs_nullop },		/* advlock */
 	{ &vop_blkatoff_desc, genfs_eopnotsupp },	/* blkatoff */
 	{ &vop_valloc_desc, genfs_eopnotsupp },		/* valloc */
 	{ &vop_reallocblks_desc, genfs_eopnotsupp },	/* reallocblks */
 	{ &vop_vfree_desc, genfs_eopnotsupp },		/* vfree */
 	{ &vop_truncate_desc, genfs_eopnotsupp },	/* truncate */
 	{ &vop_update_desc, genfs_eopnotsupp },		/* update */
 	{ &vop_bwrite_desc, vn_bwrite },		/* bwrite */
 	{ (struct vnodeop_desc *)NULL, (int (*) __P((void *)))NULL }
 };
 struct vnodeopv_desc ntfs_vnodeop_opv_desc =
 	{ &ntfs_vnodeop_p, ntfs_vnodeop_entries };
 
 #endif
Index: head/sys/sys/buf.h
===================================================================
--- head/sys/sys/buf.h	(revision 75579)
+++ head/sys/sys/buf.h	(revision 75580)
@@ -1,548 +1,561 @@
 /*
  * Copyright (c) 1982, 1986, 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)buf.h	8.9 (Berkeley) 3/30/95
  * $FreeBSD$
  */
 
 #ifndef _SYS_BUF_H_
 #define	_SYS_BUF_H_
 
 #include <sys/queue.h>
 #include <sys/lock.h>
 
 struct bio;
 struct buf;
 struct mount;
 struct vnode;
 
 /*
  * To avoid including <ufs/ffs/softdep.h> 
  */   
 LIST_HEAD(workhead, worklist);
 /*
  * These are currently used only by the soft dependency code, hence
  * are stored once in a global variable. If other subsystems wanted
  * to use these hooks, a pointer to a set of bio_ops could be added
  * to each buffer.
  */
 extern struct bio_ops {
 	void	(*io_start) __P((struct buf *));
 	void	(*io_complete) __P((struct buf *));
 	void	(*io_deallocate) __P((struct buf *));
 	void	(*io_movedeps) __P((struct buf *, struct buf *));
 	int	(*io_countdeps) __P((struct buf *, int));
 } bioops;
 
+struct buf_ops {
+	char	*bop_name;
+	int	(*bop_write) __P((struct buf *));
+};
+
+extern struct buf_ops buf_ops_bio;
+
 /*
  * The buffer header describes an I/O operation in the kernel.
  *
  * NOTES:
  *	b_bufsize, b_bcount.  b_bufsize is the allocation size of the
  *	buffer, either DEV_BSIZE or PAGE_SIZE aligned.  b_bcount is the
  *	originally requested buffer size and can serve as a bounds check
  *	against EOF.  For most, but not all uses, b_bcount == b_bufsize.
  *
  *	b_dirtyoff, b_dirtyend.  Buffers support piecemeal, unaligned
  *	ranges of dirty data that need to be written to backing store.
  *	The range is typically clipped at b_bcount ( not b_bufsize ).
  *
  *	b_resid.  Number of bytes remaining in I/O.  After an I/O operation
  *	completes, b_resid is usually 0 indicating 100% success.
  */
 struct buf {
 	/* XXX: b_io must be the first element of struct buf for now /phk */
 	struct bio b_io;		/* "Builtin" I/O request. */
 #define	b_bcount	b_io.bio_bcount
 #define	b_blkno		b_io.bio_blkno
 #define	b_caller1	b_io.bio_caller1
 #define	b_data		b_io.bio_data
 #define	b_dev		b_io.bio_dev
 #define	b_driver1	b_io.bio_driver1
 #define	b_driver2	b_io.bio_driver2
 #define	b_error		b_io.bio_error
 #define	b_iocmd		b_io.bio_cmd
 #define	b_ioflags	b_io.bio_flags
 #define	b_pblkno	b_io.bio_pblkno
 #define	b_resid		b_io.bio_resid
+	struct buf_ops	*b_op;
+	unsigned		b_magic;
+#define B_MAGIC_BIO	0x10b10b10
+#define B_MAGIC_NFS	0x67238234
 	void	(*b_iodone) __P((struct buf *));
 	off_t	b_offset;		/* Offset into file. */
 	LIST_ENTRY(buf) b_hash;		/* Hash chain. */
 	TAILQ_ENTRY(buf) b_vnbufs;	/* Buffer's associated vnode. */
 	TAILQ_ENTRY(buf) b_freelist;	/* Free list position if not active. */
 	TAILQ_ENTRY(buf) b_act;		/* Device driver queue when active. *new* */
 	long	b_flags;		/* B_* flags. */
 	unsigned short b_qindex;	/* buffer queue index */
 	unsigned char b_xflags;		/* extra flags */
 	struct lock b_lock;		/* Buffer lock */
 	long	b_bufsize;		/* Allocated buffer size. */
 	long	b_runningbufspace;	/* when I/O is running, pipelining */
 	caddr_t	b_kvabase;		/* base kva for buffer */
 	int	b_kvasize;		/* size of kva for buffer */
 	daddr_t	b_lblkno;		/* Logical block number. */
 	struct	vnode *b_vp;		/* Device vnode. */
 	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
 	int	b_dirtyend;		/* Offset of end of dirty region. */
 	struct	ucred *b_rcred;		/* Read credentials reference. */
 	struct	ucred *b_wcred;		/* Write credentials reference. */
 	void	*b_saveaddr;		/* Original b_addr for physio. */
 	union	pager_info {
 		void	*pg_spc;
 		int	pg_reqpage;
 	} b_pager;
 	union	cluster_info {
 		TAILQ_HEAD(cluster_list_head, buf) cluster_head;
 		TAILQ_ENTRY(buf) cluster_entry;
 	} b_cluster;
 	struct	vm_page *b_pages[btoc(MAXPHYS)];
 	int		b_npages;
 	struct	workhead b_dep;		/* List of filesystem dependencies. */
 };
 
 #define b_spc	b_pager.pg_spc
 
 /*
  * These flags are kept in b_flags.
  *
  * Notes:
  *
  *	B_ASYNC		VOP calls on bp's are usually async whether or not
  *			B_ASYNC is set, but some subsystems, such as NFS, like 
  *			to know what is best for the caller so they can
  *			optimize the I/O.
  *
  *	B_PAGING	Indicates that bp is being used by the paging system or
  *			some paging system and that the bp is not linked into
  *			the b_vp's clean/dirty linked lists or ref counts.
  *			Buffer vp reassignments are illegal in this case.
  *
  *	B_CACHE		This may only be set if the buffer is entirely valid.
  *			The situation where B_DELWRI is set and B_CACHE is
  *			clear MUST be committed to disk by getblk() so 
  *			B_DELWRI can also be cleared.  See the comments for
  *			getblk() in kern/vfs_bio.c.  If B_CACHE is clear,
  *			the caller is expected to clear BIO_ERROR and B_INVAL,
  *			set BIO_READ, and initiate an I/O.
  *
  *			The 'entire buffer' is defined to be the range from
  *			0 through b_bcount.
  *
  *	B_MALLOC	Request that the buffer be allocated from the malloc
  *			pool, DEV_BSIZE aligned instead of PAGE_SIZE aligned.
  *
  *	B_CLUSTEROK	This flag is typically set for B_DELWRI buffers
  *			by filesystems that allow clustering when the buffer
  *			is fully dirty and indicates that it may be clustered
  *			with other adjacent dirty buffers.  Note the clustering
  *			may not be used with the stage 1 data write under NFS
  *			but may be used for the commit rpc portion.
  *
  *	B_VMIO		Indicates that the buffer is tied into an VM object.
  *			The buffer's data is always PAGE_SIZE aligned even
  *			if b_bufsize and b_bcount are not.  ( b_bufsize is 
  *			always at least DEV_BSIZE aligned, though ).
  *	
  */
 
 #define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
 #define	B_NEEDCOMMIT	0x00000002	/* Append-write in progress. */
 #define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
 #define	B_UNUSED0	0x00000008	/* Old B_BAD */
 #define	B_DEFERRED	0x00000010	/* Skipped over for cleaning */
 #define	B_CACHE		0x00000020	/* Bread found us in the cache. */
 #define	B_VALIDSUSPWRT	0x00000040	/* Valid write during suspension. */
 #define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
 #define	B_DONE		0x00000200	/* I/O completed. */
 #define	B_EINTR		0x00000400	/* I/O was interrupted */
 #define	B_00000800	0x00000800	/* Available flag. */
 #define	B_SCANNED	0x00001000	/* VOP_FSYNC funcs mark written bufs */
 #define	B_INVAL		0x00002000	/* Does not contain valid info. */
 #define	B_LOCKED	0x00004000	/* Locked in core (not reusable). */
 #define	B_NOCACHE	0x00008000	/* Do not cache block after use. */
 #define	B_MALLOC	0x00010000	/* malloced b_data */
 #define	B_CLUSTEROK	0x00020000	/* Pagein op, so swap() can count it. */
 #define	B_PHYS		0x00040000	/* I/O to user memory. */
 #define	B_RAW		0x00080000	/* Set by physio for raw transfers. */
 #define	B_DIRTY		0x00200000	/* Needs writing later. */
 #define	B_RELBUF	0x00400000	/* Release VMIO buffer. */
 #define	B_WANT		0x00800000	/* Used by vm_pager.c */
 #define	B_WRITEINPROG	0x01000000	/* Write in progress. */
 #define	B_XXX		0x02000000	/* Debugging flag. */
 #define	B_PAGING	0x04000000	/* volatile paging I/O -- bypass VMIO */
 #define	B_08000000	0x08000000	/* Available flag. */
 #define B_RAM		0x10000000	/* Read ahead mark (flag) */
 #define B_VMIO		0x20000000	/* VMIO flag */
 #define B_CLUSTER	0x40000000	/* pagein op, so swap() can count it */
 #define B_80000000	0x80000000	/* Available flag. */
 
 #define PRINT_BUF_FLAGS "\20\40autochain\37cluster\36vmio\35ram\34ordered" \
 	"\33paging\32xxx\31writeinprog\30want\27relbuf\26dirty" \
 	"\25read\24raw\23phys\22clusterok\21malloc\20nocache" \
 	"\17locked\16inval\15scanned\14error\13eintr\12done\11freebuf" \
 	"\10delwri\7call\6cache\4bad\3async\2needcommit\1age"
 
 /*
  * These flags are kept in b_xflags.
  */
 #define	BX_VNDIRTY	0x00000001	/* On vnode dirty list */
 #define	BX_VNCLEAN	0x00000002	/* On vnode clean list */
 #define	BX_BKGRDWRITE	0x00000004	/* Do writes in background */
 #define	BX_BKGRDINPROG	0x00000008	/* Background write in progress */
 #define	BX_BKGRDWAIT	0x00000010	/* Background write waiting */
 
 #define	NOOFFSET	(-1LL)		/* No buffer offset calculated yet */
 
 #ifdef _KERNEL
 /*
  * Buffer locking
  */
 extern struct mtx buftimelock;		/* Interlock on setting prio and timo */
 extern char *buf_wmesg;			/* Default buffer lock message */
 #define BUF_WMESG "bufwait"
 #include <sys/proc.h>			/* XXX for curproc */
 #include <sys/mutex.h>
 
 /*
  * Initialize a lock.
  */
 #define BUF_LOCKINIT(bp) \
 	lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, 0)
 /*
  *
  * Get a lock sleeping non-interruptably until it becomes available.
  */
 static __inline int BUF_LOCK __P((struct buf *, int));
 static __inline int
 BUF_LOCK(struct buf *bp, int locktype)
 {
 	int s, ret;
 
 	s = splbio();
 	mtx_lock(&buftimelock);
 	locktype |= LK_INTERLOCK;
 	bp->b_lock.lk_wmesg = buf_wmesg;
 	bp->b_lock.lk_prio = PRIBIO + 4;
 	bp->b_lock.lk_timo = 0;
 	ret = lockmgr(&(bp)->b_lock, locktype, &buftimelock, curproc);
 	splx(s);
 	return ret;
 }
 /*
  * Get a lock sleeping with specified interruptably and timeout.
  */
 static __inline int BUF_TIMELOCK __P((struct buf *, int, char *, int, int));
 static __inline int
 BUF_TIMELOCK(struct buf *bp, int locktype, char *wmesg, int catch, int timo)
 {
 	int s, ret;
 
 	s = splbio();
 	mtx_lock(&buftimelock);
 	locktype |= LK_INTERLOCK;
 	bp->b_lock.lk_wmesg = wmesg;
 	bp->b_lock.lk_prio = (PRIBIO + 4) | catch;
 	bp->b_lock.lk_timo = timo;
 	ret = lockmgr(&(bp)->b_lock, (locktype), &buftimelock, curproc);
 	splx(s);
 	return ret;
 }
 /*
  * Release a lock. Only the acquiring process may free the lock unless
  * it has been handed off to biodone.
  */
 static __inline void BUF_UNLOCK __P((struct buf *));
 static __inline void
 BUF_UNLOCK(struct buf *bp)
 {
 	int s;
 
 	s = splbio();
 	lockmgr(&(bp)->b_lock, LK_RELEASE, NULL, curproc);
 	splx(s);
 }
 
 /*
  * Free a buffer lock.
  */
 #define BUF_LOCKFREE(bp) 			\
 do {						\
 	if (BUF_REFCNT(bp) > 0)			\
 		panic("free locked buf");	\
 	lockdestroy(&(bp)->b_lock);		\
 } while (0)
 
 #ifdef _SYS_PROC_H_	/* Avoid #include <sys/proc.h> pollution */
 /*
  * When initiating asynchronous I/O, change ownership of the lock to the
  * kernel. Once done, the lock may legally released by biodone. The
  * original owning process can no longer acquire it recursively, but must
  * wait until the I/O is completed and the lock has been freed by biodone.
  */
 static __inline void BUF_KERNPROC __P((struct buf *));
 static __inline void
 BUF_KERNPROC(struct buf *bp)
 {
 	struct proc *p = curproc;
 
 	if (p != PCPU_GET(idleproc) && bp->b_lock.lk_lockholder == p->p_pid)
 		p->p_locks--;
 	bp->b_lock.lk_lockholder = LK_KERNPROC;
 }
 #endif
 /*
  * Find out the number of references to a lock.
  */
 static __inline int BUF_REFCNT __P((struct buf *));
 static __inline int
 BUF_REFCNT(struct buf *bp)
 {
 	int s, ret;
 
 	s = splbio();
 	ret = lockcount(&(bp)->b_lock);
 	splx(s);
 	return ret;
 }
 
 #endif /* _KERNEL */
 
 struct buf_queue_head {
 	TAILQ_HEAD(buf_queue, buf) queue;
 	daddr_t	last_pblkno;
 	struct	buf *insert_point;
 	struct	buf *switch_point;
 };
 
 /*
  * This structure describes a clustered I/O.  It is stored in the b_saveaddr
  * field of the buffer on which I/O is done.  At I/O completion, cluster
  * callback uses the structure to parcel I/O's to individual buffers, and
  * then free's this structure.
  */
 struct cluster_save {
 	long	bs_bcount;		/* Saved b_bcount. */
 	long	bs_bufsize;		/* Saved b_bufsize. */
 	void	*bs_saveaddr;		/* Saved b_addr. */
 	int	bs_nchildren;		/* Number of associated buffers. */
 	struct buf **bs_children;	/* List of associated buffers. */
 };
 
 #ifdef _KERNEL
 static __inline void bufq_init __P((struct buf_queue_head *head));
 static __inline void bufq_insert_tail __P((struct buf_queue_head *head,
 					   struct buf *bp));
 static __inline void bufq_remove __P((struct buf_queue_head *head,
 				      struct buf *bp));
 static __inline struct buf *bufq_first __P((struct buf_queue_head *head));
 
 static __inline void
 bufq_init(struct buf_queue_head *head)
 {
 	TAILQ_INIT(&head->queue);
 	head->last_pblkno = 0;
 	head->insert_point = NULL;
 	head->switch_point = NULL;
 }
 
 static __inline void
 bufq_insert_tail(struct buf_queue_head *head, struct buf *bp)
 {
 	if ((bp->b_ioflags & BIO_ORDERED) != 0) {
 		head->insert_point = bp;
 		head->switch_point = NULL;
 	}
 	TAILQ_INSERT_TAIL(&head->queue, bp, b_act);
 }
 
 static __inline void
 bufq_remove(struct buf_queue_head *head, struct buf *bp)
 {
 	if (bp == head->switch_point)
 		head->switch_point = TAILQ_NEXT(bp, b_act);
 	if (bp == head->insert_point) {
 		head->insert_point = TAILQ_PREV(bp, buf_queue, b_act);
 		if (head->insert_point == NULL)
 			head->last_pblkno = 0;
 	} else if (bp == TAILQ_FIRST(&head->queue))
 		head->last_pblkno = bp->b_pblkno;
 	TAILQ_REMOVE(&head->queue, bp, b_act);
 	if (TAILQ_FIRST(&head->queue) == head->switch_point)
 		head->switch_point = NULL;
 }
 
 static __inline struct buf *
 bufq_first(struct buf_queue_head *head)
 {
 	return (TAILQ_FIRST(&head->queue));
 }
 
-#define BUF_WRITE(bp)		VOP_BWRITE((bp)->b_vp, (bp))
+#define BUF_WRITE(bp)					\
+	(bp)->b_op->bop_write(bp)
+
 #define BUF_STRATEGY(bp)	VOP_STRATEGY((bp)->b_vp, (bp))
 
 static __inline void
 buf_start(struct buf *bp)
 {
 	if (bioops.io_start)
 		(*bioops.io_start)(bp);
 }
 
 static __inline void
 buf_complete(struct buf *bp)
 {
 	if (bioops.io_complete)
 		(*bioops.io_complete)(bp);
 }
 
 static __inline void
 buf_deallocate(struct buf *bp)
 {
 	if (bioops.io_deallocate)
 		(*bioops.io_deallocate)(bp);
 	BUF_LOCKFREE(bp);
 }
 
 static __inline void
 buf_movedeps(struct buf *bp, struct buf *bp2)
 {
 	if (bioops.io_movedeps)
 		(*bioops.io_movedeps)(bp, bp2);
 }
 
 static __inline int
 buf_countdeps(struct buf *bp, int i)
 {
 	if (bioops.io_countdeps)
 		return ((*bioops.io_countdeps)(bp, i));
 	else
 		return (0);
 }
 
 #endif /* _KERNEL */
 
 /*
  * Definitions for the buffer free lists.
  */
 #define BUFFER_QUEUES	6	/* number of free buffer queues */
 
 #define QUEUE_NONE	0	/* on no queue */
 #define QUEUE_LOCKED	1	/* locked buffers */
 #define QUEUE_CLEAN	2	/* non-B_DELWRI buffers */
 #define QUEUE_DIRTY	3	/* B_DELWRI buffers */
 #define QUEUE_EMPTYKVA	4	/* empty buffer headers w/KVA assignment */
 #define QUEUE_EMPTY	5	/* empty buffer headers */
 
 /*
  * Zero out the buffer's data area.
  */
 #define	clrbuf(bp) {							\
 	bzero((bp)->b_data, (u_int)(bp)->b_bcount);			\
 	(bp)->b_resid = 0;						\
 }
 
 /* Flags to low-level allocation routines. */
 #define B_CLRBUF	0x01	/* Request allocated buffer be cleared. */
 #define B_SYNC		0x02	/* Do all allocations synchronously. */
 #define	B_METAONLY	0x04	/* Return indirect block buffer. */
 #define B_NOWAIT	0x08	/* do not sleep to await lock */
 
 #ifdef _KERNEL
 extern int	nbuf;			/* The number of buffer headers */
 extern int	runningbufspace;
 extern int      buf_maxio;              /* nominal maximum I/O for buffer */
 extern struct	buf *buf;		/* The buffer headers. */
 extern char	*buffers;		/* The buffer contents. */
 extern int	bufpages;		/* Number of memory pages in the buffer pool. */
 extern struct	buf *swbuf;		/* Swap I/O buffer headers. */
 extern int	nswbuf;			/* Number of swap I/O buffer headers. */
 extern TAILQ_HEAD(swqueue, buf) bswlist;
 extern TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES];
 
 struct uio;
 
 caddr_t bufhashinit __P((caddr_t));
 void	bufinit __P((void));
 void	bwillwrite __P((void));
 int	buf_dirty_count_severe __P((void));
 void	bremfree __P((struct buf *));
 int	bread __P((struct vnode *, daddr_t, int,
 	    struct ucred *, struct buf **));
 int	breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int,
 	    struct ucred *, struct buf **));
 int	bwrite __P((struct buf *));
 void	bdwrite __P((struct buf *));
 void	bawrite __P((struct buf *));
 void	bdirty __P((struct buf *));
 void	bundirty __P((struct buf *));
 int	bowrite __P((struct buf *));
 void	brelse __P((struct buf *));
 void	bqrelse __P((struct buf *));
 int	vfs_bio_awrite __P((struct buf *));
 struct buf *     getpbuf __P((int *));
 struct buf *incore __P((struct vnode *, daddr_t));
 struct buf *gbincore __P((struct vnode *, daddr_t));
 int	inmem __P((struct vnode *, daddr_t));
 struct buf *getblk __P((struct vnode *, daddr_t, int, int, int));
 struct buf *geteblk __P((int));
 int	bufwait __P((struct buf *));
 void	bufdone __P((struct buf *));
 void	bufdonebio __P((struct bio *));
 
 void	cluster_callback __P((struct buf *));
 int	cluster_read __P((struct vnode *, u_quad_t, daddr_t, long,
 	    struct ucred *, long, int, struct buf **));
 int	cluster_wbuild __P((struct vnode *, long, daddr_t, int));
 void	cluster_write __P((struct buf *, u_quad_t, int));
 void	vfs_bio_set_validclean __P((struct buf *, int base, int size));
 void	vfs_bio_clrbuf __P((struct buf *));
 void	vfs_busy_pages __P((struct buf *, int clear_modify));
 void	vfs_unbusy_pages __P((struct buf *));
 void	vwakeup __P((struct buf *));
 void	vmapbuf __P((struct buf *));
 void	vunmapbuf __P((struct buf *));
 void	relpbuf __P((struct buf *, int *));
 void	brelvp __P((struct buf *));
 void	bgetvp __P((struct vnode *, struct buf *));
 void	pbgetvp __P((struct vnode *, struct buf *));
 void	pbrelvp __P((struct buf *));
 int	allocbuf __P((struct buf *bp, int size));
 void	reassignbuf __P((struct buf *, struct vnode *));
 void	pbreassignbuf __P((struct buf *, struct vnode *));
 struct	buf *trypbuf __P((int *));
 
 #endif /* _KERNEL */
 
 #endif /* !_SYS_BUF_H_ */
Index: head/sys/sys/vnode.h
===================================================================
--- head/sys/sys/vnode.h	(revision 75579)
+++ head/sys/sys/vnode.h	(revision 75580)
@@ -1,660 +1,658 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)vnode.h	8.7 (Berkeley) 2/4/94
  * $FreeBSD$
  */
 
 #ifndef _SYS_VNODE_H_
 #define	_SYS_VNODE_H_
 
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/queue.h>
 #include <sys/selinfo.h>
 #include <sys/uio.h>
 #include <sys/acl.h>
 
 /*
  * The vnode is the focus of all file activity in UNIX.  There is a
  * unique vnode allocated for each active file, each current directory,
  * each mounted-on file, text file, and the root.
  */
 
 /*
  * Vnode types.  VNON means no type.
  */
 enum vtype	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD };
 
 /*
  * Vnode tag types.
  * These are for the benefit of external programs only (e.g., pstat)
  * and should NEVER be inspected by the kernel.
  */
 enum vtagtype	{
 	VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC,
 	VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS,
 	VT_UNION, VT_MSDOSFS, VT_DEVFS, VT_TFS, VT_VFS, VT_CODA, VT_NTFS,
 	VT_HPFS, VT_NWFS, VT_PSEUDOFS, VT_SMBFS
 };
 
 /*
  * Each underlying filesystem allocates its own private area and hangs
  * it from v_data.  If non-null, this area is freed in getnewvnode().
  */
 TAILQ_HEAD(buflists, buf);
 
 typedef	int 	vop_t __P((void *));
 struct namecache;
 
 /*
  * Reading or writing any of these items requires holding the appropriate lock.
  * v_freelist is locked by the global vnode_free_list mutex.
  * v_mntvnodes is locked by the global mntvnodes mutex.
  * v_flag, v_usecount, v_holdcnt and v_writecount are
  *    locked by the v_interlock mutex.
  * v_pollinfo is locked by the lock contained inside it.
  */
 struct vnode {
 	u_long	v_flag;				/* vnode flags (see below) */
 	int	v_usecount;			/* reference count of users */
 	int	v_writecount;			/* reference count of writers */
 	int	v_holdcnt;			/* page & buffer references */
 	u_long	v_id;				/* capability identifier */
 	struct	mount *v_mount;			/* ptr to vfs we are in */
 	vop_t	**v_op;				/* vnode operations vector */
 	TAILQ_ENTRY(vnode) v_freelist;		/* vnode freelist */
 	LIST_ENTRY(vnode) v_mntvnodes;		/* vnodes for mount point */
 	struct	buflists v_cleanblkhd;		/* clean blocklist head */
 	struct	buflists v_dirtyblkhd;		/* dirty blocklist head */
 	LIST_ENTRY(vnode) v_synclist;		/* vnodes with dirty buffers */
 	long	v_numoutput;			/* num of writes in progress */
 	enum	vtype v_type;			/* vnode type */
 	union {
 		struct mount	*vu_mountedhere;/* ptr to mounted vfs (VDIR) */
 		struct socket	*vu_socket;	/* unix ipc (VSOCK) */
 		struct {
 			struct specinfo	*vu_specinfo; /* device (VCHR, VBLK) */
 			SLIST_ENTRY(vnode) vu_specnext;
 		} vu_spec;
 		struct fifoinfo	*vu_fifoinfo;	/* fifo (VFIFO) */
 	} v_un;
 	struct	nqlease *v_lease;		/* Soft reference to lease */
 	daddr_t	v_lastw;			/* last write (write cluster) */
 	daddr_t	v_cstart;			/* start block of cluster */
 	daddr_t	v_lasta;			/* last allocation */
 	int	v_clen;				/* length of current cluster */
 	struct vm_object *v_object;		/* Place to store VM object */
 	struct	mtx v_interlock;		/* lock on usecount and flag */
 	struct	lock v_lock;			/* used if fs don't have one */
 	struct	lock *v_vnlock;			/* pointer to vnode lock */
 	enum	vtagtype v_tag;			/* type of underlying data */
 	void 	*v_data;			/* private data for fs */
 	LIST_HEAD(, namecache) v_cache_src;	/* Cache entries from us */
 	TAILQ_HEAD(, namecache) v_cache_dst;	/* Cache entries to us */
 	struct	vnode *v_dd;			/* .. vnode */
 	u_long	v_ddid;				/* .. capability identifier */
 	struct	{
 		struct	mtx vpi_lock;		/* lock to protect below */
 		struct	selinfo vpi_selinfo;	/* identity of poller(s) */
 		short	vpi_events;		/* what they are looking for */
 		short	vpi_revents;		/* what has happened */
 	} v_pollinfo;
 	struct proc *v_vxproc;			/* proc owning VXLOCK */
 #ifdef	DEBUG_LOCKS
 	const char *filename;			/* Source file doing locking */
 	int line;				/* Line number doing locking */
 #endif
 };
 #define	v_mountedhere	v_un.vu_mountedhere
 #define	v_socket	v_un.vu_socket
 #define	v_rdev		v_un.vu_spec.vu_specinfo
 #define	v_specnext	v_un.vu_spec.vu_specnext
 #define	v_fifoinfo	v_un.vu_fifoinfo
 
 /*
  * Report `events' on vp through vn_pollevent(), but only when at least one
  * of the requested bits is currently set in v_pollinfo.vpi_events.
  *
  * Both arguments are expanded more than once, so they must be free of
  * side effects.  The vpi_events test is done by the macro itself without
  * taking any lock.
  */
 #define	VN_POLLEVENT(vp, events)				\
 	do {							\
 		if ((vp)->v_pollinfo.vpi_events & (events))	\
 			vn_pollevent((vp), (events));		\
 	} while (0)
 
 /*
  * Vnode flags.
  */
 #define	VROOT		0x00001	/* root of its file system */
 #define	VTEXT		0x00002	/* vnode is a pure text prototype */
 #define	VSYSTEM		0x00004	/* vnode being used by kernel */
 #define	VISTTY		0x00008	/* vnode represents a tty */
 #define	VXLOCK		0x00100	/* vnode is locked to change underlying type */
 #define	VXWANT		0x00200	/* process is waiting for vnode */
 #define	VBWAIT		0x00400	/* waiting for output to complete */
 /* open for business    0x00800 */
 /* open for business    0x01000 */
 #define	VOBJBUF		0x02000	/* Allocate buffers in VM object */
 #define	VCOPYONWRITE    0x04000 /* vnode is doing copy-on-write */
 #define	VAGE		0x08000	/* Insert vnode at head of free list */
 #define	VOLOCK		0x10000	/* vnode is locked waiting for an object */
 #define	VOWANT		0x20000	/* a process is waiting for VOLOCK */
 #define	VDOOMED		0x40000	/* This vnode is being recycled */
 #define	VFREE		0x80000	/* This vnode is on the freelist */
 /* open for business	0x100000 */
 #define	VONWORKLST	0x200000 /* On syncer work-list */
 #define	VMOUNT		0x400000 /* Mount in progress */
 
 /*
  * Vnode attributes.  A field value of VNOVAL represents a field whose value
  * is unavailable (getattr) or which is not to be changed (setattr).
  */
 struct vattr {
 	enum vtype	va_type;	/* vnode type (for create) */
 	u_short		va_mode;	/* files access mode and type */
 	short		va_nlink;	/* number of references to file */
 	uid_t		va_uid;		/* owner user id */
 	gid_t		va_gid;		/* owner group id */
 	udev_t		va_fsid;	/* file system id */
 	long		va_fileid;	/* file id */
 	u_quad_t	va_size;	/* file size in bytes */
 	long		va_blocksize;	/* blocksize preferred for i/o */
 	struct timespec	va_atime;	/* time of last access */
 	struct timespec	va_mtime;	/* time of last modification */
 	struct timespec	va_ctime;	/* time file changed */
 	u_long		va_gen;		/* generation number of file */
 	u_long		va_flags;	/* flags defined for file */
 	udev_t		va_rdev;	/* device the special file represents */
 	u_quad_t	va_bytes;	/* bytes of disk space held by file */
 	u_quad_t	va_filerev;	/* file modification number */
 	u_int		va_vaflags;	/* operations flags, see below */
 	long		va_spare;	/* remain quad aligned */
 };
 
 /*
  * Flags for va_vaflags.
  */
 #define	VA_UTIMES_NULL	0x01		/* utimes argument was NULL */
 #define VA_EXCLUSIVE	0x02		/* exclusive create request */
 
 /*
  * Flags for ioflag. (high 16 bits used to ask for read-ahead and
  * help with write clustering)
  */
 #define	IO_UNIT		0x01		/* do I/O as atomic unit */
 #define	IO_APPEND	0x02		/* append write to end */
 #define	IO_SYNC		0x04		/* do I/O synchronously */
 #define	IO_NODELOCKED	0x08		/* underlying node already locked */
 #define	IO_NDELAY	0x10		/* FNDELAY flag set in file table */
 #define	IO_VMIO		0x20		/* data already in VMIO space */
 #define	IO_INVAL	0x40		/* invalidate after I/O */
 #define IO_ASYNC	0x80		/* bawrite rather than bdwrite */
 
 /*
  *  Modes.  Some values same as Ixxx entries from inode.h for now.
  */
 #define	VADMIN	010000		/* permission to administer vnode */
 #define	VSUID	004000		/* set user id on execution */
 #define	VSGID	002000		/* set group id on execution */
 #define	VSVTX	001000		/* save swapped text even after use */
 #define	VREAD	000400		/* read, write, execute permissions */
 #define	VWRITE	000200
 #define	VEXEC	000100
 
 /*
  * Token indicating no attribute value yet assigned.
  */
 #define	VNOVAL	(-1)
 
 #ifdef _KERNEL
 
 #ifdef MALLOC_DECLARE
 MALLOC_DECLARE(M_VNODE);
 #endif
 
 /*
  * Convert between vnode types and inode formats (since POSIX.1
  * defines mode word of stat structure in terms of inode formats).
  */
 extern enum vtype	iftovt_tab[];
 extern int		vttoif_tab[];
 #define IFTOVT(mode)	(iftovt_tab[((mode) & S_IFMT) >> 12])
 #define VTTOIF(indx)	(vttoif_tab[(int)(indx)])
 #define MAKEIMODE(indx, mode)	(int)(VTTOIF(indx) | (mode))
 
 /*
  * Flags to various vnode functions.
  */
 #define	SKIPSYSTEM	0x0001	/* vflush: skip vnodes marked VSYSTEM */
 #define	FORCECLOSE	0x0002	/* vflush: force file closure */
 #define	WRITECLOSE	0x0004	/* vflush: only close writable files */
 #define	DOCLOSE		0x0008	/* vclean: close active files */
 #define	V_SAVE		0x0001	/* vinvalbuf: sync file first */
 #define	REVOKEALL	0x0001	/* vop_revoke: revoke all aliases */
 #define	V_WAIT		0x0001	/* vn_start_write: sleep for suspend */
 #define	V_NOWAIT	0x0002	/* vn_start_write: don't sleep for suspend */
 #define	V_XSLEEP	0x0004	/* vn_start_write: just return after sleep */
 
 #define	VREF(vp)	vref(vp)
 
 
 #ifdef DIAGNOSTIC
 #define	VATTR_NULL(vap)	vattr_null(vap)
 #else
 #define	VATTR_NULL(vap)	(*(vap) = va_null)	/* initialize a vattr */
 #endif /* DIAGNOSTIC */
 
 #define	NULLVP	((struct vnode *)NULL)
 
 #define	VNODEOP_SET(f) \
 	C_SYSINIT(f##init, SI_SUB_VFS, SI_ORDER_SECOND, vfs_add_vnodeops, &f); \
 	C_SYSUNINIT(f##uninit, SI_SUB_VFS, SI_ORDER_SECOND, vfs_rm_vnodeops, &f);
 
 /*
  * Global vnode data.
  */
 extern	struct vnode *rootvnode;	/* root (i.e. "/") vnode */
 extern	int desiredvnodes;		/* number of vnodes desired */
 extern	time_t syncdelay;		/* max time to delay syncing data */
 extern	time_t filedelay;		/* time to delay syncing files */
 extern	time_t dirdelay;		/* time to delay syncing directories */
 extern	time_t metadelay;		/* time to delay syncing metadata */
 extern	struct vm_zone *namei_zone;
 extern	int prtactive;			/* nonzero to call vprint() */
 extern	struct vattr va_null;		/* predefined null vattr structure */
 extern	int vfs_ioopt;
 
 /*
  * Macro/function to check for client cache inconsistency w.r.t. leasing.
  */
 #define	LEASE_READ	0x1		/* Check lease for readers */
 #define	LEASE_WRITE	0x2		/* Check lease for modifiers */
 
 
 extern void	(*lease_updatetime) __P((int deltat));
 
 /*
  * VSHOULDFREE(vp) is true when the vnode is flagged neither VFREE nor
  * VDOOMED, has zero hold and use counts, and either has no VM object or
  * has one with no references and no resident pages.
  */
 #define VSHOULDFREE(vp)	\
 	(!((vp)->v_flag & (VFREE|VDOOMED)) && \
 	 !(vp)->v_holdcnt && !(vp)->v_usecount && \
 	 (!(vp)->v_object || \
 	  !((vp)->v_object->ref_count || (vp)->v_object->resident_page_count)))
 
 /*
  * VSHOULDBUSY(vp) is true when the vnode is flagged VFREE even though it
  * has a non-zero hold count or use count.
  */
 #define VSHOULDBUSY(vp)	\
 	(((vp)->v_flag & VFREE) && \
 	 ((vp)->v_holdcnt || (vp)->v_usecount))
 
 #define	VI_LOCK(vp)	mtx_lock(&(vp)->v_interlock)
 #define	VI_TRYLOCK(vp)	mtx_trylock(&(vp)->v_interlock)
 #define	VI_UNLOCK(vp)	mtx_unlock(&(vp)->v_interlock)
 
 #endif /* _KERNEL */
 
 
 /*
  * Mods for extensibility.
  */
 
 /*
  * Flags for vdesc_flags:
  */
 #define VDESC_MAX_VPS		16
 /* Low order 16 flag bits are reserved for willrele flags for vp arguments. */
 #define VDESC_VP0_WILLRELE	0x0001
 #define VDESC_VP1_WILLRELE	0x0002
 #define VDESC_VP2_WILLRELE	0x0004
 #define VDESC_VP3_WILLRELE	0x0008
 #define VDESC_NOMAP_VPP		0x0100
 #define VDESC_VPP_WILLRELE	0x0200
 
 /*
  * VDESC_NO_OFFSET is used to identify the end of the offset list
  * and in places where no such field exists.
  */
 #define VDESC_NO_OFFSET -1
 
 /*
  * This structure describes the vnode operation taking place.
  */
 struct vnodeop_desc {
 	int	vdesc_offset;		/* offset in vector--first for speed */
 	char    *vdesc_name;		/* a readable name for debugging */
 	int	vdesc_flags;		/* VDESC_* flags */
 
 	/*
 	 * These ops are used by bypass routines to map and locate arguments.
 	 * Creds and procs are not needed in bypass routines, but sometimes
 	 * they are useful to (for example) transport layers.
 	 * Nameidata is useful because it has a cred in it.
 	 */
 	int	*vdesc_vp_offsets;	/* list ended by VDESC_NO_OFFSET */
 	int	vdesc_vpp_offset;	/* return vpp location */
 	int	vdesc_cred_offset;	/* cred location, if any */
 	int	vdesc_proc_offset;	/* proc location, if any */
 	int	vdesc_componentname_offset; /* if any */
 	/*
 	 * Finally, we've got a list of private data (about each operation)
 	 * for each transport layer.  (Support to manage this list is not
 	 * yet part of BSD.)
 	 */
 	caddr_t	*vdesc_transports;
 };
 
 #ifdef _KERNEL
 /*
  * A list of all the operation descs.
  */
 extern struct vnodeop_desc *vnodeop_descs[];
 
 /*
  * Interlock for scanning list of vnodes attached to a mountpoint
  */
 extern struct mtx mntvnode_mtx;
 
 /*
  * This macro is very helpful in defining those offsets in the vdesc struct.
  *
  * This is stolen from X11R4.  I ignored all the fancy stuff for
  * Crays, so if you decide to port this to such a serious machine,
  * you might want to consult Intrinsic.h's XtOffset{,Of,To}.
  */
 #define VOPARG_OFFSET(p_type,field) \
         ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL)))
 #define VOPARG_OFFSETOF(s_type,field) \
 	VOPARG_OFFSET(s_type*,field)
 #define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \
 	((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET)))
 
 
 /*
  * This structure is used to configure the new vnodeops vector.
  */
 struct vnodeopv_entry_desc {
 	struct vnodeop_desc *opve_op;   /* which operation this is */
 	vop_t *opve_impl;		/* code implementing this operation */
 };
 struct vnodeopv_desc {
 			/* ptr to the ptr to the vector where op should go */
 	vop_t ***opv_desc_vector_p;
 	struct vnodeopv_entry_desc *opv_desc_ops;   /* null terminated list */
 };
 
 /*
  * A generic structure.
  * This can be used by bypass routines to identify generic arguments.
  */
 struct vop_generic_args {
 	struct vnodeop_desc *a_desc;
 	/* other random data follows, presumably */
 };
 
 
 #ifdef DEBUG_VFS_LOCKS
 /*
  * Macros to aid in tracing VFS locking problems.  Not totally
  * reliable since if the process sleeps between changing the lock
  * state and checking it with the assert, some other process could
  * change the state.  They are good enough for debugging a single
  * filesystem using a single-threaded test.  I find that 'cvs co src'
  * is a pretty good test.
  *
  * Every macro takes the vnode to check and a string that is printed as
  * the prefix of the panic message.  A NULL vnode, or a vnode whose v_tag
  * is not accepted by IS_LOCKING_VFS(), is silently ignored.
  */
 
 /*
  * [dfr] Kludge until I get around to fixing all the vfs locking.
  */
 #define IS_LOCKING_VFS(vp)	((vp)->v_tag == VT_UFS		\
 				 || (vp)->v_tag == VT_MFS	\
 				 || (vp)->v_tag == VT_NFS	\
 				 || (vp)->v_tag == VT_LFS	\
 				 || (vp)->v_tag == VT_ISOFS	\
 				 || (vp)->v_tag == VT_MSDOSFS	\
 				 || (vp)->v_tag == VT_DEVFS)
 
 /* Panic unless VOP_ISLOCKED() reports the vnode as locked in some way. */
 #define ASSERT_VOP_LOCKED(vp, str)					\
 do {									\
 	struct vnode *_vp = (vp);					\
 									\
 	if (_vp && IS_LOCKING_VFS(_vp) && !VOP_ISLOCKED(_vp, NULL))	\
 		panic("%s: %p is not locked but should be", str, _vp);	\
 } while (0)
 
 /* Panic if curproc holds the vnode exclusively (LK_EXCLUSIVE). */
 #define ASSERT_VOP_UNLOCKED(vp, str)					\
 do {									\
 	struct vnode *_vp = (vp);					\
 	int lockstate;							\
 									\
 	if (_vp && IS_LOCKING_VFS(_vp)) {				\
 		lockstate = VOP_ISLOCKED(_vp, curproc);			\
 		if (lockstate == LK_EXCLUSIVE)				\
 			panic("%s: %p is locked but should not be",	\
 			    str, _vp);					\
 	}								\
 } while (0)
 
 /* Panic unless curproc holds the vnode exclusively. */
 #define ASSERT_VOP_ELOCKED(vp, str)					\
 do {									\
 	struct vnode *_vp = (vp);					\
 									\
 	if (_vp && IS_LOCKING_VFS(_vp) &&				\
 	    VOP_ISLOCKED(_vp, curproc) != LK_EXCLUSIVE)			\
 		panic("%s: %p is not exclusive locked but should be",	\
 		    str, _vp);						\
 } while (0)
 
 /* Panic unless VOP_ISLOCKED() reports LK_EXCLOTHER for curproc. */
 #define ASSERT_VOP_ELOCKED_OTHER(vp, str)				\
 do {									\
 	struct vnode *_vp = (vp);					\
 									\
 	if (_vp && IS_LOCKING_VFS(_vp) &&				\
 	    VOP_ISLOCKED(_vp, curproc) != LK_EXCLOTHER)			\
 		panic("%s: %p is not exclusive locked by another proc",	\
 		    str, _vp);						\
 } while (0)
 
 /* Panic unless VOP_ISLOCKED() reports LK_SHARED. */
 #define ASSERT_VOP_SLOCKED(vp, str)					\
 do {									\
 	struct vnode *_vp = (vp);					\
 									\
 	if (_vp && IS_LOCKING_VFS(_vp) &&				\
 	    VOP_ISLOCKED(_vp, NULL) != LK_SHARED)			\
 		panic("%s: %p is not locked shared but should be",	\
 		    str, _vp);						\
 } while (0)
 
 #else
 
 /*
  * Without DEBUG_VFS_LOCKS every assertion expands to nothing.  All five
  * macros must be stubbed out here, otherwise code that uses one of them
  * only builds when the debugging option is enabled.
  */
 #define ASSERT_VOP_LOCKED(vp, str)
 #define ASSERT_VOP_UNLOCKED(vp, str)
 #define ASSERT_VOP_ELOCKED(vp, str)
 #define ASSERT_VOP_ELOCKED_OTHER(vp, str)
 #define ASSERT_VOP_SLOCKED(vp, str)
 
 #endif
 
 /*
  * VOCALL calls an op given an ops vector.  We break it out because BSD's
  * vclean changes the ops vector and then wants to call ops with the old
  * vector.
  */
 #define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP))
 
 /*
  * This call works for vnodes in the kernel.
  */
 #define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP))
 #define VDESC(OP) (& __CONCAT(OP,_desc))
 #define VOFFSET(OP) (VDESC(OP)->vdesc_offset)
 
 /*
  * VMIO support inline
  */
 
 extern int vmiodirenable;
  
 static __inline int
 vn_canvmio(struct vnode *vp) 
 {
     if (vp && (vp->v_type == VREG || (vmiodirenable && vp->v_type == VDIR)))
         return(TRUE); 
     return(FALSE); 
 }
 
 /*
  * Finally, include the default set of vnode operations.
  */
 #include "vnode_if.h"
 
 /*
  * Public vnode manipulation functions.
  */
 struct componentname;
 struct file;
 struct mount;
 struct nameidata;
 struct ostat;
 struct proc;
 struct stat;
 struct nstat;
 struct ucred;
 struct uio;
 struct vattr;
 struct vnode;
-struct vop_bwrite_args;
 
 extern int	(*lease_check_hook) __P((struct vop_lease_args *));
 
 struct	vnode *addaliasu __P((struct vnode *vp, udev_t nvp_rdev));
 int 	bdevvp __P((dev_t dev, struct vnode **vpp));
 /* cache_* may belong in namei.h. */
 void	cache_enter __P((struct vnode *dvp, struct vnode *vp,
 	    struct componentname *cnp));
 int	cache_lookup __P((struct vnode *dvp, struct vnode **vpp,
 	    struct componentname *cnp));
 void	cache_purge __P((struct vnode *vp));
 void	cache_purgevfs __P((struct mount *mp));
 void	cvtstat __P((struct stat *st, struct ostat *ost));
 void	cvtnstat __P((struct stat *sb, struct nstat *nsb));
 int 	getnewvnode __P((enum vtagtype tag,
 	    struct mount *mp, vop_t **vops, struct vnode **vpp));
 int	lease_check __P((struct vop_lease_args *ap));
 int	spec_vnoperate __P((struct vop_generic_args *));
 int	speedup_syncer __P((void));
 int	textvp_fullpath __P((struct proc *p, char **retbuf, char **retfreebuf));
 int	vaccess __P((enum vtype type, mode_t file_mode, uid_t uid, gid_t gid,
 	    mode_t acc_mode, struct ucred *cred, int *privused));
 int	vaccess_acl_posix1e __P((enum vtype type, uid_t file_uid,
 	    gid_t file_gid, struct acl *acl, mode_t acc_mode,
 	    struct ucred *cred, int *privused));
 void 	vattr_null __P((struct vattr *vap));
 int 	vcount __P((struct vnode *vp));
 void	vdrop __P((struct vnode *));
 int	vfinddev __P((dev_t dev, enum vtype type, struct vnode **vpp));
 void	vfs_add_vnodeops __P((const void *));
 void	vfs_rm_vnodeops __P((const void *));
 int	vflush __P((struct mount *mp, struct vnode *skipvp, int flags));
 int 	vget __P((struct vnode *vp, int lockflag, struct proc *p));
 void 	vgone __P((struct vnode *vp));
 void	vgonel __P((struct vnode *vp, struct proc *p));
 void	vhold __P((struct vnode *));
 int	vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred,
 
 	    struct proc *p, int slpflag, int slptimeo));
 int	vtruncbuf __P((struct vnode *vp, struct ucred *cred, struct proc *p,
 		off_t length, int blksize));
 void	vprint __P((char *label, struct vnode *vp));
 int	vrecycle __P((struct vnode *vp, struct mtx *inter_lkp,
 	    struct proc *p));
 int 	vn_close __P((struct vnode *vp,
 	    int flags, struct ucred *cred, struct proc *p));
 void	vn_finished_write __P((struct mount *mp));
 int	vn_isdisk __P((struct vnode *vp, int *errp));
 int	vn_lock __P((struct vnode *vp, int flags, struct proc *p));
 #ifdef	DEBUG_LOCKS
 int	debug_vn_lock __P((struct vnode *vp, int flags, struct proc *p,
 	    const char *filename, int line));
 #define vn_lock(vp,flags,p) debug_vn_lock(vp,flags,p,__FILE__,__LINE__)
 #endif
 int 	vn_open __P((struct nameidata *ndp, int *flagp, int cmode));
 void	vn_pollevent __P((struct vnode *vp, int events));
 void	vn_pollgone __P((struct vnode *vp));
 int	vn_pollrecord __P((struct vnode *vp, struct proc *p, int events));
 int 	vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base,
 	    int len, off_t offset, enum uio_seg segflg, int ioflg,
 	    struct ucred *cred, int *aresid, struct proc *p));
 int	vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p));
 int	vn_start_write __P((struct vnode *vp, struct mount **mpp, int flags));
 dev_t	vn_todev __P((struct vnode *vp));
 int	vn_write_suspend_wait __P((struct vnode *vp, struct mount *mp,
 		int flags));
 int 	vn_writechk __P((struct vnode *vp));
 int	vn_extattr_get __P((struct vnode *vp, int ioflg, int attrnamespace,
 	    const char *attrname, int *buflen, char *buf, struct proc *p));
 int	vn_extattr_set __P((struct vnode *vp, int ioflg, int attrnamespace,
 	    const char *attrname, int buflen, char *buf, struct proc *p));
 int	vn_extattr_rm(struct vnode *vp, int ioflg, int attrnamespace,
 	    const char *attrname, struct proc *p);
 int	vfs_cache_lookup __P((struct vop_lookup_args *ap));
 int	vfs_object_create __P((struct vnode *vp, struct proc *p,
                 struct ucred *cred));
 void	vfs_timestamp __P((struct timespec *));
 void	vfs_write_resume __P((struct mount *mp));
 void	vfs_write_suspend __P((struct mount *mp));
-int	vop_stdbwrite __P((struct vop_bwrite_args *ap));
 int	vop_stdgetwritemount __P((struct vop_getwritemount_args *));
 int	vop_stdinactive __P((struct vop_inactive_args *));
 int	vop_stdislocked __P((struct vop_islocked_args *));
 int	vop_stdlock __P((struct vop_lock_args *));
 int	vop_stdunlock __P((struct vop_unlock_args *));
 int	vop_noislocked __P((struct vop_islocked_args *));
 int	vop_nolock __P((struct vop_lock_args *));
 int	vop_nopoll __P((struct vop_poll_args *));
 int	vop_nounlock __P((struct vop_unlock_args *));
 int	vop_stdpathconf __P((struct vop_pathconf_args *));
 int	vop_stdpoll __P((struct vop_poll_args *));
 int	vop_revoke __P((struct vop_revoke_args *));
 int	vop_sharedlock __P((struct vop_lock_args *));
 int	vop_eopnotsupp __P((struct vop_generic_args *ap));
 int	vop_ebadf __P((struct vop_generic_args *ap));
 int	vop_einval __P((struct vop_generic_args *ap));
 int	vop_enotty __P((struct vop_generic_args *ap));
 int	vop_defaultop __P((struct vop_generic_args *ap));
 int	vop_null __P((struct vop_generic_args *ap));
 int	vop_panic __P((struct vop_generic_args *ap));
 int	vop_stdcreatevobject __P((struct vop_createvobject_args *ap));
 int	vop_stddestroyvobject __P((struct vop_destroyvobject_args *ap));
 int	vop_stdgetvobject __P((struct vop_getvobject_args *ap));
 
 void	vfree __P((struct vnode *));
 void 	vput __P((struct vnode *vp));
 void 	vrele __P((struct vnode *vp));
 void	vref __P((struct vnode *vp));
 void	vbusy __P((struct vnode *vp));
 
 extern	vop_t **default_vnodeop_p;
 extern	vop_t **spec_vnodeop_p;
 extern	vop_t **dead_vnodeop_p;
 
 #endif /* _KERNEL */
 
 #endif /* !_SYS_VNODE_H_ */
Index: head/sys/ufs/mfs/mfs_vnops.c
===================================================================
--- head/sys/ufs/mfs/mfs_vnops.c	(revision 75579)
+++ head/sys/ufs/mfs/mfs_vnops.c	(revision 75580)
@@ -1,441 +1,440 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)mfs_vnops.c	8.11 (Berkeley) 5/22/95
  * $FreeBSD$
  */
 
 #include "opt_ufs.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/proc.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/vnode.h>
 #include <sys/malloc.h>
 #include <sys/sysproto.h>
 #include <sys/mman.h>
 
 #include <ufs/ufs/extattr.h>
 
 #include <ufs/mfs/mfsnode.h>
 #include <ufs/mfs/mfs_extern.h>
 
 static int	mfs_badop __P((struct vop_generic_args *));
 static int	mfs_bmap __P((struct vop_bmap_args *));
 static int	mfs_close __P((struct vop_close_args *));
 static int	mfs_fsync __P((struct vop_fsync_args *));
 static int	mfs_freeblks __P((struct vop_freeblks_args *));
 static int	mfs_inactive __P((struct vop_inactive_args *)); /* XXX */
 static int	mfs_open __P((struct vop_open_args *));
 static int	mfs_reclaim __P((struct vop_reclaim_args *)); /* XXX */
 static int	mfs_print __P((struct vop_print_args *)); /* XXX */
 static int	mfs_strategy __P((struct vop_strategy_args *)); /* XXX */
 static int	mfs_getpages __P((struct vop_getpages_args *)); /* XXX */
 /*
  * mfs vnode operations.
  */
 vop_t **mfs_vnodeop_p;
 static struct vnodeopv_entry_desc mfs_vnodeop_entries[] = {
 	{ &vop_default_desc,		(vop_t *) mfs_badop },
 	{ &vop_bmap_desc,		(vop_t *) mfs_bmap },
-	{ &vop_bwrite_desc,		(vop_t *) vop_defaultop },
 	{ &vop_close_desc,		(vop_t *) mfs_close },
 	{ &vop_createvobject_desc,	(vop_t *) vop_stdcreatevobject },
 	{ &vop_destroyvobject_desc,	(vop_t *) vop_stddestroyvobject },
 	{ &vop_freeblks_desc,		(vop_t *) mfs_freeblks },
 	{ &vop_fsync_desc,		(vop_t *) mfs_fsync },
 #ifdef UFS_EXTATTR
 	{ &vop_getextattr_desc,		(vop_t *) ufs_vop_getextattr },
 #endif
 	{ &vop_getpages_desc,		(vop_t *) mfs_getpages },
 	{ &vop_getvobject_desc,		(vop_t *) vop_stdgetvobject },
 	{ &vop_inactive_desc,		(vop_t *) mfs_inactive },
 	{ &vop_ioctl_desc,		(vop_t *) vop_enotty },
 	{ &vop_islocked_desc,		(vop_t *) vop_defaultop },
 	{ &vop_lock_desc,		(vop_t *) vop_defaultop },
 	{ &vop_open_desc,		(vop_t *) mfs_open },
 	{ &vop_print_desc,		(vop_t *) mfs_print },
 	{ &vop_reclaim_desc,		(vop_t *) mfs_reclaim },
 #ifdef UFS_EXTATTR
 	{ &vop_setextattr_desc,		(vop_t *) ufs_vop_setextattr },
 #endif
 	{ &vop_strategy_desc,		(vop_t *) mfs_strategy },
 	{ &vop_unlock_desc,		(vop_t *) vop_defaultop },
 	{ &vop_getwritemount_desc,	(vop_t *) vop_stdgetwritemount },
 	{ NULL, NULL }
 };
 static struct vnodeopv_desc mfs_vnodeop_opv_desc =
 	{ &mfs_vnodeop_p, mfs_vnodeop_entries };
 
 VNODEOP_SET(mfs_vnodeop_opv_desc);
 
 /*
  * Vnode Operations.
  *
  * Open called to allow memory filesystem to initialize and
  * validate before actual IO. Record our process identifier
  * so we can tell when we are doing I/O to ourself.
  */
 /* ARGSUSED */
 static int
 mfs_open(ap)
 	struct vop_open_args /* {
 		struct vnode *a_vp;
 		int  a_mode;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 
 	if (ap->a_vp->v_type != VCHR)
 		panic("mfs_open not VCHR");
 	return (0);
 }
 
 static int
 mfs_fsync(ap)
 	struct vop_fsync_args *ap;
 {
 
 	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_fsync), ap));
 }
 
 /*
  * mfs_freeblks() - hook to allow us to free physical memory.
  *
  *	The request is turned into a BIO_DELETE buffer and pushed through
  *	VOP_STRATEGY().  We can't just madvise() here because the release
  *	has to be ordered correctly against other bio requests, so it is
  *	queued like any other I/O.
  *
  *	Note: geteblk() sets B_INVAL.  We leave it set to guarantee buffer
  *	throw-away on brelse()? XXX
  */
 
 static int
 mfs_freeblks(ap)
 	struct vop_freeblks_args /* {
 		struct vnode *a_vp;
 		daddr_t a_addr;
 		daddr_t a_length;
 	} */ *ap;
 {
 	struct vnode *devvp;
 	struct buf *delbp;
 
 	/* Find the device vnode for our rdev; it must still be in use. */
 	if (!vfinddev(ap->a_vp->v_rdev, VCHR, &devvp) ||
 	    devvp->v_usecount == 0)
 		panic("mfs_freeblks: bad dev");
 
 	/* Build an asynchronous delete request covering the freed range. */
 	delbp = geteblk(ap->a_length);
 	delbp->b_flags |= B_ASYNC;
 	delbp->b_iocmd = BIO_DELETE;
 	delbp->b_dev = ap->a_vp->v_rdev;
 	delbp->b_blkno = ap->a_addr;
 	delbp->b_offset = dbtob(ap->a_addr);
 	delbp->b_bcount = ap->a_length;
 	BUF_KERNPROC(delbp);
 	VOP_STRATEGY(devvp, delbp);
 	return (0);
 }
 
 /*
  * Pass I/O requests to the memory filesystem process.
  *
  * Three cases are distinguished by mfs_pid:
  *  - 0 (mini-root): the data is copied directly with bcopy() and the
  *    buffer is completed here.  BIO_DELETE is a no-op in this mode.
  *  - the current process: the I/O is performed immediately by mfs_doio().
  *  - anything else: the buffer is appended to the node's queue and the
  *    sleeper on the device vnode is woken up.
  *
  * Always returns 0; panics if the device vnode cannot be found or is
  * unreferenced.
  */
 static int
 mfs_strategy(ap)
 	struct vop_strategy_args /* {
 		struct vnode *a_vp;
 		struct bio *a_bp;
 	} */ *ap;
 {
 	register struct buf *bp = (struct buf *)ap->a_bp;
 	register struct mfsnode *mfsp;
 	struct vnode *vp;
 	struct proc *p = curproc;		/* XXX */
 	int s;
 
 	if (!vfinddev(bp->b_dev, VCHR, &vp) || vp->v_usecount == 0)
 		panic("mfs_strategy: bad dev");
 	mfsp = VTOMFS(vp);
 
 	/*
 	 * splbio required for queueing/dequeueing, in case of forwarded
 	 * BPs from bio interrupts (?).  It may not be necessary.
 	 */
 
 	s = splbio();
 
 	if (mfsp->mfs_pid == 0) {
 		/*
 		 * mini-root.  Note: BIO_DELETE not supported at the moment,
 		 * I'm not sure what kind of dataspace b_data is in.
 		 */
 		caddr_t base;
 
 		base = mfsp->mfs_baseoff + (bp->b_blkno << DEV_BSHIFT);
 		if (bp->b_iocmd == BIO_DELETE) {
 			/*
 			 * Nothing to release.  This test must be chained
 			 * to the read/write test below; otherwise a delete
 			 * request is treated as a write and the contents of
 			 * b_data are copied over the backing store.
 			 */
 		} else if (bp->b_iocmd == BIO_READ)
 			bcopy(base, bp->b_data, bp->b_bcount);
 		else
 			bcopy(bp->b_data, base, bp->b_bcount);
 		bufdone(bp);
 	} else if (mfsp->mfs_pid == p->p_pid) {
 		/*
 		 * VOP to self.  Drop the spl level around the actual I/O
 		 * and raise it again so the common splx() below balances.
 		 */
 		splx(s);
 		mfs_doio(bp, mfsp);
 		s = splbio();
 	} else {
 		/*
 		 * VOP from some other process, queue to MFS process and
 		 * wake it up.
 		 */
 		bufq_insert_tail(&mfsp->buf_queue, bp);
 		wakeup((caddr_t)vp);
 	}
 	splx(s);
 	return (0);
 }
 
 /*
  * Memory file system I/O.
  *
  * Trivial on the HP since buffer has already been mapped into KVA space.
  *
  * Read and Write are handled with a simple copyin and copyout.    
  *
  * We also partially support VOP_FREEBLKS() via BIO_DELETE.  We can't implement
  * completely -- for example, on fragments or inode metadata, but we can
  * implement it for page-aligned requests.
  */
 void
 mfs_doio(bp, mfsp)
 	register struct buf *bp;
 	struct mfsnode *mfsp;
 {
 	caddr_t base = mfsp->mfs_baseoff + (bp->b_blkno << DEV_BSHIFT);
 
 	if (bp->b_iocmd == BIO_DELETE) {
 		/*
 		 * Implement BIO_DELETE, which allows the filesystem to tell
 		 * a block device when blocks are no longer needed (like when
 		 * a file is deleted).  We use the hook to MADV_FREE the VM.
 		 * This makes an MFS filesystem work as well or better then
 		 * a sun-style swap-mounted filesystem.
 		 */
 		int bytes = bp->b_bcount;
 
 		if ((vm_offset_t)base & PAGE_MASK) {
 			int n = PAGE_SIZE - ((vm_offset_t)base & PAGE_MASK);
 			bytes -= n;
 			base += n;
 		}
                 if (bytes > 0) {
                         struct madvise_args uap;
 
 			bytes &= ~PAGE_MASK;
 			if (bytes != 0) {
 				bzero(&uap, sizeof(uap));
 				uap.addr  = base;
 				uap.len   = bytes;
 				uap.behav = MADV_FREE;
 				madvise(curproc, &uap);
 			}
                 }
 		bp->b_error = 0;
 	} else if (bp->b_iocmd == BIO_READ) {
 		/*
 		 * Read data from our 'memory' disk
 		 */
 		bp->b_error = copyin(base, bp->b_data, bp->b_bcount);
 	} else {
 		/*
 		 * Write data to our 'memory' disk
 		 */
 		bp->b_error = copyout(bp->b_data, base, bp->b_bcount);
 	}
 	if (bp->b_error)
 		bp->b_ioflags |= BIO_ERROR;
 	bufdone(bp);
 }
 
 /*
  * This is a noop, simply returning what one has been given.
  */
 static int
 mfs_bmap(ap)
 	struct vop_bmap_args /* {
 		struct vnode *a_vp;
 		ufs_daddr_t  a_bn;
 		struct vnode **a_vpp;
 		ufs_daddr_t *a_bnp;
 		int *a_runp;
 	} */ *ap;
 {
 
 	/*
 	 * Identity mapping: every output is optional and is filled in
 	 * only when the caller supplied somewhere to put it.
 	 */
 	if (ap->a_vpp != NULL) {
 		/* The block lives on the very vnode that was passed in. */
 		*ap->a_vpp = ap->a_vp;
 	}
 	if (ap->a_bnp != NULL) {
 		/* The block number is returned untranslated. */
 		*ap->a_bnp = ap->a_bn;
 	}
 	if (ap->a_runp != NULL) {
 		/* No run length is reported. */
 		*ap->a_runp = 0;
 	}
 
 	return (0);
 }
 
 /*
  * Memory filesystem close routine
  */
 /* ARGSUSED */
 static int
 mfs_close(ap)
 	struct vop_close_args /* {
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct proc *a_p;
 	} */ *ap;
 {
 	register struct vnode *vp = ap->a_vp;
 	register struct mfsnode *mfsp = VTOMFS(vp);
 	register struct buf *bp;
 	int error;
 
 	/*
 	 * Finish any pending I/O requests.
 	 */
 	while ((bp = bufq_first(&mfsp->buf_queue)) != NULL) {
 		bufq_remove(&mfsp->buf_queue, bp);
 		mfs_doio(bp, mfsp);
 		wakeup((caddr_t)bp);
 	}
 	/*
 	 * On last close of a memory filesystem
 	 * we must invalidate any in core blocks, so that
 	 * we can, free up its vnode.
 	 */
 	if ((error = vinvalbuf(vp, 1, ap->a_cred, ap->a_p, 0, 0)) != 0)
 		return (error);
 	/*
 	 * There should be no way to have any more uses of this
 	 * vnode, so if we find any other uses, it is a panic.
 	 */
 	if (vp->v_usecount > 1)
 		printf("mfs_close: ref count %d > 1\n", vp->v_usecount);
 	if (vp->v_usecount > 1 || (bufq_first(&mfsp->buf_queue) != NULL))
 		panic("mfs_close");
 	/*
 	 * Send a request to the filesystem server to exit.
 	 */
 	mfsp->mfs_active = 0;
 	wakeup((caddr_t)vp);
 	return (0);
 }
 
 /*
  * Memory filesystem inactive routine
  */
 /* ARGSUSED */
 static int
 mfs_inactive(ap)
 	struct vop_inactive_args /* {
 		struct vnode *a_vp;
 		struct proc *a_p;
 	} */ *ap;
 {
 	struct vnode *devvp = ap->a_vp;
 	struct mfsnode *node = VTOMFS(devvp);
 
 	/* A vnode with I/O still queued cannot be going inactive. */
 	if (bufq_first(&node->buf_queue) != NULL) {
 		panic("mfs_inactive: not inactive (next buffer %p)",
 			bufq_first(&node->buf_queue));
 	}
 
 	/* Release the vnode lock before returning. */
 	VOP_UNLOCK(devvp, 0, ap->a_p);
 
 	return (0);
 }
 
 /*
  * Reclaim a memory filesystem devvp so that it can be reused.
  */
 static int
 mfs_reclaim(ap)
 	struct vop_reclaim_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	struct vnode *devvp;
 
 	devvp = ap->a_vp;
 
 	/* Release the private mfsnode and forget the stale pointer. */
 	FREE(devvp->v_data, M_MFSNODE);
 	devvp->v_data = NULL;
 
 	return (0);
 }
 
 /*
  * Print out the contents of an mfsnode.
  */
 static int
 mfs_print(ap)
 	struct vop_print_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	struct mfsnode *node;
 
 	node = VTOMFS(ap->a_vp);
 
 	/* One line: owning pid, base address and size of the mfsnode. */
 	printf("tag VT_MFS, pid %ld, base %p, size %ld\n",
 	    (long)node->mfs_pid,
 	    (void *)node->mfs_baseoff,
 	    node->mfs_size);
 
 	return (0);
 }
 
 /*
  * Block device bad operation
  */
 static int
 mfs_badop(struct vop_generic_args *ap)
 {
 	int result;
 
 	/* Log the operation name, run the default op, then log its result. */
 	printf("mfs_badop[%s]\n", ap->a_desc->vdesc_name);
 	result = vop_defaultop(ap);
 	printf("mfs_badop[%s] = %d\n", ap->a_desc->vdesc_name, result);
 
 	return (result);
 }
 
 
 static int
 mfs_getpages(ap)
 	struct vop_getpages_args *ap;
 {
 	int rv;
 
 	/* Forward the request to the vop_getpages entry of spec_vnodeop_p. */
 	rv = VOCALL(spec_vnodeop_p, VOFFSET(vop_getpages), ap);
 	return (rv);
 }
Index: head/sys/vm/vm_pager.c
===================================================================
--- head/sys/vm/vm_pager.c	(revision 75579)
+++ head/sys/vm/vm_pager.c	(revision 75580)
@@ -1,485 +1,487 @@
 /*
  * Copyright (c) 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * The Mach Operating System project at Carnegie-Mellon University.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from: @(#)vm_pager.c	8.6 (Berkeley) 1/12/94
  *
  *
  * Copyright (c) 1987, 1990 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Authors: Avadis Tevanian, Jr., Michael Wayne Young
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  *
  * $FreeBSD$
  */
 
 /*
  *	Paging space routine stubs.  Emulates a matchmaker-like interface
  *	for builtin pagers.
  */
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/vnode.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/ucred.h>
 #include <sys/malloc.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 #include <vm/vm_extern.h>
 
 MALLOC_DEFINE(M_VMPGDATA, "VM pgdata", "XXX: VM pager private data");
 
 extern struct pagerops defaultpagerops;
 extern struct pagerops swappagerops;
 extern struct pagerops vnodepagerops;
 extern struct pagerops devicepagerops;
 extern struct pagerops physpagerops;
 
 /*
  * Starts out at -1 here and is reset to nswbuf / 2 by
  * vm_pager_bufferinit().
  */
 int cluster_pbuf_freecnt = -1;	/* unlimited to begin with */
 
 static int dead_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
 static vm_object_t dead_pager_alloc __P((void *, vm_ooffset_t, vm_prot_t,
 	vm_ooffset_t));
 static void dead_pager_putpages __P((vm_object_t, vm_page_t *, int, int, int *));
 static boolean_t dead_pager_haspage __P((vm_object_t, vm_pindex_t, int *, int *));
 static void dead_pager_dealloc __P((vm_object_t));
 
 static int
 dead_pager_getpages(obj, ma, count, req)
 	vm_object_t obj;
 	vm_page_t *ma;
 	int count;
 	int req;
 {
 
 	/* A dead object can never supply pages. */
 	return (VM_PAGER_FAIL);
 }
 
 static vm_object_t
 dead_pager_alloc(handle, size, prot, off)
 	void *handle;
 	vm_ooffset_t size;
 	vm_prot_t prot;
 	vm_ooffset_t off;
 {
 
 	/* No object is ever created by the dead pager. */
 	return (NULL);
 }
 
 static void
 dead_pager_putpages(object, m, count, flags, rtvals)
 	vm_object_t object;
 	vm_page_t *m;
 	int count;
 	int flags;
 	int *rtvals;
 {
 	int idx = 0;
 
 	/* Report VM_PAGER_AGAIN for every page in the request. */
 	while (idx < count) {
 		rtvals[idx] = VM_PAGER_AGAIN;
 		idx++;
 	}
 }
 
 static int
 dead_pager_haspage(object, pindex, prev, next)
 	vm_object_t object;
 	vm_pindex_t pindex;
 	int *prev;
 	int *next;
 {
 	if (prev)
 		*prev = 0;
 	if (next)
 		*next = 0;
 	return FALSE;
 }
 
 static void
 dead_pager_dealloc(object)
 	vm_object_t object;
 {
 
 	/* Nothing to release for a dead object. */
 }
 
 /*
  * Operations vector for dead objects; pagertab installs it in the
  * OBJT_DEAD slot.  The initializer is positional.
  * NOTE(review): the two NULL slots are unnamed here -- confirm which
  * members they correspond to against the struct pagerops declaration.
  */
 static struct pagerops deadpagerops = {
 	NULL,			/* presumably pgo_init -- TODO confirm */
 	dead_pager_alloc,
 	dead_pager_dealloc,
 	dead_pager_getpages,
 	dead_pager_putpages,
 	dead_pager_haspage,
 	NULL
 };
 
 /*
  * Table of pager operation vectors.  vm_pager_allocate(),
  * vm_pager_deallocate() and vm_pager_strategy() index it directly by
  * object type, so the order of the entries has to follow the OBJT_*
  * values noted beside each one.
  */
 struct pagerops *pagertab[] = {
 	&defaultpagerops,	/* OBJT_DEFAULT */
 	&swappagerops,		/* OBJT_SWAP */
 	&vnodepagerops,		/* OBJT_VNODE */
 	&devicepagerops,	/* OBJT_DEVICE */
 	&physpagerops,		/* OBJT_PHYS */
 	&deadpagerops		/* OBJT_DEAD */
 };
 
 /* Number of entries in pagertab; bounds the loop in vm_pager_init(). */
 int npagers = sizeof(pagertab) / sizeof(pagertab[0]);
 
 /*
  * Kernel address space for mapping pages.
  * Used by pagers where KVAs are needed for IO.
  *
  * XXX needs to be large enough to support the number of pending async
  * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size
  * (MAXPHYS == 64k) if you want to get the most efficiency.
  */
 #define PAGER_MAP_SIZE	(8 * 1024 * 1024)
 
 int pager_map_size = PAGER_MAP_SIZE;
 /* Map that vm_pager_map_page() and vm_pager_bufferinit() allocate from. */
 vm_map_t pager_map;
 /*
  * Set by getpbuf() when bswlist is empty and it has to sleep; cleared
  * by relpbuf(), which then wakes the sleepers.
  */
 static int bswneeded;
 /*
  * Base of the region allocated in vm_pager_bufferinit(); initpbuf()
  * derives each buffer's b_data from it.
  */
 static vm_offset_t swapbkva;		/* swap buffers kva */
 /* Held by getpbuf(), trypbuf() and relpbuf() while they touch bswlist. */
 struct mtx pbuf_mtx;
 
 /*
  * Run the pgo_init hook of every pager listed in pagertab that
  * provides one.
  */
 void
 vm_pager_init()
 {
 	struct pagerops **pgops;
 
 	/*
 	 * Initialize known pagers.  The guard tests the table entry
 	 * (*pgops), not the iterator itself, which can never be NULL;
 	 * an empty slot is therefore skipped rather than dereferenced.
 	 */
 	for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
 		if (*pgops != NULL && (*pgops)->pgo_init != NULL)
 			(*(*pgops)->pgo_init) ();
 }
 
 void
 vm_pager_bufferinit()
 {
 	struct buf *pb;
 	int idx;
 
 	mtx_init(&pbuf_mtx, "pbuf mutex", MTX_DEF);
 
 	/*
 	 * Put every swap/physical I/O buffer header on the free list and
 	 * give it a clean initial state.
 	 */
 	for (idx = 0; idx < nswbuf; idx++) {
 		pb = &swbuf[idx];
 		TAILQ_INSERT_HEAD(&bswlist, pb, b_freelist);
 		BUF_LOCKINIT(pb);
 		LIST_INIT(&pb->b_dep);
 		pb->b_rcred = pb->b_wcred = NOCRED;
 		pb->b_xflags = 0;
 	}
 
 	/* The cluster code may use at most half of the headers. */
 	cluster_pbuf_freecnt = nswbuf / 2;
 
 	/* Reserve MAXPHYS bytes of pageable KVA per buffer header. */
 	swapbkva = kmem_alloc_pageable(pager_map, nswbuf * MAXPHYS);
 	if (swapbkva == 0)
 		panic("Not enough pager_map VM space for physical buffers");
 }
 
 /*
  * Allocate an instance of a pager of the given type.
  * Size, protection and offset parameters are passed in for pagers that
  * need to perform page-level validation (e.g. the device pager).
  */
 vm_object_t
 vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size, vm_prot_t prot,
 		  vm_ooffset_t off)
 {
 	struct pagerops *pgo = pagertab[type];
 
 	/* No operations vector for this type: nothing can be allocated. */
 	if (pgo == NULL)
 		return (NULL);
 
 	/* Delegate to the pager's own allocation routine. */
 	return ((*pgo->pgo_alloc) (handle, size, prot, off));
 }
 
 void
 vm_pager_deallocate(object)
 	vm_object_t object;
 {
 	(*pagertab[object->type]->pgo_dealloc) (object);
 }
 
 /*
  *      vm_pager_strategy:
  *
  *      called with no specific spl
  *      Execute strategy routine directly to pager.
  */
 
 void
 vm_pager_strategy(vm_object_t object, struct bio *bp)
 {
 	struct pagerops *pgo = pagertab[object->type];
 
 	/* Pagers with a strategy routine get the request directly. */
 	if (pgo->pgo_strategy) {
 		(*pgo->pgo_strategy)(object, bp);
 		return;
 	}
 
 	/* Otherwise fail the request with ENXIO and complete it. */
 	bp->bio_flags |= BIO_ERROR;
 	bp->bio_error = ENXIO;
 	biodone(bp);
 }
 
 /*
  * vm_pager_get_pages() - inline, see vm/vm_pager.h
  * vm_pager_put_pages() - inline, see vm/vm_pager.h
  * vm_pager_has_page() - inline, see vm/vm_pager.h
  * vm_pager_page_inserted() - inline, see vm/vm_pager.h
  * vm_pager_page_removed() - inline, see vm/vm_pager.h
  */
 
 #if 0
 /*
  *	vm_pager_sync:
  *
  *	Called by pageout daemon before going back to sleep.
  *	Gives pagers a chance to clean up any completed async paging
  *	operations.
  *
  *	Currently compiled out.  The guard tests the table entry
  *	(*pgops) rather than the iterator, which is never NULL.
  */
 void
 vm_pager_sync()
 {
 	struct pagerops **pgops;
 
 	for (pgops = pagertab; pgops < &pagertab[npagers]; pgops++)
 		if (*pgops != NULL && (*pgops)->pgo_sync != NULL)
 			(*(*pgops)->pgo_sync) ();
 }
 
 #endif
 
 vm_offset_t
 vm_pager_map_page(m)
 	vm_page_t m;
 {
 	vm_offset_t va;
 
 	/* Get one page of KVA from pager_map, then enter the mapping. */
 	va = kmem_alloc_wait(pager_map, PAGE_SIZE);
 	pmap_kenter(va, VM_PAGE_TO_PHYS(m));
 
 	return (va);
 }
 
 void
 vm_pager_unmap_page(kva)
 	vm_offset_t kva;
 {
 
 	/* Remove the mapping first, then give the page of KVA back. */
 	pmap_kremove(kva);
 	kmem_free_wakeup(pager_map, kva, PAGE_SIZE);
 }
 
 vm_object_t
 vm_pager_object_lookup(pg_list, handle)
 	register struct pagerlst *pg_list;
 	void *handle;
 {
 	register vm_object_t object;
 
 	TAILQ_FOREACH(object, pg_list, pager_object_list)
 		if (object->handle == handle)
 			return (object);
 	return (NULL);
 }
 
 /*
  * initialize a physical buffer
  *
  *	Resets the header fields listed below to their idle values, points
  *	the buffer at its slice of the swapbkva region and takes the
  *	buffer lock exclusively.  Called by getpbuf() and trypbuf() once
  *	the buffer has been removed from bswlist.
  */
 
 static void
 initpbuf(struct buf *bp)
 {
 	bp->b_rcred = NOCRED;
 	bp->b_wcred = NOCRED;
 	bp->b_qindex = QUEUE_NONE;
 	/*
 	 * Each header owns a MAXPHYS-sized window of KVA, selected by the
 	 * header's index within swbuf.
 	 */
 	bp->b_data = (caddr_t) (MAXPHYS * (bp - swbuf)) + swapbkva;
 	bp->b_kvabase = bp->b_data;
 	bp->b_kvasize = MAXPHYS;
 	bp->b_xflags = 0;
 	bp->b_flags = 0;
 	bp->b_ioflags = 0;
 	bp->b_iodone = NULL;
 	bp->b_error = 0;
+	bp->b_magic = B_MAGIC_BIO;
+	bp->b_op = &buf_ops_bio;
 	/* The buffer is handed back to the caller locked. */
 	BUF_LOCK(bp, LK_EXCLUSIVE);
 }
 
 /*
  * allocate a physical buffer
  *
  *	There are a limited number (nswbuf) of physical buffers.  We need
  *	to make sure that no single subsystem is able to hog all of them,
  *	so each subsystem implements a counter which is typically initialized
  *	to 1/2 nswbuf.  getpbuf() decrements this counter in allocation and
  *	increments it on release, and blocks if the counter hits zero.  A
  *	subsystem may initialize the counter to -1 to disable the feature,
  *	but it must still be sure to match up all uses of getpbuf() with 
  *	relpbuf() using the same variable.
  *
  *	NOTE: pfreecnt can be NULL, but this 'feature' will be removed
  *	relatively soon when the rest of the subsystems get smart about it. XXX
  */
 /*
  * Returns an initialized, locked physical buffer, sleeping until one is
  * available.  pfreecnt may be NULL, in which case no per-subsystem
  * limit is applied.
  */
 struct buf *
 getpbuf(pfreecnt)
 	int *pfreecnt;
 {
 	int s;
 	struct buf *bp;
 
 	s = splvm();
 	mtx_lock(&pbuf_mtx);
 
 	for (;;) {
 		/*
 		 * Wait for the caller's own allowance first; relpbuf()
 		 * issues the wakeup on pfreecnt when the count rises to 1.
 		 */
 		if (pfreecnt) {
 			while (*pfreecnt == 0) {
 				msleep(pfreecnt, &pbuf_mtx, PVM, "wswbuf0", 0);
 			}
 		}
 
 		/* get a bp from the swap buffer header pool */
 		if ((bp = TAILQ_FIRST(&bswlist)) != NULL)
 			break;
 
 		/*
 		 * Pool is empty: note that somebody is waiting so that
 		 * relpbuf() wakes us, then sleep on bswneeded.
 		 */
 		bswneeded = 1;
 		msleep(&bswneeded, &pbuf_mtx, PVM, "wswbuf1", 0);
 		/* loop in case someone else grabbed one */
 	}
 	TAILQ_REMOVE(&bswlist, bp, b_freelist);
 	if (pfreecnt)
 		--*pfreecnt;
 	mtx_unlock(&pbuf_mtx);
 	splx(s);
 
 	/* Reset the header only after pbuf_mtx has been dropped. */
 	initpbuf(bp);
 	return bp;
 }
 
 /*
  * allocate a physical buffer, if one is available.
  *
  *	Note that there is no NULL hack here - all subsystems using this
  *	call understand how to use pfreecnt.
  */
 struct buf *
 trypbuf(pfreecnt)
 	int *pfreecnt;
 {
 	struct buf *pb;
 	int ipl;
 
 	ipl = splvm();
 	mtx_lock(&pbuf_mtx);
 
 	/*
 	 * Only look at the pool when the caller still has allowance left;
 	 * with no allowance, or an empty pool, fail without sleeping.
 	 */
 	pb = NULL;
 	if (*pfreecnt != 0)
 		pb = TAILQ_FIRST(&bswlist);
 	if (pb == NULL) {
 		mtx_unlock(&pbuf_mtx);
 		splx(ipl);
 		return (NULL);
 	}
 
 	/* Claim the header and charge it to the caller's counter. */
 	TAILQ_REMOVE(&bswlist, pb, b_freelist);
 	--*pfreecnt;
 
 	mtx_unlock(&pbuf_mtx);
 	splx(ipl);
 
 	initpbuf(pb);
 
 	return (pb);
 }
 
 /*
  * release a physical buffer
  *
  *	NOTE: pfreecnt can be NULL, but this 'feature' will be removed
  *	relatively soon when the rest of the subsystems get smart about it. XXX
  */
 void
 relpbuf(bp, pfreecnt)
 	struct buf *bp;
 	int *pfreecnt;
 {
 	int s;
 
 	s = splvm();
 	mtx_lock(&pbuf_mtx);
 
 	if (bp->b_rcred != NOCRED) {
 		crfree(bp->b_rcred);
 		bp->b_rcred = NOCRED;
 	}
 	if (bp->b_wcred != NOCRED) {
 		crfree(bp->b_wcred);
 		bp->b_wcred = NOCRED;
 	}
 
 	if (bp->b_vp)
 		pbrelvp(bp);
 
 	BUF_UNLOCK(bp);
 
 	TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist);
 
 	if (bswneeded) {
 		bswneeded = 0;
 		wakeup(&bswneeded);
 	}
 	if (pfreecnt) {
 		if (++*pfreecnt == 1)
 			wakeup(pfreecnt);
 	}
 	mtx_unlock(&pbuf_mtx);
 	splx(s);
 }