Index: head/sbin/fsck/dir.c =================================================================== --- head/sbin/fsck/dir.c (revision 34265) +++ head/sbin/fsck/dir.c (revision 34266) @@ -1,734 +1,737 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)dir.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" char *lfname = "lost+found"; int lfmode = 01777; struct dirtemplate emptydir = { 0, DIRBLKSIZ }; struct dirtemplate dirhead = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; struct odirtemplate odirhead = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." }; static int chgino __P((struct inodesc *)); static int dircheck __P((struct inodesc *, struct direct *)); static int expanddir __P((struct dinode *dp, char *name)); static void freedir __P((ino_t ino, ino_t parent)); static struct direct *fsck_readdir __P((struct inodesc *)); static struct bufarea *getdirblk __P((ufs_daddr_t blkno, long size)); static int lftempname __P((char *bufp, ino_t ino)); static int mkentry __P((struct inodesc *)); /* * Propagate connected state through the tree. */ void propagate() { register struct inoinfo **inpp, *inp; struct inoinfo **inpend; long change; inpend = &inpsort[inplast]; do { change = 0; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) { statemap[inp->i_number] = DFOUND; change++; } } } while (change > 0); } /* * Scan each entry in a directory block. 
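 *
 * Usage sketch (editorial addition, not part of the original source):
 * dirscan() is not called directly; it is reached through ckinode()
 * with a DATA-type descriptor whose id_func is applied to each entry,
 * the way pass2() drives it:
 *
 *	struct inodesc curino;
 *
 *	memset(&curino, 0, sizeof(struct inodesc));
 *	curino.id_type = DATA;
 *	curino.id_func = pass2check;
 *	curino.id_number = inp->i_number;
 *	(void)ckinode(dp, &curino);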
*/ int dirscan(idesc) register struct inodesc *idesc; { register struct direct *dp; register struct bufarea *bp; int dsize, n; long blksiz; char dbuf[DIRBLKSIZ]; if (idesc->id_type != DATA) errx(EEXIT, "wrong type to dirscan %d", idesc->id_type); if (idesc->id_entryno == 0 && (idesc->id_filesize & (DIRBLKSIZ - 1)) != 0) idesc->id_filesize = roundup(idesc->id_filesize, DIRBLKSIZ); blksiz = idesc->id_numfrags * sblock.fs_fsize; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) { idesc->id_filesize -= blksiz; return (SKIP); } idesc->id_loc = 0; for (dp = fsck_readdir(idesc); dp != NULL; dp = fsck_readdir(idesc)) { dsize = dp->d_reclen; memmove(dbuf, dp, (size_t)dsize); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { struct direct *tdp = (struct direct *)dbuf; u_char tmp; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif idesc->id_dirp = (struct direct *)dbuf; if ((n = (*idesc->id_func)(idesc)) & ALTERED) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt && !doinglevel2) { struct direct *tdp; u_char tmp; tdp = (struct direct *)dbuf; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif bp = getdirblk(idesc->id_blkno, blksiz); memmove(bp->b_un.b_buf + idesc->id_loc - dsize, dbuf, (size_t)dsize); dirty(bp); sbdirty(); } if (n & STOP) return (n); } return (idesc->id_filesize > 0 ? KEEPON : STOP); } /* * get next entry in a directory. */ static struct direct * fsck_readdir(idesc) register struct inodesc *idesc; { register struct direct *dp, *ndp; register struct bufarea *bp; long size, blksiz, fix, dploc; blksiz = idesc->id_numfrags * sblock.fs_fsize; bp = getdirblk(idesc->id_blkno, blksiz); if (idesc->id_loc % DIRBLKSIZ == 0 && idesc->id_filesize > 0 && idesc->id_loc < blksiz) { dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (dircheck(idesc, dp)) goto dpok; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); dp->d_reclen = DIRBLKSIZ; dp->d_ino = 0; dp->d_type = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; if (fix) dirty(bp); idesc->id_loc += DIRBLKSIZ; idesc->id_filesize -= DIRBLKSIZ; return (dp); } dpok: if (idesc->id_filesize <= 0 || idesc->id_loc >= blksiz) return NULL; dploc = idesc->id_loc; dp = (struct direct *)(bp->b_un.b_buf + dploc); idesc->id_loc += dp->d_reclen; idesc->id_filesize -= dp->d_reclen; if ((idesc->id_loc % DIRBLKSIZ) == 0) return (dp); ndp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (idesc->id_loc < blksiz && idesc->id_filesize > 0 && dircheck(idesc, ndp) == 0) { size = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); idesc->id_loc += size; idesc->id_filesize -= size; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + dploc); dp->d_reclen += size; if (fix) dirty(bp); } return (dp); } /* * Verify that a directory entry is valid. * This is a superset of the checks made in the kernel. 
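 *
 * Worked example (editorial addition): a new-format entry carries an
 * 8-byte header (d_ino, d_reclen, d_type, d_namlen), so a 5-character
 * name needs 8 + 5 + 1 = 14 bytes, which DIRSIZ() rounds up to a
 * 4-byte boundary: the smallest legal d_reclen for it is 16.  The
 * checks below also require each record to fit in what is left of its
 * DIRBLKSIZ chunk and to be 4-byte aligned.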
*/ static int dircheck(idesc, dp) struct inodesc *idesc; register struct direct *dp; { register int size; register char *cp; u_char namlen, type; int spaceleft; spaceleft = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); if (dp->d_ino >= maxino || dp->d_reclen == 0 || dp->d_reclen > spaceleft || (dp->d_reclen & 0x3) != 0) return (0); if (dp->d_ino == 0) return (1); size = DIRSIZ(!newinofmt, dp); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { type = dp->d_namlen; namlen = dp->d_type; } else { namlen = dp->d_namlen; type = dp->d_type; } # else namlen = dp->d_namlen; type = dp->d_type; # endif if (dp->d_reclen < size || idesc->id_filesize < size || namlen > MAXNAMLEN || type > 15) return (0); for (cp = dp->d_name, size = 0; size < namlen; size++) if (*cp == '\0' || (*cp++ == '/')) return (0); if (*cp != '\0') return (0); return (1); } void direrror(ino, errmesg) ino_t ino; char *errmesg; { fileerror(ino, ino, errmesg); } void fileerror(cwd, ino, errmesg) ino_t cwd, ino; char *errmesg; { register struct dinode *dp; char pathbuf[MAXPATHLEN + 1]; pwarn("%s ", errmesg); pinode(ino); printf("\n"); getpathname(pathbuf, cwd, ino); if (ino < ROOTINO || ino > maxino) { pfatal("NAME=%s\n", pathbuf); return; } dp = ginode(ino); if (ftypeok(dp)) pfatal("%s=%s\n", (dp->di_mode & IFMT) == IFDIR ? "DIR" : "FILE", pathbuf); else pfatal("NAME=%s\n", pathbuf); } void adjust(idesc, lcnt) register struct inodesc *idesc; int lcnt; { register struct dinode *dp; dp = ginode(idesc->id_number); if (dp->di_nlink == lcnt) { if (linkup(idesc->id_number, (ino_t)0) == 0) clri(idesc, "UNREF", 0); } else { pwarn("LINK COUNT %s", (lfdir == idesc->id_number) ? lfname : ((dp->di_mode & IFMT) == IFDIR ? "DIR" : "FILE")); pinode(idesc->id_number); printf(" COUNT %d SHOULD BE %d", dp->di_nlink, dp->di_nlink - lcnt); - if (preen) { + if (preen || usedsoftdep) { if (lcnt < 0) { printf("\n"); pfatal("LINK COUNT INCREASING"); } - printf(" (ADJUSTED)\n"); + if (preen) + printf(" (ADJUSTED)\n"); } if (preen || reply("ADJUST") == 1) { dp->di_nlink -= lcnt; inodirty(); } } } static int mkentry(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; struct direct newent; int newlen, oldlen; newent.d_namlen = strlen(idesc->id_name); newlen = DIRSIZ(0, &newent); if (dirp->d_ino != 0) oldlen = DIRSIZ(0, dirp); else oldlen = 0; if (dirp->d_reclen - oldlen < newlen) return (KEEPON); newent.d_reclen = dirp->d_reclen - oldlen; dirp->d_reclen = oldlen; dirp = (struct direct *)(((char *)dirp) + oldlen); dirp->d_ino = idesc->id_parent; /* ino to be entered is in id_parent */ dirp->d_reclen = newent.d_reclen; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; dirp->d_namlen = newent.d_namlen; memmove(dirp->d_name, idesc->id_name, (size_t)newent.d_namlen + 1); # if (BYTE_ORDER == LITTLE_ENDIAN) /* * If the entry was split, dirscan() will only reverse the byte * order of the original entry, and not the new one, before * writing it back out. So, we reverse the byte order here if * necessary. 
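 *
 * Editorial illustration: in the old format these two bytes were one
 * 16-bit d_namlen, so on a little-endian disk the low byte (the real
 * name length) sits where the new format keeps d_type.  An old-format
 * entry with a 5-character name therefore reads as d_type = 5,
 * d_namlen = 0 until the two bytes are exchanged.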
*/ if (oldlen != 0 && !newinofmt && !doinglevel2) { u_char tmp; tmp = dirp->d_namlen; dirp->d_namlen = dirp->d_type; dirp->d_type = tmp; } # endif return (ALTERED|STOP); } static int chgino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (memcmp(dirp->d_name, idesc->id_name, (int)dirp->d_namlen + 1)) return (KEEPON); dirp->d_ino = idesc->id_parent; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; return (ALTERED|STOP); } int linkup(orphan, parentdir) ino_t orphan; ino_t parentdir; { register struct dinode *dp; int lostdir; ino_t oldlfdir; struct inodesc idesc; char tempname[BUFSIZ]; memset(&idesc, 0, sizeof(struct inodesc)); dp = ginode(orphan); lostdir = (dp->di_mode & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? "DIR" : "FILE"); pinode(orphan); - if (preen && dp->di_size == 0) + if ((preen || usedsoftdep) && dp->di_size == 0) return (0); if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); + if (parentdir != 0) + lncntp[parentdir]++; if (lfdir == 0) { dp = ginode(ROOTINO); idesc.id_name = lfname; idesc.id_type = DATA; idesc.id_func = findino; idesc.id_number = ROOTINO; if ((ckinode(dp, &idesc) & FOUND) != 0) { lfdir = idesc.id_parent; } else { pwarn("NO lost+found DIRECTORY"); if (preen || reply("CREATE")) { lfdir = allocdir(ROOTINO, (ino_t)0, lfmode); if (lfdir != 0) { if (makeentry(ROOTINO, lfdir, lfname) != 0) { if (preen) printf(" (CREATED)\n"); } else { freedir(lfdir, ROOTINO); lfdir = 0; if (preen) printf("\n"); } } } } if (lfdir == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY"); printf("\n\n"); return (0); } } dp = ginode(lfdir); if ((dp->di_mode & IFMT) != IFDIR) { pfatal("lost+found IS NOT A DIRECTORY"); if (reply("REALLOCATE") == 0) return (0); oldlfdir = lfdir; if ((lfdir = allocdir(ROOTINO, (ino_t)0, lfmode)) == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } if ((changeino(ROOTINO, lfname, lfdir) & ALTERED) == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } inodirty(); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = oldlfdir; adjust(&idesc, lncntp[oldlfdir] + 1); lncntp[oldlfdir] = 0; dp = ginode(lfdir); } if (statemap[lfdir] != DFOUND) { pfatal("SORRY. NO lost+found DIRECTORY\n\n"); return (0); } (void)lftempname(tempname, orphan); if (makeentry(lfdir, orphan, tempname) == 0) { pfatal("SORRY. NO SPACE IN lost+found DIRECTORY"); printf("\n\n"); return (0); } lncntp[orphan]--; if (lostdir) { if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 && parentdir != (ino_t)-1) (void)makeentry(orphan, lfdir, ".."); dp = ginode(lfdir); dp->di_nlink++; inodirty(); lncntp[lfdir]++; pwarn("DIR I=%lu CONNECTED. ", orphan); if (parentdir != (ino_t)-1) { printf("PARENT WAS I=%lu\n", parentdir); /* * The parent directory, because of the ordering * guarantees, has had the link count incremented * for the child, but no entry was made. This * fixes the parent link count so that fsck does * not need to be rerun. */ lncntp[parentdir]++; } if (preen == 0) printf("\n"); } return (1); } /* * fix an entry in a directory. 
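 *
 * Usage sketch (editorial addition): linkup() above repoints a
 * reconnected directory's `..' with exactly this call:
 *
 *	if ((changeino(orphan, "..", lfdir) & ALTERED) == 0)
 *		(void)makeentry(orphan, lfdir, "..");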
*/ int changeino(dir, name, newnum) ino_t dir; char *name; ino_t newnum; { struct inodesc idesc; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = chgino; idesc.id_number = dir; idesc.id_fix = DONTKNOW; idesc.id_name = name; idesc.id_parent = newnum; /* new value for name */ return (ckinode(ginode(dir), &idesc)); } /* * make an entry in a directory */ int makeentry(parent, ino, name) ino_t parent, ino; char *name; { struct dinode *dp; struct inodesc idesc; char pathbuf[MAXPATHLEN + 1]; if (parent < ROOTINO || parent >= maxino || ino < ROOTINO || ino >= maxino) return (0); memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = mkentry; idesc.id_number = parent; idesc.id_parent = ino; /* this is the inode to enter */ idesc.id_fix = DONTKNOW; idesc.id_name = name; dp = ginode(parent); if (dp->di_size % DIRBLKSIZ) { dp->di_size = roundup(dp->di_size, DIRBLKSIZ); inodirty(); } if ((ckinode(dp, &idesc) & ALTERED) != 0) return (1); getpathname(pathbuf, parent, parent); dp = ginode(parent); if (expanddir(dp, pathbuf) == 0) return (0); return (ckinode(dp, &idesc) & ALTERED); } /* * Attempt to expand the size of a directory */ static int expanddir(dp, name) register struct dinode *dp; char *name; { ufs_daddr_t lastbn, newblk; register struct bufarea *bp; char *cp, firstblk[DIRBLKSIZ]; lastbn = lblkno(&sblock, dp->di_size); if (lastbn >= NDADDR - 1 || dp->di_db[lastbn] == 0 || dp->di_size == 0) return (0); if ((newblk = allocblk(sblock.fs_frag)) == 0) return (0); dp->di_db[lastbn + 1] = dp->di_db[lastbn]; dp->di_db[lastbn] = newblk; dp->di_size += sblock.fs_bsize; dp->di_blocks += btodb(sblock.fs_bsize); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(firstblk, bp->b_un.b_buf, DIRBLKSIZ); bp = getdirblk(newblk, sblock.fs_bsize); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, firstblk, DIRBLKSIZ); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_bsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, &emptydir, sizeof emptydir); pwarn("NO SPACE LEFT IN %s", name); if (preen) printf(" (EXPANDED)\n"); else if (reply("EXPAND") == 0) goto bad; dirty(bp); inodirty(); return (1); bad: dp->di_db[lastbn] = dp->di_db[lastbn + 1]; dp->di_db[lastbn + 1] = 0; dp->di_size -= sblock.fs_bsize; dp->di_blocks -= btodb(sblock.fs_bsize); freeblk(newblk, sblock.fs_frag); return (0); } /* * allocate a new directory */ ino_t allocdir(parent, request, mode) ino_t parent, request; int mode; { ino_t ino; char *cp; struct dinode *dp; register struct bufarea *bp; struct dirtemplate *dirp; ino = allocino(request, IFDIR|mode); if (newinofmt) dirp = &dirhead; else dirp = (struct dirtemplate *)&odirhead; dirp->dot_ino = ino; dirp->dotdot_ino = parent; dp = ginode(ino); bp = getdirblk(dp->di_db[0], sblock.fs_fsize); if (bp->b_errs) { freeino(ino); return (0); } memmove(bp->b_un.b_buf, dirp, sizeof(struct dirtemplate)); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_fsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); dp->di_nlink = 2; inodirty(); if (ino == ROOTINO) { lncntp[ino] = dp->di_nlink; cacheino(dp, ino); return(ino); } if (statemap[parent] != DSTATE && statemap[parent] != DFOUND) { freeino(ino); return (0); } cacheino(dp, ino); statemap[ino] = statemap[parent]; if (statemap[ino] == 
DSTATE) { lncntp[ino] = dp->di_nlink; lncntp[parent]++; } dp = ginode(parent); dp->di_nlink++; inodirty(); return (ino); } /* * free a directory inode */ static void freedir(ino, parent) ino_t ino, parent; { struct dinode *dp; if (ino != parent) { dp = ginode(parent); dp->di_nlink--; inodirty(); } freeino(ino); } /* * generate a temporary name for the lost+found directory. */ static int lftempname(bufp, ino) char *bufp; ino_t ino; { register ino_t in; register char *cp; int namlen; cp = bufp + 2; for (in = maxino; in > 0; in /= 10) cp++; *--cp = 0; namlen = cp - bufp; in = ino; while (cp > bufp) { *--cp = (in % 10) + '0'; in /= 10; } *cp = '#'; return (namlen); } /* * Get a directory block. * Insure that it is held until another is requested. */ static struct bufarea * getdirblk(blkno, size) ufs_daddr_t blkno; long size; { if (pdirbp != 0) pdirbp->b_flags &= ~B_INUSE; pdirbp = getdatablk(blkno, size); return (pdirbp); } Index: head/sbin/fsck/fsck.h =================================================================== --- head/sbin/fsck/fsck.h (revision 34265) +++ head/sbin/fsck/fsck.h (revision 34266) @@ -1,281 +1,283 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)fsck.h 8.4 (Berkeley) 5/9/95 */ #include #include #include #define MAXDUP 10 /* limit on dup blks (per inode) */ #define MAXBAD 10 /* limit on bad blks (per inode) */ #define MAXBUFSPACE 40*1024 /* maximum space to allocate to buffers */ #define INOBUFSIZE 56*1024 /* size of buffer to read inodes in pass1 */ #ifndef BUFSIZ #define BUFSIZ 1024 #endif #define USTATE 01 /* inode not allocated */ #define FSTATE 02 /* inode is file */ #define DSTATE 03 /* inode is directory */ #define DFOUND 04 /* directory found during descent */ #define DCLEAR 05 /* directory is to be cleared */ #define FCLEAR 06 /* file is to be cleared */ /* * buffer cache structure. */ struct bufarea { struct bufarea *b_next; /* free list queue */ struct bufarea *b_prev; /* free list queue */ ufs_daddr_t b_bno; int b_size; int b_errs; int b_flags; union { char *b_buf; /* buffer space */ ufs_daddr_t *b_indir; /* indirect block */ struct fs *b_fs; /* super block */ struct cg *b_cg; /* cylinder group */ struct dinode *b_dinode; /* inode block */ } b_un; char b_dirty; }; #define B_INUSE 1 #define MINBUFS 5 /* minimum number of buffers required */ struct bufarea bufhead; /* head of list of other blks in filesys */ struct bufarea sblk; /* file system superblock */ struct bufarea cgblk; /* cylinder group blocks */ struct bufarea *pdirbp; /* current directory contents */ struct bufarea *pbp; /* current inode block */ #define dirty(bp) (bp)->b_dirty = 1 #define initbarea(bp) \ (bp)->b_dirty = 0; \ (bp)->b_bno = (ufs_daddr_t)-1; \ (bp)->b_flags = 0; #define sbdirty() sblk.b_dirty = 1 #define cgdirty() cgblk.b_dirty = 1 #define sblock (*sblk.b_un.b_fs) #define cgrp (*cgblk.b_un.b_cg) enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE}; struct inodesc { enum fixstate id_fix; /* policy on fixing errors */ int (*id_func)(); /* function to be applied to blocks of inode */ ino_t id_number; /* inode number described */ ino_t id_parent; /* for DATA nodes, their parent */ ufs_daddr_t id_blkno; /* current block number being examined */ int id_numfrags; /* number of frags contained in block */ quad_t id_filesize; /* for DATA nodes, the size of the directory */ int id_loc; /* for DATA nodes, current location in dir */ int id_entryno; /* for DATA nodes, current entry number */ struct direct *id_dirp; /* for DATA nodes, ptr to current entry */ char *id_name; /* for DATA nodes, name to find or enter */ char id_type; /* type of descriptor, DATA or ADDR */ }; /* file types */ #define DATA 1 #define ADDR 2 /* * Linked list of duplicate blocks. * * The list is composed of two parts. The first part of the * list (from duplist through the node pointed to by muldup) * contains a single copy of each duplicate block that has been * found. The second part of the list (from muldup to the end) * contains duplicate blocks that have been found more than once. * To check if a block has been found as a duplicate it is only * necessary to search from duplist through muldup. To find the * total number of times that a block has been found as a duplicate * the entire list must be searched for occurences of the block * in question. The following diagram shows a sample list where * w (found twice), x (found once), y (found three times), and z * (found once) are duplicate block numbers: * * w -> y -> x -> z -> y -> w -> y * ^ ^ * | | * duplist muldup */ struct dups { struct dups *next; ufs_daddr_t dup; }; struct dups *duplist; /* head of dup list */ struct dups *muldup; /* end of unique duplicate dup block numbers */ /* * Linked list of inodes with zero link counts. 
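 *
 * Editorial sketch: the list is consumed by plain traversal, as in the
 * debug report in checkfilesys():
 *
 *	for (zlnp = zlnhead; zlnp; zlnp = zlnp->next)
 *		printf(" %u,", zlnp->zlncnt);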
 */
struct zlncnt {
	struct zlncnt *next;
	ino_t zlncnt;
};

struct zlncnt *zlnhead;		/* head of zero link count list */

/*
 * Inode cache data structures.
 */
struct inoinfo {
	struct inoinfo	*i_nexthash;	/* next entry in hash chain */
	ino_t	i_number;		/* inode number of this entry */
	ino_t	i_parent;		/* inode number of parent */
	ino_t	i_dotdot;		/* inode number of `..' */
	size_t	i_isize;		/* size of inode */
	u_int	i_numblks;		/* size of block array in bytes */
	ufs_daddr_t i_blks[1];		/* actually longer */
} **inphead, **inpsort;
long numdirs, listmax, inplast;

char	*cdevname;		/* name of device being checked */
long	dev_bsize;		/* computed value of DEV_BSIZE */
long	secsize;		/* actual disk sector size */
char	fflag;			/* force fs check (ignore clean flag) */
char	nflag;			/* assume a no response */
char	yflag;			/* assume a yes response */
int	bflag;			/* location of alternate super block */
int	debug;			/* output debugging info */
int	cvtlevel;		/* convert to newer file system format */
int	doinglevel1;		/* converting to new cylinder group format */
int	doinglevel2;		/* converting to new inode format */
int	newinofmt;		/* filesystem has new inode format */
+char	usedsoftdep;		/* just fix soft dependency inconsistencies */
+char	resolved;		/* cleared if unresolved changes => not clean */
char	preen;			/* just fix normal inconsistencies */
char	hotroot;		/* checking root device */
char	havesb;			/* superblock has been read */
int	fsmodified;		/* 1 => write done to file system */
int	fsreadfd;		/* file descriptor for reading file system */
int	fswritefd;		/* file descriptor for writing file system */
int	returntosingle;		/* return to single user mode */
int	rerun;			/* rerun fsck.  Only used in non-preen mode */

ufs_daddr_t maxfsblock;		/* number of blocks in the file system */
char	*blockmap;		/* ptr to primary blk allocation map */
ino_t	maxino;			/* number of inodes in file system */
ino_t	lastino;		/* last inode in use */
char	*statemap;		/* ptr to inode state table */
u_char	*typemap;		/* ptr to inode type table */
short	*lncntp;		/* ptr to link count table */

ino_t	lfdir;			/* lost & found directory inode number */
char	*lfname;		/* lost & found directory name */
int	lfmode;			/* lost & found directory creation mode */

ufs_daddr_t n_blks;		/* number of blocks in use */
ufs_daddr_t n_files;		/* number of files in use */

#define	clearinode(dp)	(*(dp) = zino)
struct	dinode zino;

#define	setbmap(blkno)	setbit(blockmap, blkno)
#define	testbmap(blkno)	isset(blockmap, blkno)
#define	clrbmap(blkno)	clrbit(blockmap, blkno)

#define	STOP	0x01
#define	SKIP	0x02
#define	KEEPON	0x04
#define	ALTERED	0x08
#define	FOUND	0x10

#define	EEXIT	8		/* Standard error exit.
*/ struct fstab; void adjust __P((struct inodesc *, int lcnt)); ufs_daddr_t allocblk __P((long frags)); ino_t allocdir __P((ino_t parent, ino_t request, int mode)); ino_t allocino __P((ino_t request, int type)); void blkerror __P((ino_t ino, char *type, ufs_daddr_t blk)); char *blockcheck __P((char *name)); int bread __P((int fd, char *buf, ufs_daddr_t blk, long size)); void bufinit __P((void)); void bwrite __P((int fd, char *buf, ufs_daddr_t blk, long size)); void cacheino __P((struct dinode *dp, ino_t inumber)); void catch __P((int)); void catchquit __P((int)); int changeino __P((ino_t dir, char *name, ino_t newnum)); int checkfstab __P((int preen, int maxrun, int (*docheck)(struct fstab *), int (*chkit)(char *, char *, long, int))); int chkrange __P((ufs_daddr_t blk, int cnt)); void ckfini __P((int markclean)); int ckinode __P((struct dinode *dp, struct inodesc *)); void clri __P((struct inodesc *, char *type, int flag)); void direrror __P((ino_t ino, char *errmesg)); int dirscan __P((struct inodesc *)); int dofix __P((struct inodesc *, char *msg)); void ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_fragacct __P((struct fs *, int, int32_t [], int)); int ffs_isblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t)); void fileerror __P((ino_t cwd, ino_t ino, char *errmesg)); int findino __P((struct inodesc *)); int findname __P((struct inodesc *)); void flush __P((int fd, struct bufarea *bp)); void freeblk __P((ufs_daddr_t blkno, long frags)); void freeino __P((ino_t ino)); void freeinodebuf __P((void)); int ftypeok __P((struct dinode *dp)); void getblk __P((struct bufarea *bp, ufs_daddr_t blk, long size)); struct bufarea *getdatablk __P((ufs_daddr_t blkno, long size)); struct inoinfo *getinoinfo __P((ino_t inumber)); struct dinode *getnextinode __P((ino_t inumber)); void getpathname __P((char *namebuf, ino_t curdir, ino_t ino)); struct dinode *ginode __P((ino_t inumber)); void inocleanup __P((void)); void inodirty __P((void)); int linkup __P((ino_t orphan, ino_t parentdir)); int makeentry __P((ino_t parent, ino_t ino, char *name)); void panic __P((const char *fmt, ...)); void pass1 __P((void)); void pass1b __P((void)); int pass1check __P((struct inodesc *)); void pass2 __P((void)); void pass3 __P((void)); void pass4 __P((void)); int pass4check __P((struct inodesc *)); void pass5 __P((void)); void pfatal __P((const char *fmt, ...)); void pinode __P((ino_t ino)); void propagate __P((void)); void pwarn __P((const char *fmt, ...)); int reply __P((char *question)); void resetinodebuf __P((void)); int setup __P((char *dev)); void voidquit __P((int)); Index: head/sbin/fsck/inode.c =================================================================== --- head/sbin/fsck/inode.c (revision 34265) +++ head/sbin/fsck/inode.c (revision 34266) @@ -1,621 +1,632 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)inode.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include "fsck.h" static ino_t startinum; static int iblock __P((struct inodesc *, long ilevel, quad_t isize)); int ckinode(dp, idesc) struct dinode *dp; register struct inodesc *idesc; { ufs_daddr_t *ap; long ret, n, ndb, offset; struct dinode dino; quad_t remsize, sizepb; mode_t mode; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_fix != IGNORE) idesc->id_fix = DONTKNOW; idesc->id_entryno = 0; idesc->id_filesize = dp->di_size; mode = dp->di_mode & IFMT; if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && (dp->di_size < sblock.fs_maxsymlinklen || dp->di_blocks == 0))) return (KEEPON); dino = *dp; ndb = howmany(dino.di_size, sblock.fs_bsize); for (ap = &dino.di_db[0]; ap < &dino.di_db[NDADDR]; ap++) { if (--ndb == 0 && (offset = blkoff(&sblock, dino.di_size)) != 0) idesc->id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc->id_numfrags = sblock.fs_frag; if (*ap == 0) { if (idesc->id_type == DATA && ndb >= 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size = (ap - &dino.di_db[0]) * sblock.fs_bsize; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); } } continue; } idesc->id_blkno = *ap; if (idesc->id_type == ADDR) ret = (*idesc->id_func)(idesc); else ret = dirscan(idesc); if (ret & STOP) return (ret); } idesc->id_numfrags = sblock.fs_frag; remsize = dino.di_size - sblock.fs_bsize * NDADDR; sizepb = sblock.fs_bsize; for (ap = &dino.di_ib[0], n = 1; n <= NIADDR; ap++, n++) { if (*ap) { idesc->id_blkno = *ap; ret = iblock(idesc, n, remsize); if (ret & STOP) return (ret); } else { if (idesc->id_type == DATA && remsize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= remsize; remsize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); break; } } } sizepb *= NINDIR(&sblock); remsize -= sizepb; } return (KEEPON); } static int 
iblock(idesc, ilevel, isize) struct inodesc *idesc; long ilevel; quad_t isize; { ufs_daddr_t *ap; ufs_daddr_t *aplim; struct bufarea *bp; int i, n, (*func)(), nif; quad_t sizepb; char buf[BUFSIZ]; char pathbuf[MAXPATHLEN + 1]; struct dinode *dp; if (idesc->id_type == ADDR) { func = idesc->id_func; if (((n = (*func)(idesc)) & KEEPON) == 0) return (n); } else func = dirscan; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) return (SKIP); bp = getdatablk(idesc->id_blkno, sblock.fs_bsize); ilevel--; for (sizepb = sblock.fs_bsize, i = 0; i < ilevel; i++) sizepb *= NINDIR(&sblock); nif = howmany(isize , sizepb); if (nif > NINDIR(&sblock)) nif = NINDIR(&sblock); if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) { aplim = &bp->b_un.b_indir[NINDIR(&sblock)]; for (ap = &bp->b_un.b_indir[nif]; ap < aplim; ap++) { if (*ap == 0) continue; (void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu", idesc->id_number); if (dofix(idesc, buf)) { *ap = 0; dirty(bp); } } flush(fswritefd, bp); } aplim = &bp->b_un.b_indir[nif]; for (ap = bp->b_un.b_indir; ap < aplim; ap++) { if (*ap) { idesc->id_blkno = *ap; if (ilevel == 0) n = (*func)(idesc); else n = iblock(idesc, ilevel, isize); if (n & STOP) { bp->b_flags &= ~B_INUSE; return (n); } } else { if (idesc->id_type == DATA && isize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= isize; isize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); bp->b_flags &= ~B_INUSE; return(STOP); } } } isize -= sizepb; } bp->b_flags &= ~B_INUSE; return (KEEPON); } /* * Check that a block in a legal block number. * Return 0 if in range, 1 if out of range. */ int chkrange(blk, cnt) ufs_daddr_t blk; int cnt; { register int c; if (blk < 0 || blk >= maxfsblock || cnt < 0 || cnt > maxfsblock - blk) return (1); c = dtog(&sblock, blk); if (blk < cgdmin(&sblock, c)) { if ((blk + cnt) > cgsblock(&sblock, c)) { if (debug) { printf("blk %ld < cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > cgsbase %ld\n", blk + cnt, cgsblock(&sblock, c)); } return (1); } } else { if ((blk + cnt) > cgbase(&sblock, c+1)) { if (debug) { printf("blk %ld >= cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > sblock.fs_fpg %ld\n", blk+cnt, sblock.fs_fpg); } return (1); } } return (0); } /* * General purpose interface for reading inodes. */ struct dinode * ginode(inumber) ino_t inumber; { ufs_daddr_t iblk; if (inumber < ROOTINO || inumber > maxino) errx(EEXIT, "bad inode number %d to ginode", inumber); if (startinum == 0 || inumber < startinum || inumber >= startinum + INOPB(&sblock)) { iblk = ino_to_fsba(&sblock, inumber); if (pbp != 0) pbp->b_flags &= ~B_INUSE; pbp = getdatablk(iblk, sblock.fs_bsize); startinum = (inumber / INOPB(&sblock)) * INOPB(&sblock); } return (&pbp->b_un.b_dinode[inumber % INOPB(&sblock)]); } /* * Special purpose version of ginode used to optimize first pass * over all the inodes in numerical order. 
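 *
 * Usage sketch (editorial addition): a scan must be bracketed by
 * resetinodebuf() and freeinodebuf(), and inode numbers must arrive
 * strictly in sequence, as in pass1():
 *
 *	resetinodebuf();	(also consumes inodes below ROOTINO)
 *	for (inumber = ROOTINO; inumber < maxino; inumber++)
 *		dp = getnextinode(inumber);
 *	freeinodebuf();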
*/ ino_t nextino, lastinum; long readcnt, readpercg, fullcnt, inobufsize, partialcnt, partialsize; struct dinode *inodebuf; struct dinode * getnextinode(inumber) ino_t inumber; { long size; ufs_daddr_t dblk; static struct dinode *dp; if (inumber != nextino++ || inumber > maxino) errx(EEXIT, "bad inode number %d to nextinode", inumber); if (inumber >= lastinum) { readcnt++; dblk = fsbtodb(&sblock, ino_to_fsba(&sblock, lastinum)); if (readcnt % readpercg == 0) { size = partialsize; lastinum += partialcnt; } else { size = inobufsize; lastinum += fullcnt; } (void)bread(fsreadfd, (char *)inodebuf, dblk, size); /* ??? */ dp = inodebuf; } return (dp++); } void resetinodebuf() { startinum = 0; nextino = 0; lastinum = 0; readcnt = 0; inobufsize = blkroundup(&sblock, INOBUFSIZE); fullcnt = inobufsize / sizeof(struct dinode); readpercg = sblock.fs_ipg / fullcnt; partialcnt = sblock.fs_ipg % fullcnt; partialsize = partialcnt * sizeof(struct dinode); if (partialcnt != 0) { readpercg++; } else { partialcnt = fullcnt; partialsize = inobufsize; } if (inodebuf == NULL && (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL) errx(EEXIT, "Cannot allocate space for inode buffer"); while (nextino < ROOTINO) (void)getnextinode(nextino); } void freeinodebuf() { if (inodebuf != NULL) free((char *)inodebuf); inodebuf = NULL; } /* * Routines to maintain information about directory inodes. * This is built during the first pass and used during the * second and third passes. * * Enter inodes into the cache. */ void cacheino(dp, inumber) register struct dinode *dp; ino_t inumber; { register struct inoinfo *inp; struct inoinfo **inpp; unsigned int blks; blks = howmany(dp->di_size, sblock.fs_bsize); if (blks > NDADDR) blks = NDADDR + NIADDR; inp = (struct inoinfo *) malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs_daddr_t)); if (inp == NULL) return; inpp = &inphead[inumber % numdirs]; inp->i_nexthash = *inpp; *inpp = inp; if (inumber == ROOTINO) inp->i_parent = ROOTINO; else inp->i_parent = (ino_t)0; inp->i_dotdot = (ino_t)0; inp->i_number = inumber; inp->i_isize = dp->di_size; inp->i_numblks = blks * sizeof(ufs_daddr_t); memmove(&inp->i_blks[0], &dp->di_db[0], (size_t)inp->i_numblks); if (inplast == listmax) { listmax += 100; inpsort = (struct inoinfo **)realloc((char *)inpsort, (unsigned)listmax * sizeof(struct inoinfo *)); if (inpsort == NULL) errx(EEXIT, "cannot increase directory list"); } inpsort[inplast++] = inp; } /* * Look up an inode cache structure. */ struct inoinfo * getinoinfo(inumber) ino_t inumber; { register struct inoinfo *inp; for (inp = inphead[inumber % numdirs]; inp; inp = inp->i_nexthash) { if (inp->i_number != inumber) continue; return (inp); } errx(EEXIT, "cannot find inode %d", inumber); return ((struct inoinfo *)0); } /* * Clean up all the inode cache structure. */ void inocleanup() { register struct inoinfo **inpp; if (inphead == NULL) return; for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) free((char *)(*inpp)); free((char *)inphead); free((char *)inpsort); inphead = inpsort = NULL; } void inodirty() { dirty(pbp); } void clri(idesc, type, flag) register struct inodesc *idesc; char *type; int flag; { register struct dinode *dp; dp = ginode(idesc->id_number); if (flag == 1) { pwarn("%s %s", type, (dp->di_mode & IFMT) == IFDIR ? 
"DIR" : "FILE"); pinode(idesc->id_number); } if (preen || reply("CLEAR") == 1) { if (preen) printf(" (CLEARED)\n"); n_files--; (void)ckinode(dp, idesc); clearinode(dp); statemap[idesc->id_number] = USTATE; inodirty(); } } int findname(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent) return (KEEPON); memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1); return (STOP|FOUND); } int findino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino == 0) return (KEEPON); if (strcmp(dirp->d_name, idesc->id_name) == 0 && dirp->d_ino >= ROOTINO && dirp->d_ino <= maxino) { idesc->id_parent = dirp->d_ino; return (STOP|FOUND); } return (KEEPON); } void pinode(ino) ino_t ino; { register struct dinode *dp; register char *p; struct passwd *pw; time_t t; printf(" I=%lu ", ino); if (ino < ROOTINO || ino > maxino) return; dp = ginode(ino); printf(" OWNER="); if ((pw = getpwuid((int)dp->di_uid)) != 0) printf("%s ", pw->pw_name); else printf("%u ", (unsigned)dp->di_uid); printf("MODE=%o\n", dp->di_mode); if (preen) printf("%s: ", cdevname); printf("SIZE=%qu ", dp->di_size); t = dp->di_mtime; p = ctime(&t); printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]); } void blkerror(ino, type, blk) ino_t ino; char *type; ufs_daddr_t blk; { pfatal("%ld %s I=%lu", blk, type, ino); printf("\n"); switch (statemap[ino]) { case FSTATE: statemap[ino] = FCLEAR; return; case DSTATE: statemap[ino] = DCLEAR; return; case FCLEAR: case DCLEAR: return; default: errx(EEXIT, "BAD STATE %d TO BLKERR", statemap[ino]); /* NOTREACHED */ } } /* * allocate an unused inode */ ino_t allocino(request, type) ino_t request; int type; { register ino_t ino; register struct dinode *dp; + struct cg *cgp = &cgrp; + int cg; if (request == 0) request = ROOTINO; else if (statemap[request] != USTATE) return (0); for (ino = request; ino < maxino; ino++) if (statemap[ino] == USTATE) break; if (ino == maxino) return (0); + cg = ino_to_cg(&sblock, ino); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + setbit(cg_inosused(cgp), ino % sblock.fs_ipg); + cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: statemap[ino] = DSTATE; + cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: statemap[ino] = FSTATE; break; default: return (0); } + cgdirty(); dp = ginode(ino); dp->di_db[0] = allocblk((long)1); if (dp->di_db[0] == 0) { statemap[ino] = USTATE; return (0); } + dp->di_flags = 0; dp->di_mode = type; dp->di_atime = time(NULL); dp->di_mtime = dp->di_ctime = dp->di_atime; dp->di_size = sblock.fs_fsize; dp->di_blocks = btodb(sblock.fs_fsize); n_files++; inodirty(); if (newinofmt) typemap[ino] = IFTODT(type); return (ino); } /* * deallocate an inode */ void freeino(ino) ino_t ino; { struct inodesc idesc; struct dinode *dp; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = ino; dp = ginode(ino); (void)ckinode(dp, &idesc); clearinode(dp); inodirty(); statemap[ino] = USTATE; n_files--; } Index: head/sbin/fsck/main.c =================================================================== --- head/sbin/fsck/main.c (revision 34265) +++ head/sbin/fsck/main.c (revision 34266) @@ -1,353 +1,359 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif static const char rcsid[] = - "$Id$"; + "$Id: main.c,v 1.12 1997/12/20 22:24:32 bde Exp $"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include "fsck.h" int returntosingle; static int argtoi __P((int flag, char *req, char *str, int base)); static int docheck __P((struct fstab *fsp)); static int checkfilesys __P((char *filesys, char *mntpt, long auxdata, int child)); int main __P((int argc, char *argv[])); int main(argc, argv) int argc; char *argv[]; { int ch; int ret, maxrun = 0; sync(); while ((ch = getopt(argc, argv, "dfpnNyYb:c:l:m:")) != -1) { switch (ch) { case 'p': preen++; break; case 'b': bflag = argtoi('b', "number", optarg, 10); printf("Alternate super block location: %d\n", bflag); break; case 'c': cvtlevel = argtoi('c', "conversion level", optarg, 10); break; case 'd': debug++; break; case 'f': fflag++; break; case 'l': maxrun = argtoi('l', "number", optarg, 10); break; case 'm': lfmode = argtoi('m', "mode", optarg, 8); if (lfmode &~ 07777) errx(EEXIT, "bad mode to -m: %o", lfmode); printf("** lost+found creation mode %o\n", lfmode); break; case 'n': case 'N': nflag++; yflag = 0; break; case 'y': case 'Y': yflag++; nflag = 0; break; default: errx(EEXIT, "%c option?", ch); } } argc -= optind; argv += optind; if (signal(SIGINT, SIG_IGN) != SIG_IGN) (void)signal(SIGINT, catch); if (preen) (void)signal(SIGQUIT, catchquit); if (argc) { while (argc-- > 0) (void)checkfilesys(blockcheck(*argv++), 0, 0L, 0); exit(0); } ret = checkfstab(preen, maxrun, docheck, checkfilesys); if (returntosingle) exit(2); exit(ret); } static int argtoi(flag, req, str, base) int flag; char *req, *str; int base; { char *cp; int ret; ret = (int)strtol(str, &cp, base); if (cp == str || *cp) errx(EEXIT, "-%c flag requires a %s", flag, req); return (ret); } /* * Determine whether a filesystem should be checked. */ static int docheck(fsp) register struct fstab *fsp; { if (strcmp(fsp->fs_vfstype, "ufs") || (strcmp(fsp->fs_type, FSTAB_RW) && strcmp(fsp->fs_type, FSTAB_RO)) || fsp->fs_passno == 0) return (0); return (1); } /* * Check the specified filesystem. */ /* ARGSUSED */ static int checkfilesys(filesys, mntpt, auxdata, child) char *filesys, *mntpt; long auxdata; int child; { ufs_daddr_t n_ffree, n_bfree; struct dups *dp; struct zlncnt *zlnp; int cylno, flags; if (preen && child) (void)signal(SIGQUIT, voidquit); cdevname = filesys; if (debug && preen) pwarn("starting\n"); switch (setup(filesys)) { case 0: if (preen) pfatal("CAN'T CHECK FILE SYSTEM."); return (0); case -1: pwarn("clean, %ld free ", sblock.fs_cstotal.cs_nffree + sblock.fs_frag * sblock.fs_cstotal.cs_nbfree); printf("(%d frags, %d blocks, %.1f%% fragmentation)\n", sblock.fs_cstotal.cs_nffree, sblock.fs_cstotal.cs_nbfree, sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize); return (0); } /* + * Cleared if any questions answered no. Used to decide if + * the superblock should be marked clean. 
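 *
 * (Editorial note, not part of this commit's text: `resolved' starts
 * at 1 here and is cleared below whenever a rerun is needed or the
 * root is mounted read-write, so that the later ckfini(resolved)
 * only marks the superblock clean when everything was fixed,
 * replacing the old unconditional ckfini(1).)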
+ */
+	resolved = 1;
	/*
	 * 1: scan inodes tallying blocks used
	 */
	if (preen == 0) {
		printf("** Last Mounted on %s\n", sblock.fs_fsmnt);
		if (hotroot)
			printf("** Root file system\n");
		printf("** Phase 1 - Check Blocks and Sizes\n");
	}
	pass1();

	/*
	 * 1b: locate first references to duplicates, if any
	 */
	if (duplist) {
-		if (preen)
+		if (preen || usedsoftdep)
			pfatal("INTERNAL ERROR: dups with -p");
		printf("** Phase 1b - Rescan For More DUPS\n");
		pass1b();
	}

	/*
	 * 2: traverse directories from root to mark all connected directories
	 */
	if (preen == 0)
		printf("** Phase 2 - Check Pathnames\n");
	pass2();

	/*
	 * 3: scan inodes looking for disconnected directories
	 */
	if (preen == 0)
		printf("** Phase 3 - Check Connectivity\n");
	pass3();

	/*
	 * 4: scan inodes looking for disconnected files; check reference counts
	 */
	if (preen == 0)
		printf("** Phase 4 - Check Reference Counts\n");
	pass4();

	/*
	 * 5: check and repair resource counts in cylinder groups
	 */
	if (preen == 0)
		printf("** Phase 5 - Check Cyl groups\n");
	pass5();

	/*
	 * print out summary statistics
	 */
	n_ffree = sblock.fs_cstotal.cs_nffree;
	n_bfree = sblock.fs_cstotal.cs_nbfree;
	pwarn("%ld files, %ld used, %ld free ",
	    n_files, n_blks, n_ffree + sblock.fs_frag * n_bfree);
	printf("(%d frags, %d blocks, %.1f%% fragmentation)\n",
	    n_ffree, n_bfree, n_ffree * 100.0 / sblock.fs_dsize);
	if (debug &&
	    (n_files -= maxino - ROOTINO - sblock.fs_cstotal.cs_nifree))
		printf("%d files missing\n", n_files);
	if (debug) {
		n_blks += sblock.fs_ncg *
			(cgdmin(&sblock, 0) - cgsblock(&sblock, 0));
		n_blks += cgsblock(&sblock, 0) - cgbase(&sblock, 0);
		n_blks += howmany(sblock.fs_cssize, sblock.fs_fsize);
		if (n_blks -= maxfsblock - (n_ffree + sblock.fs_frag * n_bfree))
			printf("%d blocks missing\n", n_blks);
		if (duplist != NULL) {
			printf("The following duplicate blocks remain:");
			for (dp = duplist; dp; dp = dp->next)
				printf(" %d,", dp->dup);
			printf("\n");
		}
		if (zlnhead != NULL) {
			printf("The following zero link count inodes remain:");
			for (zlnp = zlnhead; zlnp; zlnp = zlnp->next)
				printf(" %u,", zlnp->zlncnt);
			printf("\n");
		}
	}
	zlnhead = (struct zlncnt *)0;
	duplist = (struct dups *)0;
	muldup = (struct dups *)0;
	inocleanup();
	if (fsmodified) {
		(void)time(&sblock.fs_time);
		sbdirty();
	}
	if (cvtlevel && sblk.b_dirty) {
		/*
		 * Write out the duplicate super blocks
		 */
		for (cylno = 0; cylno < sblock.fs_ncg; cylno++)
			bwrite(fswritefd, (char *)&sblock,
			    fsbtodb(&sblock, cgsblock(&sblock, cylno)), SBSIZE);
	}
-	if (!hotroot) {
-		ckfini(1);
-	} else {
+	if (rerun)
+		resolved = 0;
+	flags = 0;
+	if (hotroot) {
		struct statfs stfs_buf;
		/*
		 * Check to see if root is mounted read-write.
		 */
		if (statfs("/", &stfs_buf) == 0)
			flags = stfs_buf.f_flags;
-		else
-			flags = 0;
-		ckfini(flags & MNT_RDONLY);
+		if ((flags & MNT_RDONLY) == 0)
+			resolved = 0;
	}
+	ckfini(resolved);
	free(blockmap);
	free(statemap);
	free((char *)lncntp);
	if (!fsmodified)
		return (0);
	if (!preen)
		printf("\n***** FILE SYSTEM WAS MODIFIED *****\n");
	if (rerun)
		printf("\n***** PLEASE RERUN FSCK *****\n");
	if (hotroot) {
		struct ufs_args args;
		int ret;
		/*
		 * We modified the root.  Do a mount update on
		 * it, unless it is read-write, so we can continue.
*/ if (flags & MNT_RDONLY) { args.fspec = 0; args.export.ex_flags = 0; args.export.ex_root = 0; flags |= MNT_UPDATE | MNT_RELOAD; ret = mount("ufs", "/", flags, &args); if (ret == 0) return (0); } if (!preen) printf("\n***** REBOOT NOW *****\n"); sync(); return (4); } return (0); } Index: head/sbin/fsck/pass1.c =================================================================== --- head/sbin/fsck/pass1.c (revision 34265) +++ head/sbin/fsck/pass1.c (revision 34266) @@ -1,322 +1,330 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)pass1.c 8.6 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" static ufs_daddr_t badblk; static ufs_daddr_t dupblk; static void checkinode __P((ino_t inumber, struct inodesc *)); void pass1() { ino_t inumber; int c, i, cgd; struct inodesc idesc; /* * Set file system reserved blocks in used block map. */ for (c = 0; c < sblock.fs_ncg; c++) { cgd = cgdmin(&sblock, c); if (c == 0) { i = cgbase(&sblock, c); cgd += howmany(sblock.fs_cssize, sblock.fs_fsize); } else i = cgsblock(&sblock, c); for (; i < cgd; i++) setbmap(i); } /* * Find all allocated blocks. 
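 *
 * Editorial sketch: for each fragment an inode claims, pass1check()
 * either accounts it in the block map or flags a duplicate:
 *
 *	if (!testbmap(blkno)) {
 *		n_blks++;
 *		setbmap(blkno);
 *	} else
 *		blkerror(idesc->id_number, "DUP", blkno);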
*/ memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass1check; inumber = 0; n_files = n_blks = 0; resetinodebuf(); for (c = 0; c < sblock.fs_ncg; c++) { for (i = 0; i < sblock.fs_ipg; i++, inumber++) { if (inumber < ROOTINO) continue; checkinode(inumber, &idesc); } } freeinodebuf(); } static void checkinode(inumber, idesc) ino_t inumber; register struct inodesc *idesc; { register struct dinode *dp; struct zlncnt *zlnp; int ndb, j; mode_t mode; char *symbuf; dp = getnextinode(inumber); mode = dp->di_mode & IFMT; if (mode == 0) { if (memcmp(dp->di_db, zino.di_db, NDADDR * sizeof(ufs_daddr_t)) || memcmp(dp->di_ib, zino.di_ib, NIADDR * sizeof(ufs_daddr_t)) || dp->di_mode || dp->di_size) { pfatal("PARTIALLY ALLOCATED INODE I=%lu", inumber); if (reply("CLEAR") == 1) { dp = ginode(inumber); clearinode(dp); inodirty(); } } statemap[inumber] = USTATE; return; } lastino = inumber; if (/* dp->di_size < 0 || */ dp->di_size + sblock.fs_bsize - 1 < dp->di_size || (mode == IFDIR && dp->di_size > MAXDIRSIZE)) { if (debug) printf("bad size %qu:", dp->di_size); goto unknown; } if (!preen && mode == IFMT && reply("HOLD BAD BLOCK") == 1) { dp = ginode(inumber); dp->di_size = sblock.fs_fsize; dp->di_mode = IFREG|0600; inodirty(); } ndb = howmany(dp->di_size, sblock.fs_bsize); if (ndb < 0) { if (debug) printf("bad size %qu ndb %d:", dp->di_size, ndb); goto unknown; } if (mode == IFBLK || mode == IFCHR) ndb++; if (mode == IFLNK) { if (doinglevel2 && dp->di_size > 0 && dp->di_size < MAXSYMLINKLEN && dp->di_blocks != 0) { symbuf = alloca(secsize); if (bread(fsreadfd, symbuf, fsbtodb(&sblock, dp->di_db[0]), (long)secsize) != 0) errx(EEXIT, "cannot read symlink"); if (debug) { symbuf[dp->di_size] = 0; printf("convert symlink %ld(%s) of size %ld\n", inumber, symbuf, (long)dp->di_size); } dp = ginode(inumber); memmove(dp->di_shortlink, symbuf, (long)dp->di_size); dp->di_blocks = 0; inodirty(); } /* * Fake ndb value so direct/indirect block checks below * will detect any garbage after symlink string. 
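 *
 * Worked example (editorial addition): a 10-byte link stored in the
 * inode itself gives ndb = howmany(10, sizeof(ufs_daddr_t)) = 3, so
 * the loops below require di_db[3] through di_db[NDADDR - 1] and all
 * of di_ib[] to be zero; anything else is garbage left over after
 * the symlink string.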
 */
		if (dp->di_size < sblock.fs_maxsymlinklen ||
		    dp->di_blocks == 0) {
			ndb = howmany(dp->di_size, sizeof(ufs_daddr_t));
			if (ndb > NDADDR) {
				j = ndb - NDADDR;
				for (ndb = 1; j > 1; j--)
					ndb *= NINDIR(&sblock);
				ndb += NDADDR;
			}
		}
	}
	for (j = ndb; j < NDADDR; j++)
		if (dp->di_db[j] != 0) {
			if (debug)
				printf("bad direct addr: %ld\n", dp->di_db[j]);
			goto unknown;
		}
	for (j = 0, ndb -= NDADDR; ndb > 0; j++)
		ndb /= NINDIR(&sblock);
	for (; j < NIADDR; j++)
		if (dp->di_ib[j] != 0) {
			if (debug)
				printf("bad indirect addr: %ld\n",
					dp->di_ib[j]);
			goto unknown;
		}
	if (ftypeok(dp) == 0)
		goto unknown;
	n_files++;
	lncntp[inumber] = dp->di_nlink;
	if (dp->di_nlink <= 0) {
		zlnp = (struct zlncnt *)malloc(sizeof *zlnp);
		if (zlnp == NULL) {
			pfatal("LINK COUNT TABLE OVERFLOW");
-			if (reply("CONTINUE") == 0)
+			if (reply("CONTINUE") == 0) {
+				ckfini(0);
				exit(EEXIT);
+			}
		} else {
			zlnp->zlncnt = inumber;
			zlnp->next = zlnhead;
			zlnhead = zlnp;
		}
	}
	if (mode == IFDIR) {
		if (dp->di_size == 0)
			statemap[inumber] = DCLEAR;
		else
			statemap[inumber] = DSTATE;
		cacheino(dp, inumber);
	} else
		statemap[inumber] = FSTATE;
	typemap[inumber] = IFTODT(mode);
	if (doinglevel2 &&
	    (dp->di_ouid != (u_short)-1 || dp->di_ogid != (u_short)-1)) {
		dp = ginode(inumber);
		dp->di_uid = dp->di_ouid;
		dp->di_ouid = -1;
		dp->di_gid = dp->di_ogid;
		dp->di_ogid = -1;
		inodirty();
	}
	badblk = dupblk = 0;
	idesc->id_number = inumber;
	(void)ckinode(dp, idesc);
	idesc->id_entryno *= btodb(sblock.fs_fsize);
	if (dp->di_blocks != idesc->id_entryno) {
		pwarn("INCORRECT BLOCK COUNT I=%lu (%ld should be %ld)",
		    inumber, dp->di_blocks, idesc->id_entryno);
		if (preen)
			printf(" (CORRECTED)\n");
		else if (reply("CORRECT") == 0)
			return;
		dp = ginode(inumber);
		dp->di_blocks = idesc->id_entryno;
		inodirty();
	}
	return;
unknown:
	pfatal("UNKNOWN FILE TYPE I=%lu", inumber);
	statemap[inumber] = FCLEAR;
	if (reply("CLEAR") == 1) {
		statemap[inumber] = USTATE;
		dp = ginode(inumber);
		clearinode(dp);
		inodirty();
	}
}

int
pass1check(idesc)
	register struct inodesc *idesc;
{
	int res = KEEPON;
	int anyout, nfrags;
	ufs_daddr_t blkno = idesc->id_blkno;
	register struct dups *dlp;
	struct dups *new;

	if ((anyout = chkrange(blkno, idesc->id_numfrags)) != 0) {
		blkerror(idesc->id_number, "BAD", blkno);
		if (badblk++ >= MAXBAD) {
			pwarn("EXCESSIVE BAD BLKS I=%lu", idesc->id_number);
			if (preen)
				printf(" (SKIPPING)\n");
-			else if (reply("CONTINUE") == 0)
+			else if (reply("CONTINUE") == 0) {
+				ckfini(0);
				exit(EEXIT);
+			}
			return (STOP);
		}
	}
	for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) {
		if (anyout && chkrange(blkno, 1)) {
			res = SKIP;
		} else if (!testbmap(blkno)) {
			n_blks++;
			setbmap(blkno);
		} else {
			blkerror(idesc->id_number, "DUP", blkno);
			if (dupblk++ >= MAXDUP) {
				pwarn("EXCESSIVE DUP BLKS I=%lu",
					idesc->id_number);
				if (preen)
					printf(" (SKIPPING)\n");
-				else if (reply("CONTINUE") == 0)
+				else if (reply("CONTINUE") == 0) {
+					ckfini(0);
					exit(EEXIT);
+				}
				return (STOP);
			}
			new = (struct dups *)malloc(sizeof(struct dups));
			if (new == NULL) {
				pfatal("DUP TABLE OVERFLOW.");
-				if (reply("CONTINUE") == 0)
+				if (reply("CONTINUE") == 0) {
+					ckfini(0);
					exit(EEXIT);
+				}
				return (STOP);
			}
			new->dup = blkno;
			if (muldup == 0) {
				duplist = muldup = new;
				new->next = 0;
			} else {
				new->next = muldup->next;
				muldup->next = new;
			}
			for (dlp = duplist; dlp != muldup; dlp = dlp->next)
				if (dlp->dup == blkno)
					break;
			if (dlp == muldup && dlp->dup != blkno)
				muldup = new;
		}
		/*
		 * count the number of blocks found in id_entryno
		 */
		idesc->id_entryno++;
	}
	return (res);
}
Index: head/sbin/fsck/pass2.c
=================================================================== --- head/sbin/fsck/pass2.c (revision 34265) +++ head/sbin/fsck/pass2.c (revision 34266) @@ -1,467 +1,482 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)pass2.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" #define MINDIRSIZE (sizeof (struct dirtemplate)) static int blksort __P((const void *, const void *)); static int pass2check __P((struct inodesc *)); void pass2() { register struct dinode *dp; register struct inoinfo **inpp, *inp; struct inoinfo **inpend; struct inodesc curino; struct dinode dino; char pathbuf[MAXPATHLEN + 1]; switch (statemap[ROOTINO]) { case USTATE: pfatal("ROOT INODE UNALLOCATED"); - if (reply("ALLOCATE") == 0) + if (reply("ALLOCATE") == 0) { + ckfini(0); exit(EEXIT); + } if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; case DCLEAR: pfatal("DUPS/BAD IN ROOT INODE"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } break; case FSTATE: case FCLEAR: pfatal("ROOT INODE NOT DIRECTORY"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("FIX") == 0) + if (reply("FIX") == 0) { + ckfini(0); exit(EEXIT); + } dp = ginode(ROOTINO); dp->di_mode &= ~IFMT; dp->di_mode |= IFDIR; inodirty(); break; case DSTATE: break; default: errx(EEXIT, "BAD STATE %d FOR ROOT INODE", statemap[ROOTINO]); } statemap[ROOTINO] = DFOUND; if (newinofmt) { statemap[WINO] = FSTATE; typemap[WINO] = DT_WHT; } /* * Sort the directory list into disk block order. */ qsort((char *)inpsort, (size_t)inplast, sizeof *inpsort, blksort); /* * Check the integrity of each directory. */ memset(&curino, 0, sizeof(struct inodesc)); curino.id_type = DATA; curino.id_func = pass2check; dp = &dino; inpend = &inpsort[inplast]; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_isize == 0) continue; if (inp->i_isize < MINDIRSIZE) { direrror(inp->i_number, "DIRECTORY TOO SHORT"); inp->i_isize = roundup(MINDIRSIZE, DIRBLKSIZ); if (reply("FIX") == 1) { dp = ginode(inp->i_number); dp->di_size = inp->i_isize; inodirty(); dp = &dino; } } else if ((inp->i_isize & (DIRBLKSIZ - 1)) != 0) { getpathname(pathbuf, inp->i_number, inp->i_number); - pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d", - pathbuf, inp->i_isize, DIRBLKSIZ); + if (usedsoftdep) + pfatal("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); + else + pwarn("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); if (preen) printf(" (ADJUSTED)\n"); inp->i_isize = roundup(inp->i_isize, DIRBLKSIZ); if (preen || reply("ADJUST") == 1) { dp = ginode(inp->i_number); dp->di_size = roundup(inp->i_isize, DIRBLKSIZ); inodirty(); dp = &dino; } } memset(&dino, 0, sizeof(struct dinode)); dino.di_mode = IFDIR; dp->di_size = inp->i_isize; memmove(&dp->di_db[0], &inp->i_blks[0], (size_t)inp->i_numblks); curino.id_number = inp->i_number; curino.id_parent = inp->i_parent; (void)ckinode(dp, &curino); } /* * Now that the parents of all directories have been found, * make another pass to verify the value of `..' 
*/ for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0 || inp->i_isize == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) statemap[inp->i_number] = DFOUND; if (inp->i_dotdot == inp->i_parent || inp->i_dotdot == (ino_t)-1) continue; if (inp->i_dotdot == 0) { inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, inp->i_number, "MISSING '..'"); if (reply("FIX") == 0) continue; (void)makeentry(inp->i_number, inp->i_parent, ".."); lncntp[inp->i_parent]--; continue; } fileerror(inp->i_parent, inp->i_number, "BAD INODE NUMBER FOR '..'"); if (reply("FIX") == 0) continue; lncntp[inp->i_dotdot]++; lncntp[inp->i_parent]--; inp->i_dotdot = inp->i_parent; (void)changeino(inp->i_number, "..", inp->i_parent); } /* * Mark all the directories that can be found from the root. */ propagate(); } static int pass2check(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; register struct inoinfo *inp; int n, entrysize, ret = 0; struct dinode *dp; char *errmsg; struct direct proto; char namebuf[MAXPATHLEN + 1]; char pathbuf[MAXPATHLEN + 1]; /* * If converting, set directory entry type. */ if (doinglevel2 && dirp->d_ino > 0 && dirp->d_ino < maxino) { dirp->d_type = typemap[dirp->d_ino]; ret |= ALTERED; } /* * check for "." */ if (idesc->id_entryno != 0) goto chk1; if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") == 0) { if (dirp->d_ino != idesc->id_number) { direrror(idesc->id_number, "BAD INODE NUMBER FOR '.'"); dirp->d_ino = idesc->id_number; if (reply("FIX") == 1) ret |= ALTERED; } if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '.'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk1; } direrror(idesc->id_number, "MISSING '.'"); proto.d_ino = idesc->id_number; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 1; (void)strcpy(proto.d_name, "."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (dirp->d_ino != 0 && strcmp(dirp->d_name, "..") != 0) { pfatal("CANNOT FIX, FIRST ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); } else if (dirp->d_reclen < entrysize) { pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '.'\n"); } else if (dirp->d_reclen < 2 * entrysize) { proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } else { n = dirp->d_reclen - entrysize; proto.d_reclen = entrysize; memmove(dirp, &proto, (size_t)entrysize); idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + entrysize); memset(dirp, 0, (size_t)n); dirp->d_reclen = n; if (reply("FIX") == 1) ret |= ALTERED; } chk1: if (idesc->id_entryno > 1) goto chk2; inp = getinoinfo(idesc->id_number); proto.d_ino = inp->i_parent; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 2; (void)strcpy(proto.d_name, ".."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (idesc->id_entryno == 0) { n = DIRSIZ(0, dirp); if (dirp->d_reclen < n + entrysize) goto chk2; proto.d_reclen = dirp->d_reclen - n; dirp->d_reclen = n; idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + n); memset(dirp, 0, (size_t)proto.d_reclen); dirp->d_reclen = proto.d_reclen; } if (dirp->d_ino != 
0 && strcmp(dirp->d_name, "..") == 0) { inp->i_dotdot = dirp->d_ino; if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '..'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk2; } if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") != 0) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, SECOND ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); inp->i_dotdot = (ino_t)-1; } else if (dirp->d_reclen < entrysize) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '..'\n"); inp->i_dotdot = (ino_t)-1; } else if (inp->i_parent != 0) { /* * We know the parent, so fix now. */ inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } idesc->id_entryno++; if (dirp->d_ino != 0) lncntp[dirp->d_ino]--; return (ret|KEEPON); chk2: if (dirp->d_ino == 0) return (ret|KEEPON); if (dirp->d_namlen <= 2 && dirp->d_name[0] == '.' && idesc->id_entryno >= 2) { if (dirp->d_namlen == 1) { direrror(idesc->id_number, "EXTRA '.' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } if (dirp->d_name[1] == '.') { direrror(idesc->id_number, "EXTRA '..' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } } idesc->id_entryno++; n = 0; if (dirp->d_ino > maxino) { fileerror(idesc->id_number, dirp->d_ino, "I OUT OF RANGE"); n = reply("REMOVE"); } else if (newinofmt && ((dirp->d_ino == WINO && dirp->d_type != DT_WHT) || (dirp->d_ino != WINO && dirp->d_type == DT_WHT))) { fileerror(idesc->id_number, dirp->d_ino, "BAD WHITEOUT ENTRY"); dirp->d_ino = WINO; dirp->d_type = DT_WHT; if (reply("FIX") == 1) ret |= ALTERED; } else { again: switch (statemap[dirp->d_ino]) { case USTATE: if (idesc->id_entryno <= 2) break; fileerror(idesc->id_number, dirp->d_ino, "UNALLOCATED"); n = reply("REMOVE"); break; case DCLEAR: case FCLEAR: if (idesc->id_entryno <= 2) break; if (statemap[dirp->d_ino] == FCLEAR) errmsg = "DUP/BAD"; - else if (!preen) + else if (!preen && !usedsoftdep) errmsg = "ZERO LENGTH DIRECTORY"; else { n = 1; break; } fileerror(idesc->id_number, dirp->d_ino, errmsg); if ((n = reply("REMOVE")) == 1) break; dp = ginode(dirp->d_ino); statemap[dirp->d_ino] = (dp->di_mode & IFMT) == IFDIR ? 
DSTATE : FSTATE; lncntp[dirp->d_ino] = dp->di_nlink; goto again; case DSTATE: if (statemap[idesc->id_number] == DFOUND) statemap[dirp->d_ino] = DFOUND; /* fall through */ case DFOUND: inp = getinoinfo(dirp->d_ino); if (inp->i_parent != 0 && idesc->id_entryno > 2) { getpathname(pathbuf, idesc->id_number, idesc->id_number); getpathname(namebuf, dirp->d_ino, dirp->d_ino); pwarn("%s %s %s\n", pathbuf, "IS AN EXTRANEOUS HARD LINK TO DIRECTORY", namebuf); - if (preen) - printf(" (IGNORED)\n"); + if (preen) { + printf(" (REMOVED)\n"); + n = 1; + break; + } else if ((n = reply("REMOVE")) == 1) break; } if (idesc->id_entryno > 2) inp->i_parent = idesc->id_number; /* fall through */ case FSTATE: if (newinofmt && dirp->d_type != typemap[dirp->d_ino]) { fileerror(idesc->id_number, dirp->d_ino, "BAD TYPE VALUE"); dirp->d_type = typemap[dirp->d_ino]; if (reply("FIX") == 1) ret |= ALTERED; } lncntp[dirp->d_ino]--; break; default: errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[dirp->d_ino], dirp->d_ino); } } if (n == 0) return (ret|KEEPON); dirp->d_ino = 0; return (ret|KEEPON|ALTERED); } /* * Routine to sort disk blocks. */ static int blksort(arg1, arg2) const void *arg1, *arg2; { return ((*(struct inoinfo **)arg1)->i_blks[0] - (*(struct inoinfo **)arg2)->i_blks[0]); } Index: head/sbin/fsck/pass5.c =================================================================== --- head/sbin/fsck/pass5.c (revision 34265) +++ head/sbin/fsck/pass5.c (revision 34266) @@ -1,345 +1,375 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
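/*
 * The pass2 changes above split diagnostics by how the damage can arise.
 * With soft updates in use (usedsoftdep) the on-disk ordering guarantees
 * mean a directory length that is not a multiple of DIRBLKSIZ cannot be
 * simple crash fallout, so it is reported through pfatal() instead of
 * pwarn(); likewise an extraneous hard link to a directory found while
 * preening is now removed rather than ignored, since leaving it behind
 * would force a manual rerun.  The severity dispatch reduces to the
 * sketch below, where report_dirlen() is an illustrative name only:
 */
static void
report_dirlen(char *path, int isize)
{

	if (usedsoftdep)	/* unexpected: escalate (pfatal exits in preen) */
		pfatal("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d",
		    path, isize, DIRBLKSIZ);
	else			/* ordinary crash damage: warn, then adjust */
		pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d",
		    path, isize, DIRBLKSIZ);
}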
*/ #ifndef lint static const char sccsid[] = "@(#)pass5.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include "fsck.h" void pass5() { int c, blk, frags, basesize, sumsize, mapsize, savednrpos; + int inomapsize, blkmapsize; struct fs *fs = &sblock; struct cg *cg = &cgrp; ufs_daddr_t dbase, dmax; ufs_daddr_t d; - long i, j; + long i, j, k; struct csum *cs; struct csum cstotal; struct inodesc idesc[3]; char buf[MAXBSIZE]; register struct cg *newcg = (struct cg *)buf; struct ocg *ocg = (struct ocg *)buf; statemap[WINO] = USTATE; memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) pwarn("DELETING CLUSTERING MAPS\n"); if (preen || reply("DELETE CLUSTERING MAPS")) { fs->fs_contigsumsize = 0; doinglevel1 = 1; sbdirty(); } } if (fs->fs_maxcontig > 1) { char *doit = 0; if (fs->fs_contigsumsize < 1) { doit = "CREAT"; } else if (fs->fs_contigsumsize < fs->fs_maxcontig && fs->fs_contigsumsize < FS_MAXCONTIG) { doit = "EXPAND"; } if (doit) { i = fs->fs_contigsumsize; fs->fs_contigsumsize = MIN(fs->fs_maxcontig, FS_MAXCONTIG); if (CGSIZE(fs) > fs->fs_bsize) { pwarn("CANNOT %s CLUSTER MAPS\n", doit); fs->fs_contigsumsize = i; } else if (preen || reply("CREATE CLUSTER MAPS")) { if (preen) pwarn("%sING CLUSTER MAPS\n", doit); fs->fs_cgsize = fragroundup(fs, CGSIZE(fs)); doinglevel1 = 1; sbdirty(); } } } } switch ((int)fs->fs_postblformat) { case FS_42POSTBLFMT: basesize = (char *)(&ocg->cg_btot[0]) - (char *)(&ocg->cg_firstfield); sumsize = &ocg->cg_iused[0] - (u_int8_t *)(&ocg->cg_btot[0]); mapsize = &ocg->cg_free[howmany(fs->fs_fpg, NBBY)] - (u_char *)&ocg->cg_iused[0]; + blkmapsize = howmany(fs->fs_fpg, NBBY); + inomapsize = &ocg->cg_free[0] - (u_char *)&ocg->cg_iused[0]; ocg->cg_magic = CG_MAGIC; savednrpos = fs->fs_nrpos; fs->fs_nrpos = 8; break; case FS_DYNAMICPOSTBLFMT: newcg->cg_btotoff = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); newcg->cg_boff = newcg->cg_btotoff + fs->fs_cpg * sizeof(long); newcg->cg_iusedoff = newcg->cg_boff + fs->fs_cpg * fs->fs_nrpos * sizeof(short); newcg->cg_freeoff = newcg->cg_iusedoff + howmany(fs->fs_ipg, NBBY); - if (fs->fs_contigsumsize <= 0) { - newcg->cg_nextfreeoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); - } else { - newcg->cg_clustersumoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY) - + inomapsize = newcg->cg_freeoff - newcg->cg_iusedoff; + newcg->cg_nextfreeoff = newcg->cg_freeoff + + howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); + blkmapsize = newcg->cg_nextfreeoff - newcg->cg_freeoff; + if (fs->fs_contigsumsize > 0) { + newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(long); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(long)); newcg->cg_clusteroff = newcg->cg_clustersumoff + (fs->fs_contigsumsize + 1) * sizeof(long); newcg->cg_nextfreeoff = newcg->cg_clusteroff + howmany(fs->fs_cpg * fs->fs_spc / NSPB(fs), NBBY); } newcg->cg_magic = CG_MAGIC; basesize = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); sumsize = newcg->cg_iusedoff - newcg->cg_btotoff; mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff; break; default: - sumsize = 0; /* keep lint happy */ + inomapsize = blkmapsize = sumsize = 0; /* keep lint happy */ errx(EEXIT, "UNKNOWN ROTATIONAL TABLE FORMAT %d", fs->fs_postblformat); } memset(&idesc[0], 0, sizeof idesc); for (i = 0; i < 3; i++) { idesc[i].id_type = ADDR; if 
(doinglevel2) idesc[i].id_fix = FIX; } memset(&cstotal, 0, sizeof(struct csum)); j = blknum(fs, fs->fs_size + fs->fs_frag - 1); for (i = fs->fs_size; i < j; i++) setbmap(i); for (c = 0; c < fs->fs_ncg; c++) { getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize); if (!cg_chkmagic(cg)) pfatal("CG %d: BAD MAGIC NUMBER\n", c); dbase = cgbase(fs, c); dmax = dbase + fs->fs_fpg; if (dmax > fs->fs_size) dmax = fs->fs_size; newcg->cg_time = cg->cg_time; newcg->cg_cgx = c; if (c == fs->fs_ncg - 1) newcg->cg_ncyl = fs->fs_ncyl % fs->fs_cpg; else newcg->cg_ncyl = fs->fs_cpg; newcg->cg_ndblk = dmax - dbase; if (fs->fs_contigsumsize > 0) newcg->cg_nclusterblks = newcg->cg_ndblk / fs->fs_frag; newcg->cg_cs.cs_ndir = 0; newcg->cg_cs.cs_nffree = 0; newcg->cg_cs.cs_nbfree = 0; newcg->cg_cs.cs_nifree = fs->fs_ipg; if (cg->cg_rotor < newcg->cg_ndblk) newcg->cg_rotor = cg->cg_rotor; else newcg->cg_rotor = 0; if (cg->cg_frotor < newcg->cg_ndblk) newcg->cg_frotor = cg->cg_frotor; else newcg->cg_frotor = 0; if (cg->cg_irotor < newcg->cg_niblk) newcg->cg_irotor = cg->cg_irotor; else newcg->cg_irotor = 0; memset(&newcg->cg_frsum[0], 0, sizeof newcg->cg_frsum); memset(&cg_blktot(newcg)[0], 0, (size_t)(sumsize + mapsize)); if (fs->fs_postblformat == FS_42POSTBLFMT) ocg->cg_magic = CG_MAGIC; j = fs->fs_ipg * c; for (i = 0; i < fs->fs_ipg; j++, i++) { switch (statemap[j]) { case USTATE: break; case DSTATE: case DCLEAR: case DFOUND: newcg->cg_cs.cs_ndir++; /* fall through */ case FSTATE: case FCLEAR: newcg->cg_cs.cs_nifree--; setbit(cg_inosused(newcg), i); break; default: if (j < ROOTINO) break; errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[j], j); } } if (c == 0) for (i = 0; i < ROOTINO; i++) { setbit(cg_inosused(newcg), i); newcg->cg_cs.cs_nifree--; } for (i = 0, d = dbase; d < dmax; d += fs->fs_frag, i += fs->fs_frag) { frags = 0; for (j = 0; j < fs->fs_frag; j++) { if (testbmap(d + j)) continue; setbit(cg_blksfree(newcg), i + j); frags++; } if (frags == fs->fs_frag) { newcg->cg_cs.cs_nbfree++; j = cbtocylno(fs, i); cg_blktot(newcg)[j]++; cg_blks(fs, newcg, j)[cbtorpos(fs, i)]++; if (fs->fs_contigsumsize > 0) setbit(cg_clustersfree(newcg), i / fs->fs_frag); } else if (frags > 0) { newcg->cg_cs.cs_nffree += frags; blk = blkmap(fs, cg_blksfree(newcg), i); ffs_fragacct(fs, blk, newcg->cg_frsum, 1); } } if (fs->fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(newcg); u_char *mapp = cg_clustersfree(newcg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < newcg->cg_nclusterblks; i++) { if ((map & bit) != 0) { run++; } else if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; run = 0; } if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; } } cstotal.cs_nffree += newcg->cg_cs.cs_nffree; cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree; cstotal.cs_nifree += newcg->cg_cs.cs_nifree; cstotal.cs_ndir += newcg->cg_cs.cs_ndir; cs = &fs->fs_cs(fs, c); if (memcmp(&newcg->cg_cs, cs, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(cs, &newcg->cg_cs, sizeof *cs); sbdirty(); } if (doinglevel1) { memmove(cg, newcg, (size_t)fs->fs_cgsize); cgdirty(); continue; } - if (memcmp(cg_inosused(newcg), - cg_inosused(cg), mapsize) != 0 && - dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { - memmove(cg_inosused(cg), cg_inosused(newcg), - (size_t)mapsize); - cgdirty(); - } if ((memcmp(newcg, cg, basesize) != 0 || memcmp(&cg_blktot(newcg)[0], 
&cg_blktot(cg)[0], sumsize) != 0) && dofix(&idesc[2], "SUMMARY INFORMATION BAD")) { memmove(cg, newcg, (size_t)basesize); memmove(&cg_blktot(cg)[0], &cg_blktot(newcg)[0], (size_t)sumsize); + cgdirty(); + } + if (usedsoftdep) { + for (i = 0; i < inomapsize; i++) { + j = cg_inosused(newcg)[i]; + if ((cg_inosused(cg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED INODE %d MARKED FREE", + c * fs->fs_ipg + i * 8 + k); + } + } + for (i = 0; i < blkmapsize; i++) { + j = cg_blksfree(cg)[i]; + if ((cg_blksfree(newcg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED FRAG %d MARKED FREE", + c * fs->fs_fpg + i * 8 + k); + } + } + } + if (memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && + dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { + memmove(cg_inosused(cg), cg_inosused(newcg), + (size_t)mapsize); cgdirty(); } } if (fs->fs_postblformat == FS_42POSTBLFMT) fs->fs_nrpos = savednrpos; if (memcmp(&cstotal, &fs->fs_cstotal, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(&fs->fs_cstotal, &cstotal, sizeof *cs); fs->fs_ronly = 0; sbdirty(); } if (fs->fs_fmod != 0) { pwarn("MODIFIED FLAG SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_fmod = 0; sbdirty(); } } if (fs->fs_clean == 0) { pwarn("CLEAN FLAG NOT SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_clean = 1; sbdirty(); } } } Index: head/sbin/fsck/setup.c =================================================================== --- head/sbin/fsck/setup.c (revision 34265) +++ head/sbin/fsck/setup.c (revision 34266) @@ -1,514 +1,520 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
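/*
 * The new usedsoftdep block in pass5 above runs before the bitmaps are
 * rewritten, while the cylinder group still holds what was on disk, and
 * prints every inode or fragment that fsck computed as allocated but the
 * disk claims is free -- precisely the direction of error soft updates
 * is supposed to rule out.  (Note that the fragment loop masks with
 * cg_inosused(cg)[i]; from the structure of the inode loop the intended
 * mask appears to be cg_blksfree(newcg)[i].)  Both loops are instances
 * of "report bits set in map a but clear in map b"; as a sketch:
 */
static void
report_set_not_in(u_char *a, u_char *b, int mapsize, int base, char *what)
{
	int i, j, k;

	for (i = 0; i < mapsize; i++) {
		j = a[i];
		if ((b[i] & j) == j)	/* every bit of a[i] present in b[i] */
			continue;
		for (k = 0; k < NBBY; k++) {
			if ((j & (1 << k)) == 0)
				continue;	/* bit clear in a: nothing to check */
			if (b[i] & (1 << k))
				continue;	/* bit set in both: consistent */
			pwarn("ALLOCATED %s %d MARKED FREE",
			    what, base + i * NBBY + k);
		}
	}
}
/*
 * The inode check corresponds to report_set_not_in(cg_inosused(newcg),
 * cg_inosused(cg), inomapsize, c * fs->fs_ipg, "INODE"); the fragment
 * check passes the free maps the other way around: (cg_blksfree(cg),
 * cg_blksfree(newcg), blkmapsize, c * fs->fs_fpg, "FRAG").
 */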
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95"; #endif /* not lint */ #define DKTYPENAMES #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" struct bufarea asblk; #define altsblock (*asblk.b_un.b_fs) #define POWEROF2(num) (((num) & ((num) - 1)) == 0) static void badsb __P((int listerr, char *s)); static int calcsb __P((char *dev, int devfd, struct fs *fs)); static struct disklabel *getdisklabel __P((char *s, int fd)); static int readsb __P((int listerr)); /* * Read in a superblock finding an alternate if necessary. * Return 1 if successful, 0 if unsuccessful, -1 if filesystem * is already clean (preen mode only). */ int setup(dev) char *dev; { long cg, size, asked, i, j; long skipclean, bmapsize; struct disklabel *lp; off_t sizepb; struct stat statb; struct fs proto; havesb = 0; fswritefd = -1; skipclean = preen; if (stat(dev, &statb) < 0) { printf("Can't stat %s: %s\n", dev, strerror(errno)); return (0); } if ((statb.st_mode & S_IFMT) != S_IFCHR) { pfatal("%s is not a character device", dev); if (reply("CONTINUE") == 0) return (0); } if ((fsreadfd = open(dev, O_RDONLY)) < 0) { printf("Can't open %s: %s\n", dev, strerror(errno)); return (0); } if (preen == 0) printf("** %s", dev); if (nflag || (fswritefd = open(dev, O_WRONLY)) < 0) { fswritefd = -1; if (preen) pfatal("NO WRITE ACCESS"); printf(" (NO WRITE)"); } if (preen == 0) printf("\n"); fsmodified = 0; lfdir = 0; initbarea(&sblk); initbarea(&asblk); sblk.b_un.b_buf = malloc(SBSIZE); asblk.b_un.b_buf = malloc(SBSIZE); if (sblk.b_un.b_buf == NULL || asblk.b_un.b_buf == NULL) errx(EEXIT, "cannot allocate space for superblock"); lp = getdisklabel((char *)NULL, fsreadfd); if (lp) dev_bsize = secsize = lp->d_secsize; else dev_bsize = secsize = DEV_BSIZE; /* * Read in the superblock, looking for alternates if necessary */ if (readsb(1) == 0) { skipclean = 0; if (bflag || preen || calcsb(dev, fsreadfd, &proto) == 0) return(0); if (reply("LOOK FOR ALTERNATE SUPERBLOCKS") == 0) return (0); for (cg = 0; cg < proto.fs_ncg; cg++) { bflag = fsbtodb(&proto, cgsblock(&proto, cg)); if (readsb(0) != 0) break; } if (cg >= proto.fs_ncg) { printf("%s %s\n%s %s\n%s %s\n", "SEARCH FOR ALTERNATE SUPER-BLOCK", "FAILED. YOU MUST USE THE", "-b OPTION TO FSCK TO SPECIFY THE", "LOCATION OF AN ALTERNATE", "SUPER-BLOCK TO SUPPLY NEEDED", "INFORMATION; SEE fsck(8)."); bflag = 0; return(0); } pwarn("USING ALTERNATE SUPERBLOCK AT %d\n", bflag); bflag = 0; } maxfsblock = sblock.fs_size; maxino = sblock.fs_ncg * sblock.fs_ipg; /* * Check and potentially fix certain fields in the super block. 
*/ if (sblock.fs_optim != FS_OPTTIME && sblock.fs_optim != FS_OPTSPACE) { pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK"); if (reply("SET TO DEFAULT") == 1) { sblock.fs_optim = FS_OPTTIME; sbdirty(); } } if ((sblock.fs_minfree < 0 || sblock.fs_minfree > 99)) { pfatal("IMPOSSIBLE MINFREE=%d IN SUPERBLOCK", sblock.fs_minfree); if (reply("SET TO DEFAULT") == 1) { sblock.fs_minfree = 10; sbdirty(); } } if (sblock.fs_interleave < 1 || sblock.fs_interleave > sblock.fs_nsect) { pwarn("IMPOSSIBLE INTERLEAVE=%d IN SUPERBLOCK", sblock.fs_interleave); sblock.fs_interleave = 1; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_npsect < sblock.fs_nsect || sblock.fs_npsect > sblock.fs_nsect*2) { pwarn("IMPOSSIBLE NPSECT=%d IN SUPERBLOCK", sblock.fs_npsect); sblock.fs_npsect = sblock.fs_nsect; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_inodefmt >= FS_44INODEFMT) { newinofmt = 1; } else { sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; newinofmt = 0; } /* * Convert to new inode format. */ if (cvtlevel >= 2 && sblock.fs_inodefmt < FS_44INODEFMT) { if (preen) pwarn("CONVERTING TO NEW INODE FORMAT\n"); else if (!reply("CONVERT TO NEW INODE FORMAT")) return(0); doinglevel2++; sblock.fs_inodefmt = FS_44INODEFMT; sizepb = sblock.fs_bsize; sblock.fs_maxfilesize = sblock.fs_bsize * NDADDR - 1; for (i = 0; i < NIADDR; i++) { sizepb *= NINDIR(&sblock); sblock.fs_maxfilesize += sizepb; } sblock.fs_maxsymlinklen = MAXSYMLINKLEN; sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; sbdirty(); dirty(&asblk); } /* * Convert to new cylinder group format. */ if (cvtlevel >= 1 && sblock.fs_postblformat == FS_42POSTBLFMT) { if (preen) pwarn("CONVERTING TO NEW CYLINDER GROUP FORMAT\n"); else if (!reply("CONVERT TO NEW CYLINDER GROUP FORMAT")) return(0); doinglevel1++; sblock.fs_postblformat = FS_DYNAMICPOSTBLFMT; sblock.fs_nrpos = 8; sblock.fs_postbloff = (char *)(&sblock.fs_opostbl[0][0]) - (char *)(&sblock.fs_firstfield); sblock.fs_rotbloff = &sblock.fs_space[0] - (u_char *)(&sblock.fs_firstfield); sblock.fs_cgsize = fragroundup(&sblock, CGSIZE(&sblock)); sbdirty(); dirty(&asblk); } if (asblk.b_dirty && !bflag) { memmove(&altsblock, &sblock, (size_t)sblock.fs_sbsize); flush(fswritefd, &asblk); } /* * read in the summary info. */ asked = 0; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { size = sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize; sblock.fs_csp[j] = (struct csum *)calloc(1, (unsigned)size); if (bread(fsreadfd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } asked++; } } /* * If we survive the above basic checks and are preening, * quit here unless forced. 
*/ if (skipclean && sblock.fs_clean && !fflag) return (-1); /* * allocate and initialize the necessary maps */ bmapsize = roundup(howmany(maxfsblock, NBBY), sizeof(short)); blockmap = calloc((unsigned)bmapsize, sizeof (char)); if (blockmap == NULL) { printf("cannot alloc %u bytes for blockmap\n", (unsigned)bmapsize); goto badsb; } statemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (statemap == NULL) { printf("cannot alloc %u bytes for statemap\n", (unsigned)(maxino + 1)); goto badsb; } typemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (typemap == NULL) { printf("cannot alloc %u bytes for typemap\n", (unsigned)(maxino + 1)); goto badsb; } lncntp = (short *)calloc((unsigned)(maxino + 1), sizeof(short)); if (lncntp == NULL) { printf("cannot alloc %u bytes for lncntp\n", (unsigned)(maxino + 1) * sizeof(short)); goto badsb; } numdirs = sblock.fs_cstotal.cs_ndir; if (numdirs == 0) { printf("numdirs is zero, try using an alternate superblock\n"); goto badsb; } inplast = 0; listmax = numdirs + 10; inpsort = (struct inoinfo **)calloc((unsigned)listmax, sizeof(struct inoinfo *)); inphead = (struct inoinfo **)calloc((unsigned)numdirs, sizeof(struct inoinfo *)); if (inpsort == NULL || inphead == NULL) { printf("cannot alloc %u bytes for inphead\n", (unsigned)numdirs * sizeof(struct inoinfo *)); goto badsb; } bufinit(); + if (sblock.fs_flags & FS_DOSOFTDEP) + usedsoftdep = 1; + else + usedsoftdep = 0; return (1); badsb: ckfini(0); return (0); } /* * Read in the super block and its summary info. */ static int readsb(listerr) int listerr; { ufs_daddr_t super = bflag ? bflag : SBOFF / dev_bsize; if (bread(fsreadfd, (char *)&sblock, super, (long)SBSIZE) != 0) return (0); sblk.b_bno = super; sblk.b_size = SBSIZE; /* * run a few consistency checks of the super block */ if (sblock.fs_magic != FS_MAGIC) { badsb(listerr, "MAGIC NUMBER WRONG"); return (0); } if (sblock.fs_ncg < 1) { badsb(listerr, "NCG OUT OF RANGE"); return (0); } if (sblock.fs_cpg < 1) { badsb(listerr, "CPG OUT OF RANGE"); return (0); } if (sblock.fs_ncg * sblock.fs_cpg < sblock.fs_ncyl || (sblock.fs_ncg - 1) * sblock.fs_cpg >= sblock.fs_ncyl) { badsb(listerr, "NCYL LESS THAN NCG*CPG"); return (0); } if (sblock.fs_sbsize > SBSIZE) { badsb(listerr, "SIZE PREPOSTEROUSLY LARGE"); return (0); } /* * Compute block size that the filesystem is based on, * according to fsbtodb, and adjust superblock block number * so we can tell if this is an alternate later. */ super *= dev_bsize; dev_bsize = sblock.fs_fsize / fsbtodb(&sblock, 1); sblk.b_bno = super / dev_bsize; if (bflag) { havesb = 1; return (1); } /* * Set all possible fields that could differ, then do check * of whole super block against an alternate super block. * When an alternate super-block is specified this check is skipped. 
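/*
 * The tail of setup() above is where the commit's global switch gets
 * set: the kernel keeps FS_DOSOFTDEP in fs_flags while a filesystem is
 * mounted with soft updates, and fsck latches it into usedsoftdep, which
 * the passes consult to escalate "impossible" damage to fatal status and
 * to act without prompting where the ordering guarantees allow it.  The
 * added if/else is equivalent to the one-liner:
 */
	usedsoftdep = (sblock.fs_flags & FS_DOSOFTDEP) != 0;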
*/ getblk(&asblk, cgsblock(&sblock, sblock.fs_ncg - 1), sblock.fs_sbsize); if (asblk.b_errs) return (0); altsblock.fs_firstfield = sblock.fs_firstfield; altsblock.fs_unused_1 = sblock.fs_unused_1; altsblock.fs_time = sblock.fs_time; altsblock.fs_cstotal = sblock.fs_cstotal; altsblock.fs_cgrotor = sblock.fs_cgrotor; altsblock.fs_fmod = sblock.fs_fmod; altsblock.fs_clean = sblock.fs_clean; altsblock.fs_ronly = sblock.fs_ronly; altsblock.fs_flags = sblock.fs_flags; altsblock.fs_maxcontig = sblock.fs_maxcontig; altsblock.fs_minfree = sblock.fs_minfree; altsblock.fs_optim = sblock.fs_optim; altsblock.fs_rotdelay = sblock.fs_rotdelay; altsblock.fs_maxbpg = sblock.fs_maxbpg; memmove(altsblock.fs_csp, sblock.fs_csp, sizeof sblock.fs_csp); altsblock.fs_maxcluster = sblock.fs_maxcluster; memmove(altsblock.fs_fsmnt, sblock.fs_fsmnt, sizeof sblock.fs_fsmnt); memmove(altsblock.fs_sparecon, sblock.fs_sparecon, sizeof sblock.fs_sparecon); /* * The following should not have to be copied. */ altsblock.fs_fsbtodb = sblock.fs_fsbtodb; altsblock.fs_interleave = sblock.fs_interleave; altsblock.fs_npsect = sblock.fs_npsect; altsblock.fs_nrpos = sblock.fs_nrpos; altsblock.fs_state = sblock.fs_state; altsblock.fs_qbmask = sblock.fs_qbmask; altsblock.fs_qfmask = sblock.fs_qfmask; altsblock.fs_state = sblock.fs_state; altsblock.fs_maxfilesize = sblock.fs_maxfilesize; if (memcmp(&sblock, &altsblock, (int)sblock.fs_sbsize)) { if (debug) { long *nlp, *olp, *endlp; printf("superblock mismatches\n"); nlp = (long *)&altsblock; olp = (long *)&sblock; endlp = olp + (sblock.fs_sbsize / sizeof *olp); for ( ; olp < endlp; olp++, nlp++) { if (*olp == *nlp) continue; printf("offset %d, original %d, alternate %d\n", olp - (long *)&sblock, *olp, *nlp); } } badsb(listerr, "VALUES IN SUPER BLOCK DISAGREE WITH THOSE IN FIRST ALTERNATE"); return (0); } havesb = 1; return (1); } static void badsb(listerr, s) int listerr; char *s; { if (!listerr) return; if (preen) printf("%s: ", cdevname); pfatal("BAD SUPER BLOCK: %s\n", s); } /* * Calculate a prototype superblock based on information in the disk label. * When done the cgsblock macro can be calculated and the fs_ncg field * can be used. Do NOT attempt to use other macros without verifying that * their needed information is available! */ static int calcsb(dev, devfd, fs) char *dev; int devfd; register struct fs *fs; { register struct disklabel *lp; register struct partition *pp; register char *cp; int i; cp = strchr(dev, '\0') - 1; if (cp == (char *)-1 || ((*cp < 'a' || *cp > 'h') && !isdigit(*cp))) { pfatal("%s: CANNOT FIGURE OUT FILE SYSTEM PARTITION\n", dev); return (0); } lp = getdisklabel(dev, devfd); if (isdigit(*cp)) pp = &lp->d_partitions[0]; else pp = &lp->d_partitions[*cp - 'a']; if (pp->p_fstype != FS_BSDFFS) { pfatal("%s: NOT LABELED AS A BSD FILE SYSTEM (%s)\n", dev, pp->p_fstype < FSMAXTYPES ? 
fstypenames[pp->p_fstype] : "unknown"); return (0); } if (pp->p_fsize == 0 || pp->p_frag == 0) { pfatal("%s: LABELED AS A %s FILE SYSTEM, BUT BLOCK SIZE IS 0\n", dev, fstypenames[pp->p_fstype]); return (0); } memset(fs, 0, sizeof(struct fs)); fs->fs_fsize = pp->p_fsize; fs->fs_frag = pp->p_frag; fs->fs_cpg = pp->p_cpg; fs->fs_size = pp->p_size; fs->fs_ntrak = lp->d_ntracks; fs->fs_nsect = lp->d_nsectors; fs->fs_spc = lp->d_secpercyl; fs->fs_nspf = fs->fs_fsize / lp->d_secsize; fs->fs_sblkno = roundup( howmany(lp->d_bbsize + lp->d_sbsize, fs->fs_fsize), fs->fs_frag); fs->fs_cgmask = 0xffffffff; for (i = fs->fs_ntrak; i > 1; i >>= 1) fs->fs_cgmask <<= 1; if (!POWEROF2(fs->fs_ntrak)) fs->fs_cgmask <<= 1; fs->fs_cgoffset = roundup( howmany(fs->fs_nsect, NSPF(fs)), fs->fs_frag); fs->fs_fpg = (fs->fs_cpg * fs->fs_spc) / NSPF(fs); fs->fs_ncg = howmany(fs->fs_size / fs->fs_spc, fs->fs_cpg); for (fs->fs_fsbtodb = 0, i = NSPF(fs); i > 1; i >>= 1) fs->fs_fsbtodb++; dev_bsize = lp->d_secsize; return (1); } static struct disklabel * getdisklabel(s, fd) char *s; int fd; { static struct disklabel lab; if (ioctl(fd, DIOCGDINFO, (char *)&lab) < 0) { if (s == NULL) return ((struct disklabel *)NULL); pwarn("ioctl (GCINFO): %s\n", strerror(errno)); errx(EEXIT, "%s: can't read disk label", s); } return (&lab); } Index: head/sbin/fsck/utilities.c =================================================================== --- head/sbin/fsck/utilities.c (revision 34265) +++ head/sbin/fsck/utilities.c (revision 34266) @@ -1,625 +1,648 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include "fsck.h" long diskreads, totalreads; /* Disk cache statistics */ static void rwerror __P((char *mesg, ufs_daddr_t blk)); int ftypeok(dp) struct dinode *dp; { switch (dp->di_mode & IFMT) { case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: return (1); default: if (debug) printf("bad file type 0%o\n", dp->di_mode); return (0); } } int reply(question) char *question; { int persevere; char c; if (preen) pfatal("INTERNAL ERROR: GOT TO reply()"); persevere = !strcmp(question, "CONTINUE"); printf("\n"); if (!persevere && (nflag || fswritefd < 0)) { printf("%s? no\n\n", question); + resolved = 0; return (0); } if (yflag || (persevere && nflag)) { printf("%s? yes\n\n", question); return (1); } do { printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); - while (c != '\n' && getc(stdin) != '\n') - if (feof(stdin)) + while (c != '\n' && getc(stdin) != '\n') { + if (feof(stdin)) { + resolved = 0; return (0); + } + } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); + resolved = 0; return (0); } /* * Malloc buffers and set up cache. */ void bufinit() { register struct bufarea *bp; long bufcnt, i; char *bufp; pbp = pdirbp = (struct bufarea *)0; bufp = malloc((unsigned int)sblock.fs_bsize); if (bufp == 0) errx(EEXIT, "cannot allocate buffer pool"); cgblk.b_un.b_buf = bufp; initbarea(&cgblk); bufhead.b_next = bufhead.b_prev = &bufhead; bufcnt = MAXBUFSPACE / sblock.fs_bsize; if (bufcnt < MINBUFS) bufcnt = MINBUFS; for (i = 0; i < bufcnt; i++) { bp = (struct bufarea *)malloc(sizeof(struct bufarea)); bufp = malloc((unsigned int)sblock.fs_bsize); if (bp == NULL || bufp == NULL) { if (i >= MINBUFS) break; errx(EEXIT, "cannot allocate buffer pool"); } bp->b_un.b_buf = bufp; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; initbarea(bp); } bufhead.b_size = i; /* save number of buffers */ } /* * Manage a cache of directory blocks. */ struct bufarea * getdatablk(blkno, size) ufs_daddr_t blkno; long size; { register struct bufarea *bp; for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) if (bp->b_bno == fsbtodb(&sblock, blkno)) goto foundit; for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) if ((bp->b_flags & B_INUSE) == 0) break; if (bp == &bufhead) errx(EEXIT, "deadlocked buffer pool"); getblk(bp, blkno, size); /* fall through */ foundit: totalreads++; bp->b_prev->b_next = bp->b_next; bp->b_next->b_prev = bp->b_prev; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; bp->b_flags |= B_INUSE; return (bp); } void getblk(bp, blk, size) register struct bufarea *bp; ufs_daddr_t blk; long size; { ufs_daddr_t dblk; dblk = fsbtodb(&sblock, blk); if (bp->b_bno != dblk) { flush(fswritefd, bp); diskreads++; bp->b_errs = bread(fsreadfd, bp->b_un.b_buf, dblk, size); bp->b_bno = dblk; bp->b_size = size; } } void flush(fd, bp) int fd; register struct bufarea *bp; { register int i, j; if (!bp->b_dirty) return; if (bp->b_errs != 0) pfatal("WRITING %sZERO'ED BLOCK %d TO DISK\n", (bp->b_errs == bp->b_size / dev_bsize) ? 
"" : "PARTIALLY ", bp->b_bno); bp->b_dirty = 0; bp->b_errs = 0; bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size); if (bp != &sblk) return; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { bwrite(fswritefd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize); } } static void rwerror(mesg, blk) char *mesg; ufs_daddr_t blk; { if (preen == 0) printf("\n"); pfatal("CANNOT %s: BLK %ld", mesg, blk); if (reply("CONTINUE") == 0) exit(EEXIT); } void ckfini(markclean) int markclean; { register struct bufarea *bp, *nbp; int ofsmodified, cnt = 0; if (fswritefd < 0) { (void)close(fsreadfd); return; } flush(fswritefd, &sblk); if (havesb && sblk.b_bno != SBOFF / dev_bsize && !preen && reply("UPDATE STANDARD SUPERBLOCK")) { sblk.b_bno = SBOFF / dev_bsize; sbdirty(); flush(fswritefd, &sblk); } flush(fswritefd, &cgblk); free(cgblk.b_un.b_buf); for (bp = bufhead.b_prev; bp && bp != &bufhead; bp = nbp) { cnt++; flush(fswritefd, bp); nbp = bp->b_prev; free(bp->b_un.b_buf); free((char *)bp); } if (bufhead.b_size != cnt) errx(EEXIT, "Panic: lost %d buffers", bufhead.b_size - cnt); pbp = pdirbp = (struct bufarea *)0; if (markclean && sblock.fs_clean == 0) { sblock.fs_clean = 1; sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; if (!preen) printf("\n***** FILE SYSTEM MARKED CLEAN *****\n"); } if (debug) printf("cache missed %ld of %ld (%d%%)\n", diskreads, totalreads, (int)(diskreads * 100 / totalreads)); (void)close(fsreadfd); (void)close(fswritefd); } int bread(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { char *cp; int i, errs; off_t offset; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (read(fd, buf, (int)size) == size) return (0); rwerror("READ", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); errs = 0; memset(buf, 0, (size_t)size); printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { if (read(fd, cp, (int)secsize) != secsize) { (void)lseek(fd, offset + i + secsize, 0); if (secsize != dev_bsize && dev_bsize != 1) printf(" %ld (%ld),", (blk * dev_bsize + i) / secsize, blk + i / dev_bsize); else printf(" %ld,", blk + i / dev_bsize); errs++; } } printf("\n"); return (errs); } void bwrite(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { int i; char *cp; off_t offset; if (fd < 0) return; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (write(fd, buf, (int)size) == size) { fsmodified = 1; return; } rwerror("WRITE", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) if (write(fd, cp, (int)dev_bsize) != dev_bsize) { (void)lseek(fd, offset + i + dev_bsize, 0); printf(" %ld,", blk + i / dev_bsize); } printf("\n"); return; } /* * allocate a data block with the specified number of fragments */ ufs_daddr_t allocblk(frags) long frags; { - register int i, j, k; + int i, j, k, cg, baseblk; + struct cg *cgp = &cgrp; if (frags <= 0 || frags > sblock.fs_frag) return (0); for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { for (j = 0; j <= sblock.fs_frag - frags; j++) { if (testbmap(i + j)) continue; for (k = 1; k < frags; k++) if (testbmap(i + j + k)) break; if (k < frags) { j += k; continue; } - for 
(k = 0; k < frags; k++) + cg = dtog(&sblock, i + j); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + baseblk = dtogd(&sblock, i + j); + for (k = 0; k < frags; k++) { setbmap(i + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); + } n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(); return (i + j); } } return (0); } /* * Free a previously allocated block */ void freeblk(blkno, frags) ufs_daddr_t blkno; long frags; { struct inodesc idesc; idesc.id_blkno = blkno; idesc.id_numfrags = frags; (void)pass4check(&idesc); } /* * Find a pathname */ void getpathname(namebuf, curdir, ino) char *namebuf; ino_t curdir, ino; { int len; register char *cp; struct inodesc idesc; static int busy = 0; if (curdir == ino && ino == ROOTINO) { (void)strcpy(namebuf, "/"); return; } if (busy || (statemap[curdir] != DSTATE && statemap[curdir] != DFOUND)) { (void)strcpy(namebuf, "?"); return; } busy = 1; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_fix = IGNORE; cp = &namebuf[MAXPATHLEN - 1]; *cp = '\0'; if (curdir != ino) { idesc.id_parent = curdir; goto namelookup; } while (ino != ROOTINO) { idesc.id_number = ino; idesc.id_func = findino; idesc.id_name = ".."; if ((ckinode(ginode(ino), &idesc) & FOUND) == 0) break; namelookup: idesc.id_number = idesc.id_parent; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; if ((ckinode(ginode(idesc.id_number), &idesc)&FOUND) == 0) break; len = strlen(namebuf); cp -= len; memmove(cp, namebuf, (size_t)len); *--cp = '/'; if (cp < &namebuf[MAXNAMLEN]) break; ino = idesc.id_number; } busy = 0; if (ino != ROOTINO) *--cp = '?'; memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); } void catch(sig) int sig; { if (!doinglevel2) ckfini(0); exit(12); } /* * When preening, allow a single quit to signal * a special exit after filesystem checks complete * so that reboot sequence may be interrupted. */ void catchquit(sig) int sig; { printf("returning to single-user after filesystem check\n"); returntosingle = 1; (void)signal(SIGQUIT, SIG_DFL); } /* * Ignore a single quit signal; wait and flush just in case. * Used by child processes in preen. */ void voidquit(sig) int sig; { sleep(1); (void)signal(SIGQUIT, SIG_IGN); (void)signal(SIGQUIT, SIG_DFL); } /* * determine whether an inode should be fixed. */ int dofix(idesc, msg) register struct inodesc *idesc; char *msg; { switch (idesc->id_fix) { case DONTKNOW: if (idesc->id_type == DATA) direrror(idesc->id_number, msg); else pwarn(msg); if (preen) { printf(" (SALVAGED)\n"); idesc->id_fix = FIX; return (ALTERED); } if (reply("SALVAGE") == 0) { idesc->id_fix = NOFIX; return (0); } idesc->id_fix = FIX; return (ALTERED); case FIX: return (ALTERED); case NOFIX: case IGNORE: return (0); default: errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); } /* NOTREACHED */ return (0); } #if __STDC__ #include #else #include #endif /* * An unexpected inconsistency occured. - * Die if preening, otherwise just print message and continue. + * Die if preening or filesystem is running with soft dependency protocol, + * otherwise just print message and continue. */ void #if __STDC__ pfatal(const char *fmt, ...) 
#else pfatal(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (!preen) { (void)vfprintf(stderr, fmt, ap); va_end(ap); + if (usedsoftdep) + (void)fprintf(stderr, + "\nUNEXPECTED SOFTDEP INCONSISTENCY\n"); return; } (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); (void)fprintf(stderr, - "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", - cdevname); + "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", + cdevname, usedsoftdep ? " SOFTDEP " : " "); + ckfini(0); exit(EEXIT); } /* - * Pwarn just prints a message when not preening, - * or a warning (preceded by filename) when preening. + * Pwarn just prints a message when not preening or running soft dependency + * protocol, or a warning (preceded by filename) when preening. */ void #if __STDC__ pwarn(const char *fmt, ...) #else pwarn(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (preen) (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); va_end(ap); } /* * Stub for routines from kernel. */ void #if __STDC__ panic(const char *fmt, ...) #else panic(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif pfatal("INTERNAL INCONSISTENCY:"); (void)vfprintf(stderr, fmt, ap); va_end(ap); exit(EEXIT); } Index: head/sbin/fsck_ffs/dir.c =================================================================== --- head/sbin/fsck_ffs/dir.c (revision 34265) +++ head/sbin/fsck_ffs/dir.c (revision 34266) @@ -1,734 +1,737 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
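/*
 * The allocblk() rework above follows from the new pass5 cross-check:
 * any block fsck allocates for itself (when expanding a directory, say)
 * must now also be cleared in the owning cylinder group's own free map
 * and charged to its summary counts, or pass5 would immediately flag the
 * allocation as an inconsistency.  With the search loop stripped away
 * and blkno standing for the found address (i + j in the original), the
 * bookkeeping per allocation is:
 */
	cg = dtog(&sblock, blkno);		/* owning cylinder group */
	getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize);
	if (!cg_chkmagic(cgp))
		pfatal("CG %d: BAD MAGIC NUMBER\n", cg);
	baseblk = dtogd(&sblock, blkno);	/* block offset within the group */
	for (k = 0; k < frags; k++) {
		setbmap(blkno + k);			/* fsck's private map */
		clrbit(cg_blksfree(cgp), baseblk + k);	/* on-disk free map */
	}
	n_blks += frags;
	if (frags == sblock.fs_frag)
		cgp->cg_cs.cs_nbfree--;		/* consumed a full block */
	else
		cgp->cg_cs.cs_nffree -= frags;	/* consumed loose fragments */
	cgdirty();				/* write the group back */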
*/ #ifndef lint static const char sccsid[] = "@(#)dir.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" char *lfname = "lost+found"; int lfmode = 01777; struct dirtemplate emptydir = { 0, DIRBLKSIZ }; struct dirtemplate dirhead = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; struct odirtemplate odirhead = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." }; static int chgino __P((struct inodesc *)); static int dircheck __P((struct inodesc *, struct direct *)); static int expanddir __P((struct dinode *dp, char *name)); static void freedir __P((ino_t ino, ino_t parent)); static struct direct *fsck_readdir __P((struct inodesc *)); static struct bufarea *getdirblk __P((ufs_daddr_t blkno, long size)); static int lftempname __P((char *bufp, ino_t ino)); static int mkentry __P((struct inodesc *)); /* * Propagate connected state through the tree. */ void propagate() { register struct inoinfo **inpp, *inp; struct inoinfo **inpend; long change; inpend = &inpsort[inplast]; do { change = 0; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) { statemap[inp->i_number] = DFOUND; change++; } } } while (change > 0); } /* * Scan each entry in a directory block. */ int dirscan(idesc) register struct inodesc *idesc; { register struct direct *dp; register struct bufarea *bp; int dsize, n; long blksiz; char dbuf[DIRBLKSIZ]; if (idesc->id_type != DATA) errx(EEXIT, "wrong type to dirscan %d", idesc->id_type); if (idesc->id_entryno == 0 && (idesc->id_filesize & (DIRBLKSIZ - 1)) != 0) idesc->id_filesize = roundup(idesc->id_filesize, DIRBLKSIZ); blksiz = idesc->id_numfrags * sblock.fs_fsize; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) { idesc->id_filesize -= blksiz; return (SKIP); } idesc->id_loc = 0; for (dp = fsck_readdir(idesc); dp != NULL; dp = fsck_readdir(idesc)) { dsize = dp->d_reclen; memmove(dbuf, dp, (size_t)dsize); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { struct direct *tdp = (struct direct *)dbuf; u_char tmp; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif idesc->id_dirp = (struct direct *)dbuf; if ((n = (*idesc->id_func)(idesc)) & ALTERED) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt && !doinglevel2) { struct direct *tdp; u_char tmp; tdp = (struct direct *)dbuf; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif bp = getdirblk(idesc->id_blkno, blksiz); memmove(bp->b_un.b_buf + idesc->id_loc - dsize, dbuf, (size_t)dsize); dirty(bp); sbdirty(); } if (n & STOP) return (n); } return (idesc->id_filesize > 0 ? KEEPON : STOP); } /* * get next entry in a directory. 
*/ static struct direct * fsck_readdir(idesc) register struct inodesc *idesc; { register struct direct *dp, *ndp; register struct bufarea *bp; long size, blksiz, fix, dploc; blksiz = idesc->id_numfrags * sblock.fs_fsize; bp = getdirblk(idesc->id_blkno, blksiz); if (idesc->id_loc % DIRBLKSIZ == 0 && idesc->id_filesize > 0 && idesc->id_loc < blksiz) { dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (dircheck(idesc, dp)) goto dpok; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); dp->d_reclen = DIRBLKSIZ; dp->d_ino = 0; dp->d_type = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; if (fix) dirty(bp); idesc->id_loc += DIRBLKSIZ; idesc->id_filesize -= DIRBLKSIZ; return (dp); } dpok: if (idesc->id_filesize <= 0 || idesc->id_loc >= blksiz) return NULL; dploc = idesc->id_loc; dp = (struct direct *)(bp->b_un.b_buf + dploc); idesc->id_loc += dp->d_reclen; idesc->id_filesize -= dp->d_reclen; if ((idesc->id_loc % DIRBLKSIZ) == 0) return (dp); ndp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (idesc->id_loc < blksiz && idesc->id_filesize > 0 && dircheck(idesc, ndp) == 0) { size = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); idesc->id_loc += size; idesc->id_filesize -= size; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + dploc); dp->d_reclen += size; if (fix) dirty(bp); } return (dp); } /* * Verify that a directory entry is valid. * This is a superset of the checks made in the kernel. */ static int dircheck(idesc, dp) struct inodesc *idesc; register struct direct *dp; { register int size; register char *cp; u_char namlen, type; int spaceleft; spaceleft = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); if (dp->d_ino >= maxino || dp->d_reclen == 0 || dp->d_reclen > spaceleft || (dp->d_reclen & 0x3) != 0) return (0); if (dp->d_ino == 0) return (1); size = DIRSIZ(!newinofmt, dp); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { type = dp->d_namlen; namlen = dp->d_type; } else { namlen = dp->d_namlen; type = dp->d_type; } # else namlen = dp->d_namlen; type = dp->d_type; # endif if (dp->d_reclen < size || idesc->id_filesize < size || namlen > MAXNAMLEN || type > 15) return (0); for (cp = dp->d_name, size = 0; size < namlen; size++) if (*cp == '\0' || (*cp++ == '/')) return (0); if (*cp != '\0') return (0); return (1); } void direrror(ino, errmesg) ino_t ino; char *errmesg; { fileerror(ino, ino, errmesg); } void fileerror(cwd, ino, errmesg) ino_t cwd, ino; char *errmesg; { register struct dinode *dp; char pathbuf[MAXPATHLEN + 1]; pwarn("%s ", errmesg); pinode(ino); printf("\n"); getpathname(pathbuf, cwd, ino); if (ino < ROOTINO || ino > maxino) { pfatal("NAME=%s\n", pathbuf); return; } dp = ginode(ino); if (ftypeok(dp)) pfatal("%s=%s\n", (dp->di_mode & IFMT) == IFDIR ? "DIR" : "FILE", pathbuf); else pfatal("NAME=%s\n", pathbuf); } void adjust(idesc, lcnt) register struct inodesc *idesc; int lcnt; { register struct dinode *dp; dp = ginode(idesc->id_number); if (dp->di_nlink == lcnt) { if (linkup(idesc->id_number, (ino_t)0) == 0) clri(idesc, "UNREF", 0); } else { pwarn("LINK COUNT %s", (lfdir == idesc->id_number) ? lfname : ((dp->di_mode & IFMT) == IFDIR ? 
"DIR" : "FILE")); pinode(idesc->id_number); printf(" COUNT %d SHOULD BE %d", dp->di_nlink, dp->di_nlink - lcnt); - if (preen) { + if (preen || usedsoftdep) { if (lcnt < 0) { printf("\n"); pfatal("LINK COUNT INCREASING"); } - printf(" (ADJUSTED)\n"); + if (preen) + printf(" (ADJUSTED)\n"); } if (preen || reply("ADJUST") == 1) { dp->di_nlink -= lcnt; inodirty(); } } } static int mkentry(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; struct direct newent; int newlen, oldlen; newent.d_namlen = strlen(idesc->id_name); newlen = DIRSIZ(0, &newent); if (dirp->d_ino != 0) oldlen = DIRSIZ(0, dirp); else oldlen = 0; if (dirp->d_reclen - oldlen < newlen) return (KEEPON); newent.d_reclen = dirp->d_reclen - oldlen; dirp->d_reclen = oldlen; dirp = (struct direct *)(((char *)dirp) + oldlen); dirp->d_ino = idesc->id_parent; /* ino to be entered is in id_parent */ dirp->d_reclen = newent.d_reclen; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; dirp->d_namlen = newent.d_namlen; memmove(dirp->d_name, idesc->id_name, (size_t)newent.d_namlen + 1); # if (BYTE_ORDER == LITTLE_ENDIAN) /* * If the entry was split, dirscan() will only reverse the byte * order of the original entry, and not the new one, before * writing it back out. So, we reverse the byte order here if * necessary. */ if (oldlen != 0 && !newinofmt && !doinglevel2) { u_char tmp; tmp = dirp->d_namlen; dirp->d_namlen = dirp->d_type; dirp->d_type = tmp; } # endif return (ALTERED|STOP); } static int chgino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (memcmp(dirp->d_name, idesc->id_name, (int)dirp->d_namlen + 1)) return (KEEPON); dirp->d_ino = idesc->id_parent; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; return (ALTERED|STOP); } int linkup(orphan, parentdir) ino_t orphan; ino_t parentdir; { register struct dinode *dp; int lostdir; ino_t oldlfdir; struct inodesc idesc; char tempname[BUFSIZ]; memset(&idesc, 0, sizeof(struct inodesc)); dp = ginode(orphan); lostdir = (dp->di_mode & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? "DIR" : "FILE"); pinode(orphan); - if (preen && dp->di_size == 0) + if ((preen || usedsoftdep) && dp->di_size == 0) return (0); if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); + if (parentdir != 0) + lncntp[parentdir]++; if (lfdir == 0) { dp = ginode(ROOTINO); idesc.id_name = lfname; idesc.id_type = DATA; idesc.id_func = findino; idesc.id_number = ROOTINO; if ((ckinode(dp, &idesc) & FOUND) != 0) { lfdir = idesc.id_parent; } else { pwarn("NO lost+found DIRECTORY"); if (preen || reply("CREATE")) { lfdir = allocdir(ROOTINO, (ino_t)0, lfmode); if (lfdir != 0) { if (makeentry(ROOTINO, lfdir, lfname) != 0) { if (preen) printf(" (CREATED)\n"); } else { freedir(lfdir, ROOTINO); lfdir = 0; if (preen) printf("\n"); } } } } if (lfdir == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY"); printf("\n\n"); return (0); } } dp = ginode(lfdir); if ((dp->di_mode & IFMT) != IFDIR) { pfatal("lost+found IS NOT A DIRECTORY"); if (reply("REALLOCATE") == 0) return (0); oldlfdir = lfdir; if ((lfdir = allocdir(ROOTINO, (ino_t)0, lfmode)) == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } if ((changeino(ROOTINO, lfname, lfdir) & ALTERED) == 0) { pfatal("SORRY. 
CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } inodirty(); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = oldlfdir; adjust(&idesc, lncntp[oldlfdir] + 1); lncntp[oldlfdir] = 0; dp = ginode(lfdir); } if (statemap[lfdir] != DFOUND) { pfatal("SORRY. NO lost+found DIRECTORY\n\n"); return (0); } (void)lftempname(tempname, orphan); if (makeentry(lfdir, orphan, tempname) == 0) { pfatal("SORRY. NO SPACE IN lost+found DIRECTORY"); printf("\n\n"); return (0); } lncntp[orphan]--; if (lostdir) { if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 && parentdir != (ino_t)-1) (void)makeentry(orphan, lfdir, ".."); dp = ginode(lfdir); dp->di_nlink++; inodirty(); lncntp[lfdir]++; pwarn("DIR I=%lu CONNECTED. ", orphan); if (parentdir != (ino_t)-1) { printf("PARENT WAS I=%lu\n", parentdir); /* * The parent directory, because of the ordering * guarantees, has had the link count incremented * for the child, but no entry was made. This * fixes the parent link count so that fsck does * not need to be rerun. */ lncntp[parentdir]++; } if (preen == 0) printf("\n"); } return (1); } /* * fix an entry in a directory. */ int changeino(dir, name, newnum) ino_t dir; char *name; ino_t newnum; { struct inodesc idesc; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = chgino; idesc.id_number = dir; idesc.id_fix = DONTKNOW; idesc.id_name = name; idesc.id_parent = newnum; /* new value for name */ return (ckinode(ginode(dir), &idesc)); } /* * make an entry in a directory */ int makeentry(parent, ino, name) ino_t parent, ino; char *name; { struct dinode *dp; struct inodesc idesc; char pathbuf[MAXPATHLEN + 1]; if (parent < ROOTINO || parent >= maxino || ino < ROOTINO || ino >= maxino) return (0); memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = mkentry; idesc.id_number = parent; idesc.id_parent = ino; /* this is the inode to enter */ idesc.id_fix = DONTKNOW; idesc.id_name = name; dp = ginode(parent); if (dp->di_size % DIRBLKSIZ) { dp->di_size = roundup(dp->di_size, DIRBLKSIZ); inodirty(); } if ((ckinode(dp, &idesc) & ALTERED) != 0) return (1); getpathname(pathbuf, parent, parent); dp = ginode(parent); if (expanddir(dp, pathbuf) == 0) return (0); return (ckinode(dp, &idesc) & ALTERED); } /* * Attempt to expand the size of a directory */ static int expanddir(dp, name) register struct dinode *dp; char *name; { ufs_daddr_t lastbn, newblk; register struct bufarea *bp; char *cp, firstblk[DIRBLKSIZ]; lastbn = lblkno(&sblock, dp->di_size); if (lastbn >= NDADDR - 1 || dp->di_db[lastbn] == 0 || dp->di_size == 0) return (0); if ((newblk = allocblk(sblock.fs_frag)) == 0) return (0); dp->di_db[lastbn + 1] = dp->di_db[lastbn]; dp->di_db[lastbn] = newblk; dp->di_size += sblock.fs_bsize; dp->di_blocks += btodb(sblock.fs_bsize); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(firstblk, bp->b_un.b_buf, DIRBLKSIZ); bp = getdirblk(newblk, sblock.fs_bsize); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, firstblk, DIRBLKSIZ); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_bsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, &emptydir, sizeof emptydir); pwarn("NO SPACE LEFT IN %s", name); if (preen) printf(" (EXPANDED)\n"); else if (reply("EXPAND") == 0) goto bad; dirty(bp); inodirty(); return (1); bad: dp->di_db[lastbn] = 
dp->di_db[lastbn + 1]; dp->di_db[lastbn + 1] = 0; dp->di_size -= sblock.fs_bsize; dp->di_blocks -= btodb(sblock.fs_bsize); freeblk(newblk, sblock.fs_frag); return (0); } /* * allocate a new directory */ ino_t allocdir(parent, request, mode) ino_t parent, request; int mode; { ino_t ino; char *cp; struct dinode *dp; register struct bufarea *bp; struct dirtemplate *dirp; ino = allocino(request, IFDIR|mode); if (newinofmt) dirp = &dirhead; else dirp = (struct dirtemplate *)&odirhead; dirp->dot_ino = ino; dirp->dotdot_ino = parent; dp = ginode(ino); bp = getdirblk(dp->di_db[0], sblock.fs_fsize); if (bp->b_errs) { freeino(ino); return (0); } memmove(bp->b_un.b_buf, dirp, sizeof(struct dirtemplate)); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_fsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); dp->di_nlink = 2; inodirty(); if (ino == ROOTINO) { lncntp[ino] = dp->di_nlink; cacheino(dp, ino); return(ino); } if (statemap[parent] != DSTATE && statemap[parent] != DFOUND) { freeino(ino); return (0); } cacheino(dp, ino); statemap[ino] = statemap[parent]; if (statemap[ino] == DSTATE) { lncntp[ino] = dp->di_nlink; lncntp[parent]++; } dp = ginode(parent); dp->di_nlink++; inodirty(); return (ino); } /* * free a directory inode */ static void freedir(ino, parent) ino_t ino, parent; { struct dinode *dp; if (ino != parent) { dp = ginode(parent); dp->di_nlink--; inodirty(); } freeino(ino); } /* * generate a temporary name for the lost+found directory. */ static int lftempname(bufp, ino) char *bufp; ino_t ino; { register ino_t in; register char *cp; int namlen; cp = bufp + 2; for (in = maxino; in > 0; in /= 10) cp++; *--cp = 0; namlen = cp - bufp; in = ino; while (cp > bufp) { *--cp = (in % 10) + '0'; in /= 10; } *cp = '#'; return (namlen); } /* * Get a directory block. * Insure that it is held until another is requested. */ static struct bufarea * getdirblk(blkno, size) ufs_daddr_t blkno; long size; { if (pdirbp != 0) pdirbp->b_flags &= ~B_INUSE; pdirbp = getdatablk(blkno, size); return (pdirbp); } Index: head/sbin/fsck_ffs/fsck.h =================================================================== --- head/sbin/fsck_ffs/fsck.h (revision 34265) +++ head/sbin/fsck_ffs/fsck.h (revision 34266) @@ -1,281 +1,283 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fsck.h 8.4 (Berkeley) 5/9/95 */ #include <unistd.h> #include <stdlib.h> #include <stdio.h> #define MAXDUP 10 /* limit on dup blks (per inode) */ #define MAXBAD 10 /* limit on bad blks (per inode) */ #define MAXBUFSPACE 40*1024 /* maximum space to allocate to buffers */ #define INOBUFSIZE 56*1024 /* size of buffer to read inodes in pass1 */ #ifndef BUFSIZ #define BUFSIZ 1024 #endif #define USTATE 01 /* inode not allocated */ #define FSTATE 02 /* inode is file */ #define DSTATE 03 /* inode is directory */ #define DFOUND 04 /* directory found during descent */ #define DCLEAR 05 /* directory is to be cleared */ #define FCLEAR 06 /* file is to be cleared */ /* * buffer cache structure. */ struct bufarea { struct bufarea *b_next; /* free list queue */ struct bufarea *b_prev; /* free list queue */ ufs_daddr_t b_bno; int b_size; int b_errs; int b_flags; union { char *b_buf; /* buffer space */ ufs_daddr_t *b_indir; /* indirect block */ struct fs *b_fs; /* super block */ struct cg *b_cg; /* cylinder group */ struct dinode *b_dinode; /* inode block */ } b_un; char b_dirty; }; #define B_INUSE 1 #define MINBUFS 5 /* minimum number of buffers required */ struct bufarea bufhead; /* head of list of other blks in filesys */ struct bufarea sblk; /* file system superblock */ struct bufarea cgblk; /* cylinder group blocks */ struct bufarea *pdirbp; /* current directory contents */ struct bufarea *pbp; /* current inode block */ #define dirty(bp) (bp)->b_dirty = 1 #define initbarea(bp) \ (bp)->b_dirty = 0; \ (bp)->b_bno = (ufs_daddr_t)-1; \ (bp)->b_flags = 0; #define sbdirty() sblk.b_dirty = 1 #define cgdirty() cgblk.b_dirty = 1 #define sblock (*sblk.b_un.b_fs) #define cgrp (*cgblk.b_un.b_cg) enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE}; struct inodesc { enum fixstate id_fix; /* policy on fixing errors */ int (*id_func)(); /* function to be applied to blocks of inode */ ino_t id_number; /* inode number described */ ino_t id_parent; /* for DATA nodes, their parent */ ufs_daddr_t id_blkno; /* current block number being examined */ int id_numfrags; /* number of frags contained in block */ quad_t id_filesize; /* for DATA nodes, the size of the directory */ int id_loc; /* for DATA nodes, current location in dir */ int id_entryno; /* for DATA nodes, current entry number */ struct direct *id_dirp; /* for DATA nodes, ptr to current entry */ char *id_name; /* for DATA nodes, name to find or enter */ char id_type; /* type of descriptor, DATA or ADDR */ }; /* file types */ #define DATA 1 #define ADDR 2 /* * Linked list of duplicate blocks. * * The list is composed of two parts. The first part of the * list (from duplist through the node pointed to by muldup) * contains a single copy of each duplicate block that has been * found. The second part of the list (from muldup to the end) * contains duplicate blocks that have been found more than once. * To check if a block has been found as a duplicate it is only * necessary to search from duplist through muldup.
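 * A minimal sketch of that membership test (the same bounded walk
 * that pass1check() performs; dlp is a struct dups pointer and blkno
 * the block being checked):
 *
 *	for (dlp = duplist; dlp != NULL; dlp = dlp->next) {
 *		if (dlp->dup == blkno)
 *			return (1);	/* already recorded once */
 *		if (dlp == muldup)
 *			break;		/* end of the unique part */
 *	}
 *	return (0);
 *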
To find the * total number of times that a block has been found as a duplicate * the entire list must be searched for occurences of the block * in question. The following diagram shows a sample list where * w (found twice), x (found once), y (found three times), and z * (found once) are duplicate block numbers: * * w -> y -> x -> z -> y -> w -> y * ^ ^ * | | * duplist muldup */ struct dups { struct dups *next; ufs_daddr_t dup; }; struct dups *duplist; /* head of dup list */ struct dups *muldup; /* end of unique duplicate dup block numbers */ /* * Linked list of inodes with zero link counts. */ struct zlncnt { struct zlncnt *next; ino_t zlncnt; }; struct zlncnt *zlnhead; /* head of zero link count list */ /* * Inode cache data structures. */ struct inoinfo { struct inoinfo *i_nexthash; /* next entry in hash chain */ ino_t i_number; /* inode number of this entry */ ino_t i_parent; /* inode number of parent */ ino_t i_dotdot; /* inode number of `..' */ size_t i_isize; /* size of inode */ u_int i_numblks; /* size of block array in bytes */ ufs_daddr_t i_blks[1]; /* actually longer */ } **inphead, **inpsort; long numdirs, listmax, inplast; char *cdevname; /* name of device being checked */ long dev_bsize; /* computed value of DEV_BSIZE */ long secsize; /* actual disk sector size */ char fflag; /* force fs check (ignore clean flag) */ char nflag; /* assume a no response */ char yflag; /* assume a yes response */ int bflag; /* location of alternate super block */ int debug; /* output debugging info */ int cvtlevel; /* convert to newer file system format */ int doinglevel1; /* converting to new cylinder group format */ int doinglevel2; /* converting to new inode format */ int newinofmt; /* filesystem has new inode format */ +char usedsoftdep; /* just fix soft dependency inconsistencies */ +char resolved; /* cleared if unresolved changes => not clean */ char preen; /* just fix normal inconsistencies */ char hotroot; /* checking root device */ char havesb; /* superblock has been read */ int fsmodified; /* 1 => write done to file system */ int fsreadfd; /* file descriptor for reading file system */ int fswritefd; /* file descriptor for writing file system */ int returntosingle; /* return to single user mode */ int rerun; /* rerun fsck. Only used in non-preen mode */ ufs_daddr_t maxfsblock; /* number of blocks in the file system */ char *blockmap; /* ptr to primary blk allocation map */ ino_t maxino; /* number of inodes in file system */ ino_t lastino; /* last inode in use */ char *statemap; /* ptr to inode state table */ u_char *typemap; /* ptr to inode type table */ short *lncntp; /* ptr to link count table */ ino_t lfdir; /* lost & found directory inode number */ char *lfname; /* lost & found directory name */ int lfmode; /* lost & found directory creation mode */ ufs_daddr_t n_blks; /* number of blocks in use */ ufs_daddr_t n_files; /* number of files in use */ #define clearinode(dp) (*(dp) = zino) struct dinode zino; #define setbmap(blkno) setbit(blockmap, blkno) #define testbmap(blkno) isset(blockmap, blkno) #define clrbmap(blkno) clrbit(blockmap, blkno) #define STOP 0x01 #define SKIP 0x02 #define KEEPON 0x04 #define ALTERED 0x08 #define FOUND 0x10 #define EEXIT 8 /* Standard error exit. 
*/ struct fstab; void adjust __P((struct inodesc *, int lcnt)); ufs_daddr_t allocblk __P((long frags)); ino_t allocdir __P((ino_t parent, ino_t request, int mode)); ino_t allocino __P((ino_t request, int type)); void blkerror __P((ino_t ino, char *type, ufs_daddr_t blk)); char *blockcheck __P((char *name)); int bread __P((int fd, char *buf, ufs_daddr_t blk, long size)); void bufinit __P((void)); void bwrite __P((int fd, char *buf, ufs_daddr_t blk, long size)); void cacheino __P((struct dinode *dp, ino_t inumber)); void catch __P((int)); void catchquit __P((int)); int changeino __P((ino_t dir, char *name, ino_t newnum)); int checkfstab __P((int preen, int maxrun, int (*docheck)(struct fstab *), int (*chkit)(char *, char *, long, int))); int chkrange __P((ufs_daddr_t blk, int cnt)); void ckfini __P((int markclean)); int ckinode __P((struct dinode *dp, struct inodesc *)); void clri __P((struct inodesc *, char *type, int flag)); void direrror __P((ino_t ino, char *errmesg)); int dirscan __P((struct inodesc *)); int dofix __P((struct inodesc *, char *msg)); void ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_fragacct __P((struct fs *, int, int32_t [], int)); int ffs_isblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t)); void fileerror __P((ino_t cwd, ino_t ino, char *errmesg)); int findino __P((struct inodesc *)); int findname __P((struct inodesc *)); void flush __P((int fd, struct bufarea *bp)); void freeblk __P((ufs_daddr_t blkno, long frags)); void freeino __P((ino_t ino)); void freeinodebuf __P((void)); int ftypeok __P((struct dinode *dp)); void getblk __P((struct bufarea *bp, ufs_daddr_t blk, long size)); struct bufarea *getdatablk __P((ufs_daddr_t blkno, long size)); struct inoinfo *getinoinfo __P((ino_t inumber)); struct dinode *getnextinode __P((ino_t inumber)); void getpathname __P((char *namebuf, ino_t curdir, ino_t ino)); struct dinode *ginode __P((ino_t inumber)); void inocleanup __P((void)); void inodirty __P((void)); int linkup __P((ino_t orphan, ino_t parentdir)); int makeentry __P((ino_t parent, ino_t ino, char *name)); void panic __P((const char *fmt, ...)); void pass1 __P((void)); void pass1b __P((void)); int pass1check __P((struct inodesc *)); void pass2 __P((void)); void pass3 __P((void)); void pass4 __P((void)); int pass4check __P((struct inodesc *)); void pass5 __P((void)); void pfatal __P((const char *fmt, ...)); void pinode __P((ino_t ino)); void propagate __P((void)); void pwarn __P((const char *fmt, ...)); int reply __P((char *question)); void resetinodebuf __P((void)); int setup __P((char *dev)); void voidquit __P((int)); Index: head/sbin/fsck_ffs/inode.c =================================================================== --- head/sbin/fsck_ffs/inode.c (revision 34265) +++ head/sbin/fsck_ffs/inode.c (revision 34266) @@ -1,621 +1,632 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)inode.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include <sys/param.h> #include <sys/time.h> #include <ufs/ufs/dinode.h> #include <ufs/ufs/dir.h> #include <ufs/ffs/fs.h> #include <err.h> #include <pwd.h> #include <string.h> #include "fsck.h" static ino_t startinum; static int iblock __P((struct inodesc *, long ilevel, quad_t isize)); int ckinode(dp, idesc) struct dinode *dp; register struct inodesc *idesc; { ufs_daddr_t *ap; long ret, n, ndb, offset; struct dinode dino; quad_t remsize, sizepb; mode_t mode; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_fix != IGNORE) idesc->id_fix = DONTKNOW; idesc->id_entryno = 0; idesc->id_filesize = dp->di_size; mode = dp->di_mode & IFMT; if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && (dp->di_size < sblock.fs_maxsymlinklen || dp->di_blocks == 0))) return (KEEPON); dino = *dp; ndb = howmany(dino.di_size, sblock.fs_bsize); for (ap = &dino.di_db[0]; ap < &dino.di_db[NDADDR]; ap++) { if (--ndb == 0 && (offset = blkoff(&sblock, dino.di_size)) != 0) idesc->id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc->id_numfrags = sblock.fs_frag; if (*ap == 0) { if (idesc->id_type == DATA && ndb >= 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size = (ap - &dino.di_db[0]) * sblock.fs_bsize; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); } } continue; } idesc->id_blkno = *ap; if (idesc->id_type == ADDR) ret = (*idesc->id_func)(idesc); else ret = dirscan(idesc); if (ret & STOP) return (ret); } idesc->id_numfrags = sblock.fs_frag; remsize = dino.di_size - sblock.fs_bsize * NDADDR; sizepb = sblock.fs_bsize; for (ap = &dino.di_ib[0], n = 1; n <= NIADDR; ap++, n++) { if (*ap) { idesc->id_blkno = *ap; ret = iblock(idesc, n, remsize); if (ret & STOP) return (ret); } else { if (idesc->id_type == DATA && remsize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= remsize; remsize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); break; } } } sizepb *= NINDIR(&sblock); remsize -= sizepb; } return (KEEPON); } static int
iblock(idesc, ilevel, isize) struct inodesc *idesc; long ilevel; quad_t isize; { ufs_daddr_t *ap; ufs_daddr_t *aplim; struct bufarea *bp; int i, n, (*func)(), nif; quad_t sizepb; char buf[BUFSIZ]; char pathbuf[MAXPATHLEN + 1]; struct dinode *dp; if (idesc->id_type == ADDR) { func = idesc->id_func; if (((n = (*func)(idesc)) & KEEPON) == 0) return (n); } else func = dirscan; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) return (SKIP); bp = getdatablk(idesc->id_blkno, sblock.fs_bsize); ilevel--; for (sizepb = sblock.fs_bsize, i = 0; i < ilevel; i++) sizepb *= NINDIR(&sblock); nif = howmany(isize , sizepb); if (nif > NINDIR(&sblock)) nif = NINDIR(&sblock); if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) { aplim = &bp->b_un.b_indir[NINDIR(&sblock)]; for (ap = &bp->b_un.b_indir[nif]; ap < aplim; ap++) { if (*ap == 0) continue; (void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu", idesc->id_number); if (dofix(idesc, buf)) { *ap = 0; dirty(bp); } } flush(fswritefd, bp); } aplim = &bp->b_un.b_indir[nif]; for (ap = bp->b_un.b_indir; ap < aplim; ap++) { if (*ap) { idesc->id_blkno = *ap; if (ilevel == 0) n = (*func)(idesc); else n = iblock(idesc, ilevel, isize); if (n & STOP) { bp->b_flags &= ~B_INUSE; return (n); } } else { if (idesc->id_type == DATA && isize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= isize; isize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); bp->b_flags &= ~B_INUSE; return(STOP); } } } isize -= sizepb; } bp->b_flags &= ~B_INUSE; return (KEEPON); } /* * Check that a block in a legal block number. * Return 0 if in range, 1 if out of range. */ int chkrange(blk, cnt) ufs_daddr_t blk; int cnt; { register int c; if (blk < 0 || blk >= maxfsblock || cnt < 0 || cnt > maxfsblock - blk) return (1); c = dtog(&sblock, blk); if (blk < cgdmin(&sblock, c)) { if ((blk + cnt) > cgsblock(&sblock, c)) { if (debug) { printf("blk %ld < cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > cgsbase %ld\n", blk + cnt, cgsblock(&sblock, c)); } return (1); } } else { if ((blk + cnt) > cgbase(&sblock, c+1)) { if (debug) { printf("blk %ld >= cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > sblock.fs_fpg %ld\n", blk+cnt, sblock.fs_fpg); } return (1); } } return (0); } /* * General purpose interface for reading inodes. */ struct dinode * ginode(inumber) ino_t inumber; { ufs_daddr_t iblk; if (inumber < ROOTINO || inumber > maxino) errx(EEXIT, "bad inode number %d to ginode", inumber); if (startinum == 0 || inumber < startinum || inumber >= startinum + INOPB(&sblock)) { iblk = ino_to_fsba(&sblock, inumber); if (pbp != 0) pbp->b_flags &= ~B_INUSE; pbp = getdatablk(iblk, sblock.fs_bsize); startinum = (inumber / INOPB(&sblock)) * INOPB(&sblock); } return (&pbp->b_un.b_dinode[inumber % INOPB(&sblock)]); } /* * Special purpose version of ginode used to optimize first pass * over all the inodes in numerical order. 
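 * The calling protocol, as pass1() uses it (a sketch of the existing
 * usage, not new functionality):
 *
 *	resetinodebuf();	/* also consumes inodes below ROOTINO */
 *	for (inumber = ROOTINO; inumber < maxino; inumber++)
 *		dp = getnextinode(inumber);
 *	freeinodebuf();
 *
 * Requests must be strictly sequential; the buffer is refilled in
 * INOBUFSIZE-sized reads, with a short partial read at the end of
 * each cylinder group.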
*/ ino_t nextino, lastinum; long readcnt, readpercg, fullcnt, inobufsize, partialcnt, partialsize; struct dinode *inodebuf; struct dinode * getnextinode(inumber) ino_t inumber; { long size; ufs_daddr_t dblk; static struct dinode *dp; if (inumber != nextino++ || inumber > maxino) errx(EEXIT, "bad inode number %d to nextinode", inumber); if (inumber >= lastinum) { readcnt++; dblk = fsbtodb(&sblock, ino_to_fsba(&sblock, lastinum)); if (readcnt % readpercg == 0) { size = partialsize; lastinum += partialcnt; } else { size = inobufsize; lastinum += fullcnt; } (void)bread(fsreadfd, (char *)inodebuf, dblk, size); /* ??? */ dp = inodebuf; } return (dp++); } void resetinodebuf() { startinum = 0; nextino = 0; lastinum = 0; readcnt = 0; inobufsize = blkroundup(&sblock, INOBUFSIZE); fullcnt = inobufsize / sizeof(struct dinode); readpercg = sblock.fs_ipg / fullcnt; partialcnt = sblock.fs_ipg % fullcnt; partialsize = partialcnt * sizeof(struct dinode); if (partialcnt != 0) { readpercg++; } else { partialcnt = fullcnt; partialsize = inobufsize; } if (inodebuf == NULL && (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL) errx(EEXIT, "Cannot allocate space for inode buffer"); while (nextino < ROOTINO) (void)getnextinode(nextino); } void freeinodebuf() { if (inodebuf != NULL) free((char *)inodebuf); inodebuf = NULL; } /* * Routines to maintain information about directory inodes. * This is built during the first pass and used during the * second and third passes. * * Enter inodes into the cache. */ void cacheino(dp, inumber) register struct dinode *dp; ino_t inumber; { register struct inoinfo *inp; struct inoinfo **inpp; unsigned int blks; blks = howmany(dp->di_size, sblock.fs_bsize); if (blks > NDADDR) blks = NDADDR + NIADDR; inp = (struct inoinfo *) malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs_daddr_t)); if (inp == NULL) return; inpp = &inphead[inumber % numdirs]; inp->i_nexthash = *inpp; *inpp = inp; if (inumber == ROOTINO) inp->i_parent = ROOTINO; else inp->i_parent = (ino_t)0; inp->i_dotdot = (ino_t)0; inp->i_number = inumber; inp->i_isize = dp->di_size; inp->i_numblks = blks * sizeof(ufs_daddr_t); memmove(&inp->i_blks[0], &dp->di_db[0], (size_t)inp->i_numblks); if (inplast == listmax) { listmax += 100; inpsort = (struct inoinfo **)realloc((char *)inpsort, (unsigned)listmax * sizeof(struct inoinfo *)); if (inpsort == NULL) errx(EEXIT, "cannot increase directory list"); } inpsort[inplast++] = inp; } /* * Look up an inode cache structure. */ struct inoinfo * getinoinfo(inumber) ino_t inumber; { register struct inoinfo *inp; for (inp = inphead[inumber % numdirs]; inp; inp = inp->i_nexthash) { if (inp->i_number != inumber) continue; return (inp); } errx(EEXIT, "cannot find inode %d", inumber); return ((struct inoinfo *)0); } /* * Clean up all the inode cache structure. */ void inocleanup() { register struct inoinfo **inpp; if (inphead == NULL) return; for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) free((char *)(*inpp)); free((char *)inphead); free((char *)inpsort); inphead = inpsort = NULL; } void inodirty() { dirty(pbp); } void clri(idesc, type, flag) register struct inodesc *idesc; char *type; int flag; { register struct dinode *dp; dp = ginode(idesc->id_number); if (flag == 1) { pwarn("%s %s", type, (dp->di_mode & IFMT) == IFDIR ? 
"DIR" : "FILE"); pinode(idesc->id_number); } if (preen || reply("CLEAR") == 1) { if (preen) printf(" (CLEARED)\n"); n_files--; (void)ckinode(dp, idesc); clearinode(dp); statemap[idesc->id_number] = USTATE; inodirty(); } } int findname(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent) return (KEEPON); memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1); return (STOP|FOUND); } int findino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino == 0) return (KEEPON); if (strcmp(dirp->d_name, idesc->id_name) == 0 && dirp->d_ino >= ROOTINO && dirp->d_ino <= maxino) { idesc->id_parent = dirp->d_ino; return (STOP|FOUND); } return (KEEPON); } void pinode(ino) ino_t ino; { register struct dinode *dp; register char *p; struct passwd *pw; time_t t; printf(" I=%lu ", ino); if (ino < ROOTINO || ino > maxino) return; dp = ginode(ino); printf(" OWNER="); if ((pw = getpwuid((int)dp->di_uid)) != 0) printf("%s ", pw->pw_name); else printf("%u ", (unsigned)dp->di_uid); printf("MODE=%o\n", dp->di_mode); if (preen) printf("%s: ", cdevname); printf("SIZE=%qu ", dp->di_size); t = dp->di_mtime; p = ctime(&t); printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]); } void blkerror(ino, type, blk) ino_t ino; char *type; ufs_daddr_t blk; { pfatal("%ld %s I=%lu", blk, type, ino); printf("\n"); switch (statemap[ino]) { case FSTATE: statemap[ino] = FCLEAR; return; case DSTATE: statemap[ino] = DCLEAR; return; case FCLEAR: case DCLEAR: return; default: errx(EEXIT, "BAD STATE %d TO BLKERR", statemap[ino]); /* NOTREACHED */ } } /* * allocate an unused inode */ ino_t allocino(request, type) ino_t request; int type; { register ino_t ino; register struct dinode *dp; + struct cg *cgp = &cgrp; + int cg; if (request == 0) request = ROOTINO; else if (statemap[request] != USTATE) return (0); for (ino = request; ino < maxino; ino++) if (statemap[ino] == USTATE) break; if (ino == maxino) return (0); + cg = ino_to_cg(&sblock, ino); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + setbit(cg_inosused(cgp), ino % sblock.fs_ipg); + cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: statemap[ino] = DSTATE; + cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: statemap[ino] = FSTATE; break; default: return (0); } + cgdirty(); dp = ginode(ino); dp->di_db[0] = allocblk((long)1); if (dp->di_db[0] == 0) { statemap[ino] = USTATE; return (0); } + dp->di_flags = 0; dp->di_mode = type; dp->di_atime = time(NULL); dp->di_mtime = dp->di_ctime = dp->di_atime; dp->di_size = sblock.fs_fsize; dp->di_blocks = btodb(sblock.fs_fsize); n_files++; inodirty(); if (newinofmt) typemap[ino] = IFTODT(type); return (ino); } /* * deallocate an inode */ void freeino(ino) ino_t ino; { struct inodesc idesc; struct dinode *dp; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = ino; dp = ginode(ino); (void)ckinode(dp, &idesc); clearinode(dp); inodirty(); statemap[ino] = USTATE; n_files--; } Index: head/sbin/fsck_ffs/main.c =================================================================== --- head/sbin/fsck_ffs/main.c (revision 34265) +++ head/sbin/fsck_ffs/main.c (revision 34266) @@ -1,353 +1,359 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif static const char rcsid[] = - "$Id$"; + "$Id: main.c,v 1.12 1997/12/20 22:24:32 bde Exp $"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include "fsck.h" int returntosingle; static int argtoi __P((int flag, char *req, char *str, int base)); static int docheck __P((struct fstab *fsp)); static int checkfilesys __P((char *filesys, char *mntpt, long auxdata, int child)); int main __P((int argc, char *argv[])); int main(argc, argv) int argc; char *argv[]; { int ch; int ret, maxrun = 0; sync(); while ((ch = getopt(argc, argv, "dfpnNyYb:c:l:m:")) != -1) { switch (ch) { case 'p': preen++; break; case 'b': bflag = argtoi('b', "number", optarg, 10); printf("Alternate super block location: %d\n", bflag); break; case 'c': cvtlevel = argtoi('c', "conversion level", optarg, 10); break; case 'd': debug++; break; case 'f': fflag++; break; case 'l': maxrun = argtoi('l', "number", optarg, 10); break; case 'm': lfmode = argtoi('m', "mode", optarg, 8); if (lfmode &~ 07777) errx(EEXIT, "bad mode to -m: %o", lfmode); printf("** lost+found creation mode %o\n", lfmode); break; case 'n': case 'N': nflag++; yflag = 0; break; case 'y': case 'Y': yflag++; nflag = 0; break; default: errx(EEXIT, "%c option?", ch); } } argc -= optind; argv += optind; if (signal(SIGINT, SIG_IGN) != SIG_IGN) (void)signal(SIGINT, catch); if (preen) (void)signal(SIGQUIT, catchquit); if (argc) { while (argc-- > 0) (void)checkfilesys(blockcheck(*argv++), 0, 0L, 0); exit(0); } ret = checkfstab(preen, maxrun, docheck, checkfilesys); if (returntosingle) exit(2); exit(ret); } static int argtoi(flag, req, str, base) int flag; char *req, *str; int base; { char *cp; int ret; ret = (int)strtol(str, &cp, base); if (cp == str || *cp) errx(EEXIT, "-%c flag requires a %s", flag, req); return (ret); } /* * Determine whether a filesystem should be checked. */ static int docheck(fsp) register struct fstab *fsp; { if (strcmp(fsp->fs_vfstype, "ufs") || (strcmp(fsp->fs_type, FSTAB_RW) && strcmp(fsp->fs_type, FSTAB_RO)) || fsp->fs_passno == 0) return (0); return (1); } /* * Check the specified filesystem. */ /* ARGSUSED */ static int checkfilesys(filesys, mntpt, auxdata, child) char *filesys, *mntpt; long auxdata; int child; { ufs_daddr_t n_ffree, n_bfree; struct dups *dp; struct zlncnt *zlnp; int cylno, flags; if (preen && child) (void)signal(SIGQUIT, voidquit); cdevname = filesys; if (debug && preen) pwarn("starting\n"); switch (setup(filesys)) { case 0: if (preen) pfatal("CAN'T CHECK FILE SYSTEM."); return (0); case -1: pwarn("clean, %ld free ", sblock.fs_cstotal.cs_nffree + sblock.fs_frag * sblock.fs_cstotal.cs_nbfree); printf("(%d frags, %d blocks, %.1f%% fragmentation)\n", sblock.fs_cstotal.cs_nffree, sblock.fs_cstotal.cs_nbfree, sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize); return (0); } /* + * Cleared if any questions answered no. Used to decide if + * the superblock should be marked clean. 
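+ * (resolved is later handed to ckfini(): only a run with no rerun
+ * pending, and with the root not mounted read-write, may mark the
+ * file system clean.)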
+ */ + resolved = 1; + /* * 1: scan inodes tallying blocks used */ if (preen == 0) { printf("** Last Mounted on %s\n", sblock.fs_fsmnt); if (hotroot) printf("** Root file system\n"); printf("** Phase 1 - Check Blocks and Sizes\n"); } pass1(); /* * 1b: locate first references to duplicates, if any */ if (duplist) { - if (preen) + if (preen || usedsoftdep) pfatal("INTERNAL ERROR: dups with -p"); printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); } /* * 2: traverse directories from root to mark all connected directories */ if (preen == 0) printf("** Phase 2 - Check Pathnames\n"); pass2(); /* * 3: scan inodes looking for disconnected directories */ if (preen == 0) printf("** Phase 3 - Check Connectivity\n"); pass3(); /* * 4: scan inodes looking for disconnected files; check reference counts */ if (preen == 0) printf("** Phase 4 - Check Reference Counts\n"); pass4(); /* * 5: check and repair resource counts in cylinder groups */ if (preen == 0) printf("** Phase 5 - Check Cyl groups\n"); pass5(); /* * print out summary statistics */ n_ffree = sblock.fs_cstotal.cs_nffree; n_bfree = sblock.fs_cstotal.cs_nbfree; pwarn("%ld files, %ld used, %ld free ", n_files, n_blks, n_ffree + sblock.fs_frag * n_bfree); printf("(%d frags, %d blocks, %.1f%% fragmentation)\n", n_ffree, n_bfree, n_ffree * 100.0 / sblock.fs_dsize); if (debug && (n_files -= maxino - ROOTINO - sblock.fs_cstotal.cs_nifree)) printf("%d files missing\n", n_files); if (debug) { n_blks += sblock.fs_ncg * (cgdmin(&sblock, 0) - cgsblock(&sblock, 0)); n_blks += cgsblock(&sblock, 0) - cgbase(&sblock, 0); n_blks += howmany(sblock.fs_cssize, sblock.fs_fsize); if (n_blks -= maxfsblock - (n_ffree + sblock.fs_frag * n_bfree)) printf("%d blocks missing\n", n_blks); if (duplist != NULL) { printf("The following duplicate blocks remain:"); for (dp = duplist; dp; dp = dp->next) printf(" %d,", dp->dup); printf("\n"); } if (zlnhead != NULL) { printf("The following zero link count inodes remain:"); for (zlnp = zlnhead; zlnp; zlnp = zlnp->next) printf(" %u,", zlnp->zlncnt); printf("\n"); } } zlnhead = (struct zlncnt *)0; duplist = (struct dups *)0; muldup = (struct dups *)0; inocleanup(); if (fsmodified) { (void)time(&sblock.fs_time); sbdirty(); } if (cvtlevel && sblk.b_dirty) { /* * Write out the duplicate super blocks */ for (cylno = 0; cylno < sblock.fs_ncg; cylno++) bwrite(fswritefd, (char *)&sblock, fsbtodb(&sblock, cgsblock(&sblock, cylno)), SBSIZE); } - if (!hotroot) { - ckfini(1); - } else { + if (rerun) + resolved = 0; + flags = 0; + if (hotroot) { struct statfs stfs_buf; /* * Check to see if root is mounted read-write. */ if (statfs("/", &stfs_buf) == 0) flags = stfs_buf.f_flags; - else - flags = 0; - ckfini(flags & MNT_RDONLY); + if ((flags & MNT_RDONLY) == 0) + resolved = 0; } + ckfini(resolved); free(blockmap); free(statemap); free((char *)lncntp); if (!fsmodified) return (0); if (!preen) printf("\n***** FILE SYSTEM WAS MODIFIED *****\n"); if (rerun) printf("\n***** PLEASE RERUN FSCK *****\n"); if (hotroot) { struct ufs_args args; int ret; /* * We modified the root. Do a mount update on * it, unless it is read-write, so we can continue. 
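 * (The MNT_UPDATE | MNT_RELOAD mount() below asks the kernel to
 * re-read the repaired metadata of the already-mounted root in place;
 * when the root is mounted read-write the reload is not attempted and
 * a reboot is required instead.)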
*/ if (flags & MNT_RDONLY) { args.fspec = 0; args.export.ex_flags = 0; args.export.ex_root = 0; flags |= MNT_UPDATE | MNT_RELOAD; ret = mount("ufs", "/", flags, &args); if (ret == 0) return (0); } if (!preen) printf("\n***** REBOOT NOW *****\n"); sync(); return (4); } return (0); } Index: head/sbin/fsck_ffs/pass1.c =================================================================== --- head/sbin/fsck_ffs/pass1.c (revision 34265) +++ head/sbin/fsck_ffs/pass1.c (revision 34266) @@ -1,322 +1,330 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)pass1.c 8.6 (Berkeley) 4/28/95"; #endif /* not lint */ #include <sys/param.h> #include <sys/time.h> #include <ufs/ufs/dinode.h> #include <ufs/ufs/dir.h> #include <ufs/ffs/fs.h> #include <err.h> #include <string.h> #include "fsck.h" static ufs_daddr_t badblk; static ufs_daddr_t dupblk; static void checkinode __P((ino_t inumber, struct inodesc *)); void pass1() { ino_t inumber; int c, i, cgd; struct inodesc idesc; /* * Set file system reserved blocks in used block map. */ for (c = 0; c < sblock.fs_ncg; c++) { cgd = cgdmin(&sblock, c); if (c == 0) { i = cgbase(&sblock, c); cgd += howmany(sblock.fs_cssize, sblock.fs_fsize); } else i = cgsblock(&sblock, c); for (; i < cgd; i++) setbmap(i); } /* * Find all allocated blocks.
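 * Every inode is visited in numerical order through the
 * getnextinode() read-ahead buffer; checkinode() validates each one
 * and tallies its blocks into the global block map via pass1check().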
*/ memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass1check; inumber = 0; n_files = n_blks = 0; resetinodebuf(); for (c = 0; c < sblock.fs_ncg; c++) { for (i = 0; i < sblock.fs_ipg; i++, inumber++) { if (inumber < ROOTINO) continue; checkinode(inumber, &idesc); } } freeinodebuf(); } static void checkinode(inumber, idesc) ino_t inumber; register struct inodesc *idesc; { register struct dinode *dp; struct zlncnt *zlnp; int ndb, j; mode_t mode; char *symbuf; dp = getnextinode(inumber); mode = dp->di_mode & IFMT; if (mode == 0) { if (memcmp(dp->di_db, zino.di_db, NDADDR * sizeof(ufs_daddr_t)) || memcmp(dp->di_ib, zino.di_ib, NIADDR * sizeof(ufs_daddr_t)) || dp->di_mode || dp->di_size) { pfatal("PARTIALLY ALLOCATED INODE I=%lu", inumber); if (reply("CLEAR") == 1) { dp = ginode(inumber); clearinode(dp); inodirty(); } } statemap[inumber] = USTATE; return; } lastino = inumber; if (/* dp->di_size < 0 || */ dp->di_size + sblock.fs_bsize - 1 < dp->di_size || (mode == IFDIR && dp->di_size > MAXDIRSIZE)) { if (debug) printf("bad size %qu:", dp->di_size); goto unknown; } if (!preen && mode == IFMT && reply("HOLD BAD BLOCK") == 1) { dp = ginode(inumber); dp->di_size = sblock.fs_fsize; dp->di_mode = IFREG|0600; inodirty(); } ndb = howmany(dp->di_size, sblock.fs_bsize); if (ndb < 0) { if (debug) printf("bad size %qu ndb %d:", dp->di_size, ndb); goto unknown; } if (mode == IFBLK || mode == IFCHR) ndb++; if (mode == IFLNK) { if (doinglevel2 && dp->di_size > 0 && dp->di_size < MAXSYMLINKLEN && dp->di_blocks != 0) { symbuf = alloca(secsize); if (bread(fsreadfd, symbuf, fsbtodb(&sblock, dp->di_db[0]), (long)secsize) != 0) errx(EEXIT, "cannot read symlink"); if (debug) { symbuf[dp->di_size] = 0; printf("convert symlink %ld(%s) of size %ld\n", inumber, symbuf, (long)dp->di_size); } dp = ginode(inumber); memmove(dp->di_shortlink, symbuf, (long)dp->di_size); dp->di_blocks = 0; inodirty(); } /* * Fake ndb value so direct/indirect block checks below * will detect any garbage after symlink string. 
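 * For example, assuming a 4-byte ufs_daddr_t: a 25-byte path stored
 * in the inode covers howmany(25, 4) = 7 address slots, so
 * di_db[7] through di_db[NDADDR - 1] and all of di_ib[] must still
 * be zero for the inode to be accepted.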
*/ if (dp->di_size < sblock.fs_maxsymlinklen || dp->di_blocks == 0) { ndb = howmany(dp->di_size, sizeof(ufs_daddr_t)); if (ndb > NDADDR) { j = ndb - NDADDR; for (ndb = 1; j > 1; j--) ndb *= NINDIR(&sblock); ndb += NDADDR; } } } for (j = ndb; j < NDADDR; j++) if (dp->di_db[j] != 0) { if (debug) printf("bad direct addr: %ld\n", dp->di_db[j]); goto unknown; } for (j = 0, ndb -= NDADDR; ndb > 0; j++) ndb /= NINDIR(&sblock); for (; j < NIADDR; j++) if (dp->di_ib[j] != 0) { if (debug) printf("bad indirect addr: %ld\n", dp->di_ib[j]); goto unknown; } if (ftypeok(dp) == 0) goto unknown; n_files++; lncntp[inumber] = dp->di_nlink; if (dp->di_nlink <= 0) { zlnp = (struct zlncnt *)malloc(sizeof *zlnp); if (zlnp == NULL) { pfatal("LINK COUNT TABLE OVERFLOW"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } } else { zlnp->zlncnt = inumber; zlnp->next = zlnhead; zlnhead = zlnp; } } if (mode == IFDIR) { if (dp->di_size == 0) statemap[inumber] = DCLEAR; else statemap[inumber] = DSTATE; cacheino(dp, inumber); } else statemap[inumber] = FSTATE; typemap[inumber] = IFTODT(mode); if (doinglevel2 && (dp->di_ouid != (u_short)-1 || dp->di_ogid != (u_short)-1)) { dp = ginode(inumber); dp->di_uid = dp->di_ouid; dp->di_ouid = -1; dp->di_gid = dp->di_ogid; dp->di_ogid = -1; inodirty(); } badblk = dupblk = 0; idesc->id_number = inumber; (void)ckinode(dp, idesc); idesc->id_entryno *= btodb(sblock.fs_fsize); if (dp->di_blocks != idesc->id_entryno) { pwarn("INCORRECT BLOCK COUNT I=%lu (%ld should be %ld)", inumber, dp->di_blocks, idesc->id_entryno); if (preen) printf(" (CORRECTED)\n"); else if (reply("CORRECT") == 0) return; dp = ginode(inumber); dp->di_blocks = idesc->id_entryno; inodirty(); } return; unknown: pfatal("UNKNOWN FILE TYPE I=%lu", inumber); statemap[inumber] = FCLEAR; if (reply("CLEAR") == 1) { statemap[inumber] = USTATE; dp = ginode(inumber); clearinode(dp); inodirty(); } } int pass1check(idesc) register struct inodesc *idesc; { int res = KEEPON; int anyout, nfrags; ufs_daddr_t blkno = idesc->id_blkno; register struct dups *dlp; struct dups *new; if ((anyout = chkrange(blkno, idesc->id_numfrags)) != 0) { blkerror(idesc->id_number, "BAD", blkno); if (badblk++ >= MAXBAD) { pwarn("EXCESSIVE BAD BLKS I=%lu", idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } } for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) { if (anyout && chkrange(blkno, 1)) { res = SKIP; } else if (!testbmap(blkno)) { n_blks++; setbmap(blkno); } else { blkerror(idesc->id_number, "DUP", blkno); if (dupblk++ >= MAXDUP) { pwarn("EXCESSIVE DUP BLKS I=%lu", idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new = (struct dups *)malloc(sizeof(struct dups)); if (new == NULL) { pfatal("DUP TABLE OVERFLOW."); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new->dup = blkno; if (muldup == 0) { duplist = muldup = new; new->next = 0; } else { new->next = muldup->next; muldup->next = new; } for (dlp = duplist; dlp != muldup; dlp = dlp->next) if (dlp->dup == blkno) break; if (dlp == muldup && dlp->dup != blkno) muldup = new; } /* * count the number of blocks found in id_entryno */ idesc->id_entryno++; } return (res); } Index: head/sbin/fsck_ffs/pass2.c 
=================================================================== --- head/sbin/fsck_ffs/pass2.c (revision 34265) +++ head/sbin/fsck_ffs/pass2.c (revision 34266) @@ -1,467 +1,482 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)pass2.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include <sys/param.h> #include <sys/time.h> #include <ufs/ufs/dinode.h> #include <ufs/ufs/dir.h> #include <ufs/ffs/fs.h> #include <err.h> #include <string.h> #include "fsck.h" #define MINDIRSIZE (sizeof (struct dirtemplate)) static int blksort __P((const void *, const void *)); static int pass2check __P((struct inodesc *)); void pass2() { register struct dinode *dp; register struct inoinfo **inpp, *inp; struct inoinfo **inpend; struct inodesc curino; struct dinode dino; char pathbuf[MAXPATHLEN + 1]; switch (statemap[ROOTINO]) { case USTATE: pfatal("ROOT INODE UNALLOCATED"); - if (reply("ALLOCATE") == 0) + if (reply("ALLOCATE") == 0) { + ckfini(0); exit(EEXIT); + } if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; case DCLEAR: pfatal("DUPS/BAD IN ROOT INODE"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } break; case FSTATE: case FCLEAR: pfatal("ROOT INODE NOT DIRECTORY"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("FIX") == 0) + if (reply("FIX") == 0) { + ckfini(0); exit(EEXIT); + } dp = ginode(ROOTINO); dp->di_mode &= ~IFMT; dp->di_mode |= IFDIR; inodirty(); break; case DSTATE: break; default: errx(EEXIT, "BAD STATE %d FOR ROOT INODE", statemap[ROOTINO]); } statemap[ROOTINO] = DFOUND; if (newinofmt) { statemap[WINO] = FSTATE; typemap[WINO] = DT_WHT; } /* * Sort the directory list into disk block order. */ qsort((char *)inpsort, (size_t)inplast, sizeof *inpsort, blksort); /* * Check the integrity of each directory. */ memset(&curino, 0, sizeof(struct inodesc)); curino.id_type = DATA; curino.id_func = pass2check; dp = &dino; inpend = &inpsort[inplast]; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_isize == 0) continue; if (inp->i_isize < MINDIRSIZE) { direrror(inp->i_number, "DIRECTORY TOO SHORT"); inp->i_isize = roundup(MINDIRSIZE, DIRBLKSIZ); if (reply("FIX") == 1) { dp = ginode(inp->i_number); dp->di_size = inp->i_isize; inodirty(); dp = &dino; } } else if ((inp->i_isize & (DIRBLKSIZ - 1)) != 0) { getpathname(pathbuf, inp->i_number, inp->i_number); - pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d", - pathbuf, inp->i_isize, DIRBLKSIZ); + if (usedsoftdep) + pfatal("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); + else + pwarn("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); if (preen) printf(" (ADJUSTED)\n"); inp->i_isize = roundup(inp->i_isize, DIRBLKSIZ); if (preen || reply("ADJUST") == 1) { dp = ginode(inp->i_number); dp->di_size = roundup(inp->i_isize, DIRBLKSIZ); inodirty(); dp = &dino; } } memset(&dino, 0, sizeof(struct dinode)); dino.di_mode = IFDIR; dp->di_size = inp->i_isize; memmove(&dp->di_db[0], &inp->i_blks[0], (size_t)inp->i_numblks); curino.id_number = inp->i_number; curino.id_parent = inp->i_parent; (void)ckinode(dp, &curino); } /* * Now that the parents of all directories have been found, * make another pass to verify the value of `..'
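 * Three cases are handled below: a `..' that already matches the
 * parent (or was marked unknown, (ino_t)-1) is left alone; a missing
 * `..' (i_dotdot == 0) is recreated with makeentry(); and a `..'
 * naming the wrong inode is rewritten with changeino(), with the
 * expected link counts of the old and new targets adjusted to match.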
*/ for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0 || inp->i_isize == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) statemap[inp->i_number] = DFOUND; if (inp->i_dotdot == inp->i_parent || inp->i_dotdot == (ino_t)-1) continue; if (inp->i_dotdot == 0) { inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, inp->i_number, "MISSING '..'"); if (reply("FIX") == 0) continue; (void)makeentry(inp->i_number, inp->i_parent, ".."); lncntp[inp->i_parent]--; continue; } fileerror(inp->i_parent, inp->i_number, "BAD INODE NUMBER FOR '..'"); if (reply("FIX") == 0) continue; lncntp[inp->i_dotdot]++; lncntp[inp->i_parent]--; inp->i_dotdot = inp->i_parent; (void)changeino(inp->i_number, "..", inp->i_parent); } /* * Mark all the directories that can be found from the root. */ propagate(); } static int pass2check(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; register struct inoinfo *inp; int n, entrysize, ret = 0; struct dinode *dp; char *errmsg; struct direct proto; char namebuf[MAXPATHLEN + 1]; char pathbuf[MAXPATHLEN + 1]; /* * If converting, set directory entry type. */ if (doinglevel2 && dirp->d_ino > 0 && dirp->d_ino < maxino) { dirp->d_type = typemap[dirp->d_ino]; ret |= ALTERED; } /* * check for "." */ if (idesc->id_entryno != 0) goto chk1; if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") == 0) { if (dirp->d_ino != idesc->id_number) { direrror(idesc->id_number, "BAD INODE NUMBER FOR '.'"); dirp->d_ino = idesc->id_number; if (reply("FIX") == 1) ret |= ALTERED; } if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '.'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk1; } direrror(idesc->id_number, "MISSING '.'"); proto.d_ino = idesc->id_number; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 1; (void)strcpy(proto.d_name, "."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (dirp->d_ino != 0 && strcmp(dirp->d_name, "..") != 0) { pfatal("CANNOT FIX, FIRST ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); } else if (dirp->d_reclen < entrysize) { pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '.'\n"); } else if (dirp->d_reclen < 2 * entrysize) { proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } else { n = dirp->d_reclen - entrysize; proto.d_reclen = entrysize; memmove(dirp, &proto, (size_t)entrysize); idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + entrysize); memset(dirp, 0, (size_t)n); dirp->d_reclen = n; if (reply("FIX") == 1) ret |= ALTERED; } chk1: if (idesc->id_entryno > 1) goto chk2; inp = getinoinfo(idesc->id_number); proto.d_ino = inp->i_parent; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 2; (void)strcpy(proto.d_name, ".."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (idesc->id_entryno == 0) { n = DIRSIZ(0, dirp); if (dirp->d_reclen < n + entrysize) goto chk2; proto.d_reclen = dirp->d_reclen - n; dirp->d_reclen = n; idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + n); memset(dirp, 0, (size_t)proto.d_reclen); dirp->d_reclen = proto.d_reclen; } if (dirp->d_ino != 
0 && strcmp(dirp->d_name, "..") == 0) { inp->i_dotdot = dirp->d_ino; if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '..'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk2; } if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") != 0) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, SECOND ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); inp->i_dotdot = (ino_t)-1; } else if (dirp->d_reclen < entrysize) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '..'\n"); inp->i_dotdot = (ino_t)-1; } else if (inp->i_parent != 0) { /* * We know the parent, so fix now. */ inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } idesc->id_entryno++; if (dirp->d_ino != 0) lncntp[dirp->d_ino]--; return (ret|KEEPON); chk2: if (dirp->d_ino == 0) return (ret|KEEPON); if (dirp->d_namlen <= 2 && dirp->d_name[0] == '.' && idesc->id_entryno >= 2) { if (dirp->d_namlen == 1) { direrror(idesc->id_number, "EXTRA '.' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } if (dirp->d_name[1] == '.') { direrror(idesc->id_number, "EXTRA '..' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } } idesc->id_entryno++; n = 0; if (dirp->d_ino > maxino) { fileerror(idesc->id_number, dirp->d_ino, "I OUT OF RANGE"); n = reply("REMOVE"); } else if (newinofmt && ((dirp->d_ino == WINO && dirp->d_type != DT_WHT) || (dirp->d_ino != WINO && dirp->d_type == DT_WHT))) { fileerror(idesc->id_number, dirp->d_ino, "BAD WHITEOUT ENTRY"); dirp->d_ino = WINO; dirp->d_type = DT_WHT; if (reply("FIX") == 1) ret |= ALTERED; } else { again: switch (statemap[dirp->d_ino]) { case USTATE: if (idesc->id_entryno <= 2) break; fileerror(idesc->id_number, dirp->d_ino, "UNALLOCATED"); n = reply("REMOVE"); break; case DCLEAR: case FCLEAR: if (idesc->id_entryno <= 2) break; if (statemap[dirp->d_ino] == FCLEAR) errmsg = "DUP/BAD"; - else if (!preen) + else if (!preen && !usedsoftdep) errmsg = "ZERO LENGTH DIRECTORY"; else { n = 1; break; } fileerror(idesc->id_number, dirp->d_ino, errmsg); if ((n = reply("REMOVE")) == 1) break; dp = ginode(dirp->d_ino); statemap[dirp->d_ino] = (dp->di_mode & IFMT) == IFDIR ? 
DSTATE : FSTATE; lncntp[dirp->d_ino] = dp->di_nlink; goto again; case DSTATE: if (statemap[idesc->id_number] == DFOUND) statemap[dirp->d_ino] = DFOUND; /* fall through */ case DFOUND: inp = getinoinfo(dirp->d_ino); if (inp->i_parent != 0 && idesc->id_entryno > 2) { getpathname(pathbuf, idesc->id_number, idesc->id_number); getpathname(namebuf, dirp->d_ino, dirp->d_ino); pwarn("%s %s %s\n", pathbuf, "IS AN EXTRANEOUS HARD LINK TO DIRECTORY", namebuf); - if (preen) - printf(" (IGNORED)\n"); + if (preen) { + printf(" (REMOVED)\n"); + n = 1; + break; + } else if ((n = reply("REMOVE")) == 1) break; } if (idesc->id_entryno > 2) inp->i_parent = idesc->id_number; /* fall through */ case FSTATE: if (newinofmt && dirp->d_type != typemap[dirp->d_ino]) { fileerror(idesc->id_number, dirp->d_ino, "BAD TYPE VALUE"); dirp->d_type = typemap[dirp->d_ino]; if (reply("FIX") == 1) ret |= ALTERED; } lncntp[dirp->d_ino]--; break; default: errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[dirp->d_ino], dirp->d_ino); } } if (n == 0) return (ret|KEEPON); dirp->d_ino = 0; return (ret|KEEPON|ALTERED); } /* * Routine to sort disk blocks. */ static int blksort(arg1, arg2) const void *arg1, *arg2; { return ((*(struct inoinfo **)arg1)->i_blks[0] - (*(struct inoinfo **)arg2)->i_blks[0]); } Index: head/sbin/fsck_ffs/pass5.c =================================================================== --- head/sbin/fsck_ffs/pass5.c (revision 34265) +++ head/sbin/fsck_ffs/pass5.c (revision 34266) @@ -1,345 +1,375 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)pass5.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include "fsck.h" void pass5() { int c, blk, frags, basesize, sumsize, mapsize, savednrpos; + int inomapsize, blkmapsize; struct fs *fs = &sblock; struct cg *cg = &cgrp; ufs_daddr_t dbase, dmax; ufs_daddr_t d; - long i, j; + long i, j, k; struct csum *cs; struct csum cstotal; struct inodesc idesc[3]; char buf[MAXBSIZE]; register struct cg *newcg = (struct cg *)buf; struct ocg *ocg = (struct ocg *)buf; statemap[WINO] = USTATE; memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) pwarn("DELETING CLUSTERING MAPS\n"); if (preen || reply("DELETE CLUSTERING MAPS")) { fs->fs_contigsumsize = 0; doinglevel1 = 1; sbdirty(); } } if (fs->fs_maxcontig > 1) { char *doit = 0; if (fs->fs_contigsumsize < 1) { doit = "CREAT"; } else if (fs->fs_contigsumsize < fs->fs_maxcontig && fs->fs_contigsumsize < FS_MAXCONTIG) { doit = "EXPAND"; } if (doit) { i = fs->fs_contigsumsize; fs->fs_contigsumsize = MIN(fs->fs_maxcontig, FS_MAXCONTIG); if (CGSIZE(fs) > fs->fs_bsize) { pwarn("CANNOT %s CLUSTER MAPS\n", doit); fs->fs_contigsumsize = i; } else if (preen || reply("CREATE CLUSTER MAPS")) { if (preen) pwarn("%sING CLUSTER MAPS\n", doit); fs->fs_cgsize = fragroundup(fs, CGSIZE(fs)); doinglevel1 = 1; sbdirty(); } } } } switch ((int)fs->fs_postblformat) { case FS_42POSTBLFMT: basesize = (char *)(&ocg->cg_btot[0]) - (char *)(&ocg->cg_firstfield); sumsize = &ocg->cg_iused[0] - (u_int8_t *)(&ocg->cg_btot[0]); mapsize = &ocg->cg_free[howmany(fs->fs_fpg, NBBY)] - (u_char *)&ocg->cg_iused[0]; + blkmapsize = howmany(fs->fs_fpg, NBBY); + inomapsize = &ocg->cg_free[0] - (u_char *)&ocg->cg_iused[0]; ocg->cg_magic = CG_MAGIC; savednrpos = fs->fs_nrpos; fs->fs_nrpos = 8; break; case FS_DYNAMICPOSTBLFMT: newcg->cg_btotoff = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); newcg->cg_boff = newcg->cg_btotoff + fs->fs_cpg * sizeof(long); newcg->cg_iusedoff = newcg->cg_boff + fs->fs_cpg * fs->fs_nrpos * sizeof(short); newcg->cg_freeoff = newcg->cg_iusedoff + howmany(fs->fs_ipg, NBBY); - if (fs->fs_contigsumsize <= 0) { - newcg->cg_nextfreeoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); - } else { - newcg->cg_clustersumoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY) - + inomapsize = newcg->cg_freeoff - newcg->cg_iusedoff; + newcg->cg_nextfreeoff = newcg->cg_freeoff + + howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); + blkmapsize = newcg->cg_nextfreeoff - newcg->cg_freeoff; + if (fs->fs_contigsumsize > 0) { + newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(long); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(long)); newcg->cg_clusteroff = newcg->cg_clustersumoff + (fs->fs_contigsumsize + 1) * sizeof(long); newcg->cg_nextfreeoff = newcg->cg_clusteroff + howmany(fs->fs_cpg * fs->fs_spc / NSPB(fs), NBBY); } newcg->cg_magic = CG_MAGIC; basesize = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); sumsize = newcg->cg_iusedoff - newcg->cg_btotoff; mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff; break; default: - sumsize = 0; /* keep lint happy */ + inomapsize = blkmapsize = sumsize = 0; /* keep lint happy */ errx(EEXIT, "UNKNOWN ROTATIONAL TABLE FORMAT %d", fs->fs_postblformat); } memset(&idesc[0], 0, sizeof idesc); for (i = 0; i < 3; i++) { idesc[i].id_type = ADDR; if 
(doinglevel2) idesc[i].id_fix = FIX; } memset(&cstotal, 0, sizeof(struct csum)); j = blknum(fs, fs->fs_size + fs->fs_frag - 1); for (i = fs->fs_size; i < j; i++) setbmap(i); for (c = 0; c < fs->fs_ncg; c++) { getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize); if (!cg_chkmagic(cg)) pfatal("CG %d: BAD MAGIC NUMBER\n", c); dbase = cgbase(fs, c); dmax = dbase + fs->fs_fpg; if (dmax > fs->fs_size) dmax = fs->fs_size; newcg->cg_time = cg->cg_time; newcg->cg_cgx = c; if (c == fs->fs_ncg - 1) newcg->cg_ncyl = fs->fs_ncyl % fs->fs_cpg; else newcg->cg_ncyl = fs->fs_cpg; newcg->cg_ndblk = dmax - dbase; if (fs->fs_contigsumsize > 0) newcg->cg_nclusterblks = newcg->cg_ndblk / fs->fs_frag; newcg->cg_cs.cs_ndir = 0; newcg->cg_cs.cs_nffree = 0; newcg->cg_cs.cs_nbfree = 0; newcg->cg_cs.cs_nifree = fs->fs_ipg; if (cg->cg_rotor < newcg->cg_ndblk) newcg->cg_rotor = cg->cg_rotor; else newcg->cg_rotor = 0; if (cg->cg_frotor < newcg->cg_ndblk) newcg->cg_frotor = cg->cg_frotor; else newcg->cg_frotor = 0; if (cg->cg_irotor < newcg->cg_niblk) newcg->cg_irotor = cg->cg_irotor; else newcg->cg_irotor = 0; memset(&newcg->cg_frsum[0], 0, sizeof newcg->cg_frsum); memset(&cg_blktot(newcg)[0], 0, (size_t)(sumsize + mapsize)); if (fs->fs_postblformat == FS_42POSTBLFMT) ocg->cg_magic = CG_MAGIC; j = fs->fs_ipg * c; for (i = 0; i < fs->fs_ipg; j++, i++) { switch (statemap[j]) { case USTATE: break; case DSTATE: case DCLEAR: case DFOUND: newcg->cg_cs.cs_ndir++; /* fall through */ case FSTATE: case FCLEAR: newcg->cg_cs.cs_nifree--; setbit(cg_inosused(newcg), i); break; default: if (j < ROOTINO) break; errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[j], j); } } if (c == 0) for (i = 0; i < ROOTINO; i++) { setbit(cg_inosused(newcg), i); newcg->cg_cs.cs_nifree--; } for (i = 0, d = dbase; d < dmax; d += fs->fs_frag, i += fs->fs_frag) { frags = 0; for (j = 0; j < fs->fs_frag; j++) { if (testbmap(d + j)) continue; setbit(cg_blksfree(newcg), i + j); frags++; } if (frags == fs->fs_frag) { newcg->cg_cs.cs_nbfree++; j = cbtocylno(fs, i); cg_blktot(newcg)[j]++; cg_blks(fs, newcg, j)[cbtorpos(fs, i)]++; if (fs->fs_contigsumsize > 0) setbit(cg_clustersfree(newcg), i / fs->fs_frag); } else if (frags > 0) { newcg->cg_cs.cs_nffree += frags; blk = blkmap(fs, cg_blksfree(newcg), i); ffs_fragacct(fs, blk, newcg->cg_frsum, 1); } } if (fs->fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(newcg); u_char *mapp = cg_clustersfree(newcg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < newcg->cg_nclusterblks; i++) { if ((map & bit) != 0) { run++; } else if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; run = 0; } if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; } } cstotal.cs_nffree += newcg->cg_cs.cs_nffree; cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree; cstotal.cs_nifree += newcg->cg_cs.cs_nifree; cstotal.cs_ndir += newcg->cg_cs.cs_ndir; cs = &fs->fs_cs(fs, c); if (memcmp(&newcg->cg_cs, cs, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(cs, &newcg->cg_cs, sizeof *cs); sbdirty(); } if (doinglevel1) { memmove(cg, newcg, (size_t)fs->fs_cgsize); cgdirty(); continue; } - if (memcmp(cg_inosused(newcg), - cg_inosused(cg), mapsize) != 0 && - dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { - memmove(cg_inosused(cg), cg_inosused(newcg), - (size_t)mapsize); - cgdirty(); - } if ((memcmp(newcg, cg, basesize) != 0 || memcmp(&cg_blktot(newcg)[0], 
&cg_blktot(cg)[0], sumsize) != 0) && dofix(&idesc[2], "SUMMARY INFORMATION BAD")) { memmove(cg, newcg, (size_t)basesize); memmove(&cg_blktot(cg)[0], &cg_blktot(newcg)[0], (size_t)sumsize); + cgdirty(); + } + if (usedsoftdep) { + for (i = 0; i < inomapsize; i++) { + j = cg_inosused(newcg)[i]; + if ((cg_inosused(cg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED INODE %d MARKED FREE", + c * fs->fs_ipg + i * 8 + k); + } + } + for (i = 0; i < blkmapsize; i++) { + j = cg_blksfree(cg)[i]; + if ((cg_blksfree(newcg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_blksfree(newcg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED FRAG %d MARKED FREE", + c * fs->fs_fpg + i * 8 + k); + } + } + } + if (memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && + dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { + memmove(cg_inosused(cg), cg_inosused(newcg), + (size_t)mapsize); cgdirty(); } } if (fs->fs_postblformat == FS_42POSTBLFMT) fs->fs_nrpos = savednrpos; if (memcmp(&cstotal, &fs->fs_cstotal, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(&fs->fs_cstotal, &cstotal, sizeof *cs); fs->fs_ronly = 0; sbdirty(); } if (fs->fs_fmod != 0) { pwarn("MODIFIED FLAG SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_fmod = 0; sbdirty(); } } if (fs->fs_clean == 0) { pwarn("CLEAN FLAG NOT SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_clean = 1; sbdirty(); } } } Index: head/sbin/fsck_ffs/setup.c =================================================================== --- head/sbin/fsck_ffs/setup.c (revision 34265) +++ head/sbin/fsck_ffs/setup.c (revision 34266) @@ -1,514 +1,520 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95"; #endif /* not lint */ #define DKTYPENAMES #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" struct bufarea asblk; #define altsblock (*asblk.b_un.b_fs) #define POWEROF2(num) (((num) & ((num) - 1)) == 0) static void badsb __P((int listerr, char *s)); static int calcsb __P((char *dev, int devfd, struct fs *fs)); static struct disklabel *getdisklabel __P((char *s, int fd)); static int readsb __P((int listerr)); /* * Read in a superblock finding an alternate if necessary. * Return 1 if successful, 0 if unsuccessful, -1 if filesystem * is already clean (preen mode only). */ int setup(dev) char *dev; { long cg, size, asked, i, j; long skipclean, bmapsize; struct disklabel *lp; off_t sizepb; struct stat statb; struct fs proto; havesb = 0; fswritefd = -1; skipclean = preen; if (stat(dev, &statb) < 0) { printf("Can't stat %s: %s\n", dev, strerror(errno)); return (0); } if ((statb.st_mode & S_IFMT) != S_IFCHR) { pfatal("%s is not a character device", dev); if (reply("CONTINUE") == 0) return (0); } if ((fsreadfd = open(dev, O_RDONLY)) < 0) { printf("Can't open %s: %s\n", dev, strerror(errno)); return (0); } if (preen == 0) printf("** %s", dev); if (nflag || (fswritefd = open(dev, O_WRONLY)) < 0) { fswritefd = -1; if (preen) pfatal("NO WRITE ACCESS"); printf(" (NO WRITE)"); } if (preen == 0) printf("\n"); fsmodified = 0; lfdir = 0; initbarea(&sblk); initbarea(&asblk); sblk.b_un.b_buf = malloc(SBSIZE); asblk.b_un.b_buf = malloc(SBSIZE); if (sblk.b_un.b_buf == NULL || asblk.b_un.b_buf == NULL) errx(EEXIT, "cannot allocate space for superblock"); lp = getdisklabel((char *)NULL, fsreadfd); if (lp) dev_bsize = secsize = lp->d_secsize; else dev_bsize = secsize = DEV_BSIZE; /* * Read in the superblock, looking for alternates if necessary */ if (readsb(1) == 0) { skipclean = 0; if (bflag || preen || calcsb(dev, fsreadfd, &proto) == 0) return(0); if (reply("LOOK FOR ALTERNATE SUPERBLOCKS") == 0) return (0); for (cg = 0; cg < proto.fs_ncg; cg++) { bflag = fsbtodb(&proto, cgsblock(&proto, cg)); if (readsb(0) != 0) break; } if (cg >= proto.fs_ncg) { printf("%s %s\n%s %s\n%s %s\n", "SEARCH FOR ALTERNATE SUPER-BLOCK", "FAILED. YOU MUST USE THE", "-b OPTION TO FSCK TO SPECIFY THE", "LOCATION OF AN ALTERNATE", "SUPER-BLOCK TO SUPPLY NEEDED", "INFORMATION; SEE fsck(8)."); bflag = 0; return(0); } pwarn("USING ALTERNATE SUPERBLOCK AT %d\n", bflag); bflag = 0; } maxfsblock = sblock.fs_size; maxino = sblock.fs_ncg * sblock.fs_ipg; /* * Check and potentially fix certain fields in the super block. 
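 * Each of the checks below follows the same pattern: complain via
 * pfatal() or pwarn(), substitute a sensible default only if the
 * operator (or preen mode) agrees, and mark the superblock buffer
 * dirty so the repaired value is written back.  A minimal sketch of
 * the idiom (field_is_impossible and sane_default are placeholders,
 * not members of struct fs):
 *
 *	if (field_is_impossible) {
 *		pfatal("IMPOSSIBLE VALUE IN SUPERBLOCK");
 *		if (reply("SET TO DEFAULT") == 1) {
 *			sblock.fs_field = sane_default;
 *			sbdirty();
 *		}
 *	}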
*/ if (sblock.fs_optim != FS_OPTTIME && sblock.fs_optim != FS_OPTSPACE) { pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK"); if (reply("SET TO DEFAULT") == 1) { sblock.fs_optim = FS_OPTTIME; sbdirty(); } } if ((sblock.fs_minfree < 0 || sblock.fs_minfree > 99)) { pfatal("IMPOSSIBLE MINFREE=%d IN SUPERBLOCK", sblock.fs_minfree); if (reply("SET TO DEFAULT") == 1) { sblock.fs_minfree = 10; sbdirty(); } } if (sblock.fs_interleave < 1 || sblock.fs_interleave > sblock.fs_nsect) { pwarn("IMPOSSIBLE INTERLEAVE=%d IN SUPERBLOCK", sblock.fs_interleave); sblock.fs_interleave = 1; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_npsect < sblock.fs_nsect || sblock.fs_npsect > sblock.fs_nsect*2) { pwarn("IMPOSSIBLE NPSECT=%d IN SUPERBLOCK", sblock.fs_npsect); sblock.fs_npsect = sblock.fs_nsect; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_inodefmt >= FS_44INODEFMT) { newinofmt = 1; } else { sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; newinofmt = 0; } /* * Convert to new inode format. */ if (cvtlevel >= 2 && sblock.fs_inodefmt < FS_44INODEFMT) { if (preen) pwarn("CONVERTING TO NEW INODE FORMAT\n"); else if (!reply("CONVERT TO NEW INODE FORMAT")) return(0); doinglevel2++; sblock.fs_inodefmt = FS_44INODEFMT; sizepb = sblock.fs_bsize; sblock.fs_maxfilesize = sblock.fs_bsize * NDADDR - 1; for (i = 0; i < NIADDR; i++) { sizepb *= NINDIR(&sblock); sblock.fs_maxfilesize += sizepb; } sblock.fs_maxsymlinklen = MAXSYMLINKLEN; sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; sbdirty(); dirty(&asblk); } /* * Convert to new cylinder group format. */ if (cvtlevel >= 1 && sblock.fs_postblformat == FS_42POSTBLFMT) { if (preen) pwarn("CONVERTING TO NEW CYLINDER GROUP FORMAT\n"); else if (!reply("CONVERT TO NEW CYLINDER GROUP FORMAT")) return(0); doinglevel1++; sblock.fs_postblformat = FS_DYNAMICPOSTBLFMT; sblock.fs_nrpos = 8; sblock.fs_postbloff = (char *)(&sblock.fs_opostbl[0][0]) - (char *)(&sblock.fs_firstfield); sblock.fs_rotbloff = &sblock.fs_space[0] - (u_char *)(&sblock.fs_firstfield); sblock.fs_cgsize = fragroundup(&sblock, CGSIZE(&sblock)); sbdirty(); dirty(&asblk); } if (asblk.b_dirty && !bflag) { memmove(&altsblock, &sblock, (size_t)sblock.fs_sbsize); flush(fswritefd, &asblk); } /* * read in the summary info. */ asked = 0; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { size = sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize; sblock.fs_csp[j] = (struct csum *)calloc(1, (unsigned)size); if (bread(fsreadfd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } asked++; } } /* * If we survive the above basic checks and are preening, * quit here unless forced. 
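 * (skipclean starts out equal to preen and is cleared as soon as the
 * primary superblock cannot be used as-is; fflag lets the operator
 * force a full check even when the clean flag is set.)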
*/ if (skipclean && sblock.fs_clean && !fflag) return (-1); /* * allocate and initialize the necessary maps */ bmapsize = roundup(howmany(maxfsblock, NBBY), sizeof(short)); blockmap = calloc((unsigned)bmapsize, sizeof (char)); if (blockmap == NULL) { printf("cannot alloc %u bytes for blockmap\n", (unsigned)bmapsize); goto badsb; } statemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (statemap == NULL) { printf("cannot alloc %u bytes for statemap\n", (unsigned)(maxino + 1)); goto badsb; } typemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (typemap == NULL) { printf("cannot alloc %u bytes for typemap\n", (unsigned)(maxino + 1)); goto badsb; } lncntp = (short *)calloc((unsigned)(maxino + 1), sizeof(short)); if (lncntp == NULL) { printf("cannot alloc %u bytes for lncntp\n", (unsigned)(maxino + 1) * sizeof(short)); goto badsb; } numdirs = sblock.fs_cstotal.cs_ndir; if (numdirs == 0) { printf("numdirs is zero, try using an alternate superblock\n"); goto badsb; } inplast = 0; listmax = numdirs + 10; inpsort = (struct inoinfo **)calloc((unsigned)listmax, sizeof(struct inoinfo *)); inphead = (struct inoinfo **)calloc((unsigned)numdirs, sizeof(struct inoinfo *)); if (inpsort == NULL || inphead == NULL) { printf("cannot alloc %u bytes for inphead\n", (unsigned)numdirs * sizeof(struct inoinfo *)); goto badsb; } bufinit(); + if (sblock.fs_flags & FS_DOSOFTDEP) + usedsoftdep = 1; + else + usedsoftdep = 0; return (1); badsb: ckfini(0); return (0); } /* * Read in the super block and its summary info. */ static int readsb(listerr) int listerr; { ufs_daddr_t super = bflag ? bflag : SBOFF / dev_bsize; if (bread(fsreadfd, (char *)&sblock, super, (long)SBSIZE) != 0) return (0); sblk.b_bno = super; sblk.b_size = SBSIZE; /* * run a few consistency checks of the super block */ if (sblock.fs_magic != FS_MAGIC) { badsb(listerr, "MAGIC NUMBER WRONG"); return (0); } if (sblock.fs_ncg < 1) { badsb(listerr, "NCG OUT OF RANGE"); return (0); } if (sblock.fs_cpg < 1) { badsb(listerr, "CPG OUT OF RANGE"); return (0); } if (sblock.fs_ncg * sblock.fs_cpg < sblock.fs_ncyl || (sblock.fs_ncg - 1) * sblock.fs_cpg >= sblock.fs_ncyl) { badsb(listerr, "NCYL LESS THAN NCG*CPG"); return (0); } if (sblock.fs_sbsize > SBSIZE) { badsb(listerr, "SIZE PREPOSTEROUSLY LARGE"); return (0); } /* * Compute block size that the filesystem is based on, * according to fsbtodb, and adjust superblock block number * so we can tell if this is an alternate later. */ super *= dev_bsize; dev_bsize = sblock.fs_fsize / fsbtodb(&sblock, 1); sblk.b_bno = super / dev_bsize; if (bflag) { havesb = 1; return (1); } /* * Set all possible fields that could differ, then do check * of whole super block against an alternate super block. * When an alternate super-block is specified this check is skipped. 
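 * (The assignments below deliberately copy every field that may
 * legitimately differ between the primary superblock and the alternate
 * kept in the last cylinder group, so that the final memcmp() only
 * trips on genuine damage.)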
*/ getblk(&asblk, cgsblock(&sblock, sblock.fs_ncg - 1), sblock.fs_sbsize); if (asblk.b_errs) return (0); altsblock.fs_firstfield = sblock.fs_firstfield; altsblock.fs_unused_1 = sblock.fs_unused_1; altsblock.fs_time = sblock.fs_time; altsblock.fs_cstotal = sblock.fs_cstotal; altsblock.fs_cgrotor = sblock.fs_cgrotor; altsblock.fs_fmod = sblock.fs_fmod; altsblock.fs_clean = sblock.fs_clean; altsblock.fs_ronly = sblock.fs_ronly; altsblock.fs_flags = sblock.fs_flags; altsblock.fs_maxcontig = sblock.fs_maxcontig; altsblock.fs_minfree = sblock.fs_minfree; altsblock.fs_optim = sblock.fs_optim; altsblock.fs_rotdelay = sblock.fs_rotdelay; altsblock.fs_maxbpg = sblock.fs_maxbpg; memmove(altsblock.fs_csp, sblock.fs_csp, sizeof sblock.fs_csp); altsblock.fs_maxcluster = sblock.fs_maxcluster; memmove(altsblock.fs_fsmnt, sblock.fs_fsmnt, sizeof sblock.fs_fsmnt); memmove(altsblock.fs_sparecon, sblock.fs_sparecon, sizeof sblock.fs_sparecon); /* * The following should not have to be copied. */ altsblock.fs_fsbtodb = sblock.fs_fsbtodb; altsblock.fs_interleave = sblock.fs_interleave; altsblock.fs_npsect = sblock.fs_npsect; altsblock.fs_nrpos = sblock.fs_nrpos; altsblock.fs_state = sblock.fs_state; altsblock.fs_qbmask = sblock.fs_qbmask; altsblock.fs_qfmask = sblock.fs_qfmask; altsblock.fs_state = sblock.fs_state; altsblock.fs_maxfilesize = sblock.fs_maxfilesize; if (memcmp(&sblock, &altsblock, (int)sblock.fs_sbsize)) { if (debug) { long *nlp, *olp, *endlp; printf("superblock mismatches\n"); nlp = (long *)&altsblock; olp = (long *)&sblock; endlp = olp + (sblock.fs_sbsize / sizeof *olp); for ( ; olp < endlp; olp++, nlp++) { if (*olp == *nlp) continue; printf("offset %d, original %d, alternate %d\n", olp - (long *)&sblock, *olp, *nlp); } } badsb(listerr, "VALUES IN SUPER BLOCK DISAGREE WITH THOSE IN FIRST ALTERNATE"); return (0); } havesb = 1; return (1); } static void badsb(listerr, s) int listerr; char *s; { if (!listerr) return; if (preen) printf("%s: ", cdevname); pfatal("BAD SUPER BLOCK: %s\n", s); } /* * Calculate a prototype superblock based on information in the disk label. * When done the cgsblock macro can be calculated and the fs_ncg field * can be used. Do NOT attempt to use other macros without verifying that * their needed information is available! */ static int calcsb(dev, devfd, fs) char *dev; int devfd; register struct fs *fs; { register struct disklabel *lp; register struct partition *pp; register char *cp; int i; cp = strchr(dev, '\0') - 1; if (cp == (char *)-1 || ((*cp < 'a' || *cp > 'h') && !isdigit(*cp))) { pfatal("%s: CANNOT FIGURE OUT FILE SYSTEM PARTITION\n", dev); return (0); } lp = getdisklabel(dev, devfd); if (isdigit(*cp)) pp = &lp->d_partitions[0]; else pp = &lp->d_partitions[*cp - 'a']; if (pp->p_fstype != FS_BSDFFS) { pfatal("%s: NOT LABELED AS A BSD FILE SYSTEM (%s)\n", dev, pp->p_fstype < FSMAXTYPES ? 
fstypenames[pp->p_fstype] : "unknown"); return (0); } if (pp->p_fsize == 0 || pp->p_frag == 0) { pfatal("%s: LABELED AS A %s FILE SYSTEM, BUT BLOCK SIZE IS 0\n", dev, fstypenames[pp->p_fstype]); return (0); } memset(fs, 0, sizeof(struct fs)); fs->fs_fsize = pp->p_fsize; fs->fs_frag = pp->p_frag; fs->fs_cpg = pp->p_cpg; fs->fs_size = pp->p_size; fs->fs_ntrak = lp->d_ntracks; fs->fs_nsect = lp->d_nsectors; fs->fs_spc = lp->d_secpercyl; fs->fs_nspf = fs->fs_fsize / lp->d_secsize; fs->fs_sblkno = roundup( howmany(lp->d_bbsize + lp->d_sbsize, fs->fs_fsize), fs->fs_frag); fs->fs_cgmask = 0xffffffff; for (i = fs->fs_ntrak; i > 1; i >>= 1) fs->fs_cgmask <<= 1; if (!POWEROF2(fs->fs_ntrak)) fs->fs_cgmask <<= 1; fs->fs_cgoffset = roundup( howmany(fs->fs_nsect, NSPF(fs)), fs->fs_frag); fs->fs_fpg = (fs->fs_cpg * fs->fs_spc) / NSPF(fs); fs->fs_ncg = howmany(fs->fs_size / fs->fs_spc, fs->fs_cpg); for (fs->fs_fsbtodb = 0, i = NSPF(fs); i > 1; i >>= 1) fs->fs_fsbtodb++; dev_bsize = lp->d_secsize; return (1); } static struct disklabel * getdisklabel(s, fd) char *s; int fd; { static struct disklabel lab; if (ioctl(fd, DIOCGDINFO, (char *)&lab) < 0) { if (s == NULL) return ((struct disklabel *)NULL); pwarn("ioctl (GCINFO): %s\n", strerror(errno)); errx(EEXIT, "%s: can't read disk label", s); } return (&lab); } Index: head/sbin/fsck_ffs/utilities.c =================================================================== --- head/sbin/fsck_ffs/utilities.c (revision 34265) +++ head/sbin/fsck_ffs/utilities.c (revision 34266) @@ -1,625 +1,648 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include "fsck.h" long diskreads, totalreads; /* Disk cache statistics */ static void rwerror __P((char *mesg, ufs_daddr_t blk)); int ftypeok(dp) struct dinode *dp; { switch (dp->di_mode & IFMT) { case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: return (1); default: if (debug) printf("bad file type 0%o\n", dp->di_mode); return (0); } } int reply(question) char *question; { int persevere; char c; if (preen) pfatal("INTERNAL ERROR: GOT TO reply()"); persevere = !strcmp(question, "CONTINUE"); printf("\n"); if (!persevere && (nflag || fswritefd < 0)) { printf("%s? no\n\n", question); + resolved = 0; return (0); } if (yflag || (persevere && nflag)) { printf("%s? yes\n\n", question); return (1); } do { printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); - while (c != '\n' && getc(stdin) != '\n') - if (feof(stdin)) + while (c != '\n' && getc(stdin) != '\n') { + if (feof(stdin)) { + resolved = 0; return (0); + } + } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); + resolved = 0; return (0); } /* * Malloc buffers and set up cache. */ void bufinit() { register struct bufarea *bp; long bufcnt, i; char *bufp; pbp = pdirbp = (struct bufarea *)0; bufp = malloc((unsigned int)sblock.fs_bsize); if (bufp == 0) errx(EEXIT, "cannot allocate buffer pool"); cgblk.b_un.b_buf = bufp; initbarea(&cgblk); bufhead.b_next = bufhead.b_prev = &bufhead; bufcnt = MAXBUFSPACE / sblock.fs_bsize; if (bufcnt < MINBUFS) bufcnt = MINBUFS; for (i = 0; i < bufcnt; i++) { bp = (struct bufarea *)malloc(sizeof(struct bufarea)); bufp = malloc((unsigned int)sblock.fs_bsize); if (bp == NULL || bufp == NULL) { if (i >= MINBUFS) break; errx(EEXIT, "cannot allocate buffer pool"); } bp->b_un.b_buf = bufp; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; initbarea(bp); } bufhead.b_size = i; /* save number of buffers */ } /* * Manage a cache of directory blocks. */ struct bufarea * getdatablk(blkno, size) ufs_daddr_t blkno; long size; { register struct bufarea *bp; for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) if (bp->b_bno == fsbtodb(&sblock, blkno)) goto foundit; for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) if ((bp->b_flags & B_INUSE) == 0) break; if (bp == &bufhead) errx(EEXIT, "deadlocked buffer pool"); getblk(bp, blkno, size); /* fall through */ foundit: totalreads++; bp->b_prev->b_next = bp->b_next; bp->b_next->b_prev = bp->b_prev; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; bp->b_flags |= B_INUSE; return (bp); } void getblk(bp, blk, size) register struct bufarea *bp; ufs_daddr_t blk; long size; { ufs_daddr_t dblk; dblk = fsbtodb(&sblock, blk); if (bp->b_bno != dblk) { flush(fswritefd, bp); diskreads++; bp->b_errs = bread(fsreadfd, bp->b_un.b_buf, dblk, size); bp->b_bno = dblk; bp->b_size = size; } } void flush(fd, bp) int fd; register struct bufarea *bp; { register int i, j; if (!bp->b_dirty) return; if (bp->b_errs != 0) pfatal("WRITING %sZERO'ED BLOCK %d TO DISK\n", (bp->b_errs == bp->b_size / dev_bsize) ? 
"" : "PARTIALLY ", bp->b_bno); bp->b_dirty = 0; bp->b_errs = 0; bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size); if (bp != &sblk) return; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { bwrite(fswritefd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize); } } static void rwerror(mesg, blk) char *mesg; ufs_daddr_t blk; { if (preen == 0) printf("\n"); pfatal("CANNOT %s: BLK %ld", mesg, blk); if (reply("CONTINUE") == 0) exit(EEXIT); } void ckfini(markclean) int markclean; { register struct bufarea *bp, *nbp; int ofsmodified, cnt = 0; if (fswritefd < 0) { (void)close(fsreadfd); return; } flush(fswritefd, &sblk); if (havesb && sblk.b_bno != SBOFF / dev_bsize && !preen && reply("UPDATE STANDARD SUPERBLOCK")) { sblk.b_bno = SBOFF / dev_bsize; sbdirty(); flush(fswritefd, &sblk); } flush(fswritefd, &cgblk); free(cgblk.b_un.b_buf); for (bp = bufhead.b_prev; bp && bp != &bufhead; bp = nbp) { cnt++; flush(fswritefd, bp); nbp = bp->b_prev; free(bp->b_un.b_buf); free((char *)bp); } if (bufhead.b_size != cnt) errx(EEXIT, "Panic: lost %d buffers", bufhead.b_size - cnt); pbp = pdirbp = (struct bufarea *)0; if (markclean && sblock.fs_clean == 0) { sblock.fs_clean = 1; sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; if (!preen) printf("\n***** FILE SYSTEM MARKED CLEAN *****\n"); } if (debug) printf("cache missed %ld of %ld (%d%%)\n", diskreads, totalreads, (int)(diskreads * 100 / totalreads)); (void)close(fsreadfd); (void)close(fswritefd); } int bread(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { char *cp; int i, errs; off_t offset; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (read(fd, buf, (int)size) == size) return (0); rwerror("READ", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); errs = 0; memset(buf, 0, (size_t)size); printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { if (read(fd, cp, (int)secsize) != secsize) { (void)lseek(fd, offset + i + secsize, 0); if (secsize != dev_bsize && dev_bsize != 1) printf(" %ld (%ld),", (blk * dev_bsize + i) / secsize, blk + i / dev_bsize); else printf(" %ld,", blk + i / dev_bsize); errs++; } } printf("\n"); return (errs); } void bwrite(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { int i; char *cp; off_t offset; if (fd < 0) return; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (write(fd, buf, (int)size) == size) { fsmodified = 1; return; } rwerror("WRITE", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) if (write(fd, cp, (int)dev_bsize) != dev_bsize) { (void)lseek(fd, offset + i + dev_bsize, 0); printf(" %ld,", blk + i / dev_bsize); } printf("\n"); return; } /* * allocate a data block with the specified number of fragments */ ufs_daddr_t allocblk(frags) long frags; { - register int i, j, k; + int i, j, k, cg, baseblk; + struct cg *cgp = &cgrp; if (frags <= 0 || frags > sblock.fs_frag) return (0); for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { for (j = 0; j <= sblock.fs_frag - frags; j++) { if (testbmap(i + j)) continue; for (k = 1; k < frags; k++) if (testbmap(i + j + k)) break; if (k < frags) { j += k; continue; } - for 
(k = 0; k < frags; k++) + cg = dtog(&sblock, i + j); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + baseblk = dtogd(&sblock, i + j); + for (k = 0; k < frags; k++) { setbmap(i + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); + } n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(); return (i + j); } } return (0); } /* * Free a previously allocated block */ void freeblk(blkno, frags) ufs_daddr_t blkno; long frags; { struct inodesc idesc; idesc.id_blkno = blkno; idesc.id_numfrags = frags; (void)pass4check(&idesc); } /* * Find a pathname */ void getpathname(namebuf, curdir, ino) char *namebuf; ino_t curdir, ino; { int len; register char *cp; struct inodesc idesc; static int busy = 0; if (curdir == ino && ino == ROOTINO) { (void)strcpy(namebuf, "/"); return; } if (busy || (statemap[curdir] != DSTATE && statemap[curdir] != DFOUND)) { (void)strcpy(namebuf, "?"); return; } busy = 1; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_fix = IGNORE; cp = &namebuf[MAXPATHLEN - 1]; *cp = '\0'; if (curdir != ino) { idesc.id_parent = curdir; goto namelookup; } while (ino != ROOTINO) { idesc.id_number = ino; idesc.id_func = findino; idesc.id_name = ".."; if ((ckinode(ginode(ino), &idesc) & FOUND) == 0) break; namelookup: idesc.id_number = idesc.id_parent; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; if ((ckinode(ginode(idesc.id_number), &idesc)&FOUND) == 0) break; len = strlen(namebuf); cp -= len; memmove(cp, namebuf, (size_t)len); *--cp = '/'; if (cp < &namebuf[MAXNAMLEN]) break; ino = idesc.id_number; } busy = 0; if (ino != ROOTINO) *--cp = '?'; memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); } void catch(sig) int sig; { if (!doinglevel2) ckfini(0); exit(12); } /* * When preening, allow a single quit to signal * a special exit after filesystem checks complete * so that reboot sequence may be interrupted. */ void catchquit(sig) int sig; { printf("returning to single-user after filesystem check\n"); returntosingle = 1; (void)signal(SIGQUIT, SIG_DFL); } /* * Ignore a single quit signal; wait and flush just in case. * Used by child processes in preen. */ void voidquit(sig) int sig; { sleep(1); (void)signal(SIGQUIT, SIG_IGN); (void)signal(SIGQUIT, SIG_DFL); } /* * determine whether an inode should be fixed. */ int dofix(idesc, msg) register struct inodesc *idesc; char *msg; { switch (idesc->id_fix) { case DONTKNOW: if (idesc->id_type == DATA) direrror(idesc->id_number, msg); else pwarn(msg); if (preen) { printf(" (SALVAGED)\n"); idesc->id_fix = FIX; return (ALTERED); } if (reply("SALVAGE") == 0) { idesc->id_fix = NOFIX; return (0); } idesc->id_fix = FIX; return (ALTERED); case FIX: return (ALTERED); case NOFIX: case IGNORE: return (0); default: errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); } /* NOTREACHED */ return (0); } #if __STDC__ #include <stdarg.h> #else #include <varargs.h> #endif /* * An unexpected inconsistency occurred. - * Die if preening, otherwise just print message and continue. + * Die if preening or the filesystem is running with the soft dependency + * protocol, otherwise just print message and continue. */ void #if __STDC__ pfatal(const char *fmt, ...) 
#else pfatal(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (!preen) { (void)vfprintf(stderr, fmt, ap); va_end(ap); + if (usedsoftdep) + (void)fprintf(stderr, + "\nUNEXPECTED SOFTDEP INCONSISTENCY\n"); return; } (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); (void)fprintf(stderr, - "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", - cdevname); + "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", + cdevname, usedsoftdep ? " SOFTDEP " : " "); + ckfini(0); exit(EEXIT); } /* - * Pwarn just prints a message when not preening, - * or a warning (preceded by filename) when preening. + * Pwarn just prints a message when not preening or running soft dependency + * protocol, or a warning (preceded by filename) when preening. */ void #if __STDC__ pwarn(const char *fmt, ...) #else pwarn(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (preen) (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); va_end(ap); } /* * Stub for routines from kernel. */ void #if __STDC__ panic(const char *fmt, ...) #else panic(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif pfatal("INTERNAL INCONSISTENCY:"); (void)vfprintf(stderr, fmt, ap); va_end(ap); exit(EEXIT); } Index: head/sbin/fsck_ifs/dir.c =================================================================== --- head/sbin/fsck_ifs/dir.c (revision 34265) +++ head/sbin/fsck_ifs/dir.c (revision 34266) @@ -1,734 +1,737 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)dir.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" char *lfname = "lost+found"; int lfmode = 01777; struct dirtemplate emptydir = { 0, DIRBLKSIZ }; struct dirtemplate dirhead = { 0, 12, DT_DIR, 1, ".", 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." }; struct odirtemplate odirhead = { 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." }; static int chgino __P((struct inodesc *)); static int dircheck __P((struct inodesc *, struct direct *)); static int expanddir __P((struct dinode *dp, char *name)); static void freedir __P((ino_t ino, ino_t parent)); static struct direct *fsck_readdir __P((struct inodesc *)); static struct bufarea *getdirblk __P((ufs_daddr_t blkno, long size)); static int lftempname __P((char *bufp, ino_t ino)); static int mkentry __P((struct inodesc *)); /* * Propagate connected state through the tree. */ void propagate() { register struct inoinfo **inpp, *inp; struct inoinfo **inpend; long change; inpend = &inpsort[inplast]; do { change = 0; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) { statemap[inp->i_number] = DFOUND; change++; } } } while (change > 0); } /* * Scan each entry in a directory block. */ int dirscan(idesc) register struct inodesc *idesc; { register struct direct *dp; register struct bufarea *bp; int dsize, n; long blksiz; char dbuf[DIRBLKSIZ]; if (idesc->id_type != DATA) errx(EEXIT, "wrong type to dirscan %d", idesc->id_type); if (idesc->id_entryno == 0 && (idesc->id_filesize & (DIRBLKSIZ - 1)) != 0) idesc->id_filesize = roundup(idesc->id_filesize, DIRBLKSIZ); blksiz = idesc->id_numfrags * sblock.fs_fsize; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) { idesc->id_filesize -= blksiz; return (SKIP); } idesc->id_loc = 0; for (dp = fsck_readdir(idesc); dp != NULL; dp = fsck_readdir(idesc)) { dsize = dp->d_reclen; memmove(dbuf, dp, (size_t)dsize); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { struct direct *tdp = (struct direct *)dbuf; u_char tmp; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif idesc->id_dirp = (struct direct *)dbuf; if ((n = (*idesc->id_func)(idesc)) & ALTERED) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt && !doinglevel2) { struct direct *tdp; u_char tmp; tdp = (struct direct *)dbuf; tmp = tdp->d_namlen; tdp->d_namlen = tdp->d_type; tdp->d_type = tmp; } # endif bp = getdirblk(idesc->id_blkno, blksiz); memmove(bp->b_un.b_buf + idesc->id_loc - dsize, dbuf, (size_t)dsize); dirty(bp); sbdirty(); } if (n & STOP) return (n); } return (idesc->id_filesize > 0 ? KEEPON : STOP); } /* * get next entry in a directory. 
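 * Entries are consumed one struct direct at a time by advancing
 * id_loc by d_reclen.  At each DIRBLKSIZ boundary the candidate entry
 * is validated with dircheck() first; a corrupted block, or garbage
 * between the last valid entry and the end of the block, is patched
 * out under dofix() control.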
*/ static struct direct * fsck_readdir(idesc) register struct inodesc *idesc; { register struct direct *dp, *ndp; register struct bufarea *bp; long size, blksiz, fix, dploc; blksiz = idesc->id_numfrags * sblock.fs_fsize; bp = getdirblk(idesc->id_blkno, blksiz); if (idesc->id_loc % DIRBLKSIZ == 0 && idesc->id_filesize > 0 && idesc->id_loc < blksiz) { dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (dircheck(idesc, dp)) goto dpok; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); dp->d_reclen = DIRBLKSIZ; dp->d_ino = 0; dp->d_type = 0; dp->d_namlen = 0; dp->d_name[0] = '\0'; if (fix) dirty(bp); idesc->id_loc += DIRBLKSIZ; idesc->id_filesize -= DIRBLKSIZ; return (dp); } dpok: if (idesc->id_filesize <= 0 || idesc->id_loc >= blksiz) return NULL; dploc = idesc->id_loc; dp = (struct direct *)(bp->b_un.b_buf + dploc); idesc->id_loc += dp->d_reclen; idesc->id_filesize -= dp->d_reclen; if ((idesc->id_loc % DIRBLKSIZ) == 0) return (dp); ndp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); if (idesc->id_loc < blksiz && idesc->id_filesize > 0 && dircheck(idesc, ndp) == 0) { size = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); idesc->id_loc += size; idesc->id_filesize -= size; if (idesc->id_fix == IGNORE) return (0); fix = dofix(idesc, "DIRECTORY CORRUPTED"); bp = getdirblk(idesc->id_blkno, blksiz); dp = (struct direct *)(bp->b_un.b_buf + dploc); dp->d_reclen += size; if (fix) dirty(bp); } return (dp); } /* * Verify that a directory entry is valid. * This is a superset of the checks made in the kernel. */ static int dircheck(idesc, dp) struct inodesc *idesc; register struct direct *dp; { register int size; register char *cp; u_char namlen, type; int spaceleft; spaceleft = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); if (dp->d_ino >= maxino || dp->d_reclen == 0 || dp->d_reclen > spaceleft || (dp->d_reclen & 0x3) != 0) return (0); if (dp->d_ino == 0) return (1); size = DIRSIZ(!newinofmt, dp); # if (BYTE_ORDER == LITTLE_ENDIAN) if (!newinofmt) { type = dp->d_namlen; namlen = dp->d_type; } else { namlen = dp->d_namlen; type = dp->d_type; } # else namlen = dp->d_namlen; type = dp->d_type; # endif if (dp->d_reclen < size || idesc->id_filesize < size || namlen > MAXNAMLEN || type > 15) return (0); for (cp = dp->d_name, size = 0; size < namlen; size++) if (*cp == '\0' || (*cp++ == '/')) return (0); if (*cp != '\0') return (0); return (1); } void direrror(ino, errmesg) ino_t ino; char *errmesg; { fileerror(ino, ino, errmesg); } void fileerror(cwd, ino, errmesg) ino_t cwd, ino; char *errmesg; { register struct dinode *dp; char pathbuf[MAXPATHLEN + 1]; pwarn("%s ", errmesg); pinode(ino); printf("\n"); getpathname(pathbuf, cwd, ino); if (ino < ROOTINO || ino > maxino) { pfatal("NAME=%s\n", pathbuf); return; } dp = ginode(ino); if (ftypeok(dp)) pfatal("%s=%s\n", (dp->di_mode & IFMT) == IFDIR ? "DIR" : "FILE", pathbuf); else pfatal("NAME=%s\n", pathbuf); } void adjust(idesc, lcnt) register struct inodesc *idesc; int lcnt; { register struct dinode *dp; dp = ginode(idesc->id_number); if (dp->di_nlink == lcnt) { if (linkup(idesc->id_number, (ino_t)0) == 0) clri(idesc, "UNREF", 0); } else { pwarn("LINK COUNT %s", (lfdir == idesc->id_number) ? lfname : ((dp->di_mode & IFMT) == IFDIR ? 
"DIR" : "FILE")); pinode(idesc->id_number); printf(" COUNT %d SHOULD BE %d", dp->di_nlink, dp->di_nlink - lcnt); - if (preen) { + if (preen || usedsoftdep) { if (lcnt < 0) { printf("\n"); pfatal("LINK COUNT INCREASING"); } - printf(" (ADJUSTED)\n"); + if (preen) + printf(" (ADJUSTED)\n"); } if (preen || reply("ADJUST") == 1) { dp->di_nlink -= lcnt; inodirty(); } } } static int mkentry(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; struct direct newent; int newlen, oldlen; newent.d_namlen = strlen(idesc->id_name); newlen = DIRSIZ(0, &newent); if (dirp->d_ino != 0) oldlen = DIRSIZ(0, dirp); else oldlen = 0; if (dirp->d_reclen - oldlen < newlen) return (KEEPON); newent.d_reclen = dirp->d_reclen - oldlen; dirp->d_reclen = oldlen; dirp = (struct direct *)(((char *)dirp) + oldlen); dirp->d_ino = idesc->id_parent; /* ino to be entered is in id_parent */ dirp->d_reclen = newent.d_reclen; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; dirp->d_namlen = newent.d_namlen; memmove(dirp->d_name, idesc->id_name, (size_t)newent.d_namlen + 1); # if (BYTE_ORDER == LITTLE_ENDIAN) /* * If the entry was split, dirscan() will only reverse the byte * order of the original entry, and not the new one, before * writing it back out. So, we reverse the byte order here if * necessary. */ if (oldlen != 0 && !newinofmt && !doinglevel2) { u_char tmp; tmp = dirp->d_namlen; dirp->d_namlen = dirp->d_type; dirp->d_type = tmp; } # endif return (ALTERED|STOP); } static int chgino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (memcmp(dirp->d_name, idesc->id_name, (int)dirp->d_namlen + 1)) return (KEEPON); dirp->d_ino = idesc->id_parent; if (newinofmt) dirp->d_type = typemap[idesc->id_parent]; else dirp->d_type = 0; return (ALTERED|STOP); } int linkup(orphan, parentdir) ino_t orphan; ino_t parentdir; { register struct dinode *dp; int lostdir; ino_t oldlfdir; struct inodesc idesc; char tempname[BUFSIZ]; memset(&idesc, 0, sizeof(struct inodesc)); dp = ginode(orphan); lostdir = (dp->di_mode & IFMT) == IFDIR; pwarn("UNREF %s ", lostdir ? "DIR" : "FILE"); pinode(orphan); - if (preen && dp->di_size == 0) + if ((preen || usedsoftdep) && dp->di_size == 0) return (0); if (preen) printf(" (RECONNECTED)\n"); else if (reply("RECONNECT") == 0) return (0); + if (parentdir != 0) + lncntp[parentdir]++; if (lfdir == 0) { dp = ginode(ROOTINO); idesc.id_name = lfname; idesc.id_type = DATA; idesc.id_func = findino; idesc.id_number = ROOTINO; if ((ckinode(dp, &idesc) & FOUND) != 0) { lfdir = idesc.id_parent; } else { pwarn("NO lost+found DIRECTORY"); if (preen || reply("CREATE")) { lfdir = allocdir(ROOTINO, (ino_t)0, lfmode); if (lfdir != 0) { if (makeentry(ROOTINO, lfdir, lfname) != 0) { if (preen) printf(" (CREATED)\n"); } else { freedir(lfdir, ROOTINO); lfdir = 0; if (preen) printf("\n"); } } } } if (lfdir == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY"); printf("\n\n"); return (0); } } dp = ginode(lfdir); if ((dp->di_mode & IFMT) != IFDIR) { pfatal("lost+found IS NOT A DIRECTORY"); if (reply("REALLOCATE") == 0) return (0); oldlfdir = lfdir; if ((lfdir = allocdir(ROOTINO, (ino_t)0, lfmode)) == 0) { pfatal("SORRY. CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } if ((changeino(ROOTINO, lfname, lfdir) & ALTERED) == 0) { pfatal("SORRY. 
CANNOT CREATE lost+found DIRECTORY\n\n"); return (0); } inodirty(); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = oldlfdir; adjust(&idesc, lncntp[oldlfdir] + 1); lncntp[oldlfdir] = 0; dp = ginode(lfdir); } if (statemap[lfdir] != DFOUND) { pfatal("SORRY. NO lost+found DIRECTORY\n\n"); return (0); } (void)lftempname(tempname, orphan); if (makeentry(lfdir, orphan, tempname) == 0) { pfatal("SORRY. NO SPACE IN lost+found DIRECTORY"); printf("\n\n"); return (0); } lncntp[orphan]--; if (lostdir) { if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 && parentdir != (ino_t)-1) (void)makeentry(orphan, lfdir, ".."); dp = ginode(lfdir); dp->di_nlink++; inodirty(); lncntp[lfdir]++; pwarn("DIR I=%lu CONNECTED. ", orphan); if (parentdir != (ino_t)-1) { printf("PARENT WAS I=%lu\n", parentdir); /* * The parent directory, because of the ordering * guarantees, has had the link count incremented * for the child, but no entry was made. This * fixes the parent link count so that fsck does * not need to be rerun. */ lncntp[parentdir]++; } if (preen == 0) printf("\n"); } return (1); } /* * fix an entry in a directory. */ int changeino(dir, name, newnum) ino_t dir; char *name; ino_t newnum; { struct inodesc idesc; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = chgino; idesc.id_number = dir; idesc.id_fix = DONTKNOW; idesc.id_name = name; idesc.id_parent = newnum; /* new value for name */ return (ckinode(ginode(dir), &idesc)); } /* * make an entry in a directory */ int makeentry(parent, ino, name) ino_t parent, ino; char *name; { struct dinode *dp; struct inodesc idesc; char pathbuf[MAXPATHLEN + 1]; if (parent < ROOTINO || parent >= maxino || ino < ROOTINO || ino >= maxino) return (0); memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_func = mkentry; idesc.id_number = parent; idesc.id_parent = ino; /* this is the inode to enter */ idesc.id_fix = DONTKNOW; idesc.id_name = name; dp = ginode(parent); if (dp->di_size % DIRBLKSIZ) { dp->di_size = roundup(dp->di_size, DIRBLKSIZ); inodirty(); } if ((ckinode(dp, &idesc) & ALTERED) != 0) return (1); getpathname(pathbuf, parent, parent); dp = ginode(parent); if (expanddir(dp, pathbuf) == 0) return (0); return (ckinode(dp, &idesc) & ALTERED); } /* * Attempt to expand the size of a directory */ static int expanddir(dp, name) register struct dinode *dp; char *name; { ufs_daddr_t lastbn, newblk; register struct bufarea *bp; char *cp, firstblk[DIRBLKSIZ]; lastbn = lblkno(&sblock, dp->di_size); if (lastbn >= NDADDR - 1 || dp->di_db[lastbn] == 0 || dp->di_size == 0) return (0); if ((newblk = allocblk(sblock.fs_frag)) == 0) return (0); dp->di_db[lastbn + 1] = dp->di_db[lastbn]; dp->di_db[lastbn] = newblk; dp->di_size += sblock.fs_bsize; dp->di_blocks += btodb(sblock.fs_bsize); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(firstblk, bp->b_un.b_buf, DIRBLKSIZ); bp = getdirblk(newblk, sblock.fs_bsize); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, firstblk, DIRBLKSIZ); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_bsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); bp = getdirblk(dp->di_db[lastbn + 1], (long)dblksize(&sblock, dp, lastbn + 1)); if (bp->b_errs) goto bad; memmove(bp->b_un.b_buf, &emptydir, sizeof emptydir); pwarn("NO SPACE LEFT IN %s", name); if (preen) printf(" (EXPANDED)\n"); else if (reply("EXPAND") == 0) goto bad; dirty(bp); inodirty(); return (1); bad: dp->di_db[lastbn] = 
dp->di_db[lastbn + 1]; dp->di_db[lastbn + 1] = 0; dp->di_size -= sblock.fs_bsize; dp->di_blocks -= btodb(sblock.fs_bsize); freeblk(newblk, sblock.fs_frag); return (0); } /* * allocate a new directory */ ino_t allocdir(parent, request, mode) ino_t parent, request; int mode; { ino_t ino; char *cp; struct dinode *dp; register struct bufarea *bp; struct dirtemplate *dirp; ino = allocino(request, IFDIR|mode); if (newinofmt) dirp = &dirhead; else dirp = (struct dirtemplate *)&odirhead; dirp->dot_ino = ino; dirp->dotdot_ino = parent; dp = ginode(ino); bp = getdirblk(dp->di_db[0], sblock.fs_fsize); if (bp->b_errs) { freeino(ino); return (0); } memmove(bp->b_un.b_buf, dirp, sizeof(struct dirtemplate)); for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; cp < &bp->b_un.b_buf[sblock.fs_fsize]; cp += DIRBLKSIZ) memmove(cp, &emptydir, sizeof emptydir); dirty(bp); dp->di_nlink = 2; inodirty(); if (ino == ROOTINO) { lncntp[ino] = dp->di_nlink; cacheino(dp, ino); return(ino); } if (statemap[parent] != DSTATE && statemap[parent] != DFOUND) { freeino(ino); return (0); } cacheino(dp, ino); statemap[ino] = statemap[parent]; if (statemap[ino] == DSTATE) { lncntp[ino] = dp->di_nlink; lncntp[parent]++; } dp = ginode(parent); dp->di_nlink++; inodirty(); return (ino); } /* * free a directory inode */ static void freedir(ino, parent) ino_t ino, parent; { struct dinode *dp; if (ino != parent) { dp = ginode(parent); dp->di_nlink--; inodirty(); } freeino(ino); } /* * generate a temporary name for an orphan placed in the lost+found directory. */ static int lftempname(bufp, ino) char *bufp; ino_t ino; { register ino_t in; register char *cp; int namlen; cp = bufp + 2; for (in = maxino; in > 0; in /= 10) cp++; *--cp = 0; namlen = cp - bufp; in = ino; while (cp > bufp) { *--cp = (in % 10) + '0'; in /= 10; } *cp = '#'; return (namlen); } /* * Get a directory block. * Ensure that it is held until another is requested. */ static struct bufarea * getdirblk(blkno, size) ufs_daddr_t blkno; long size; { if (pdirbp != 0) pdirbp->b_flags &= ~B_INUSE; pdirbp = getdatablk(blkno, size); return (pdirbp); } Index: head/sbin/fsck_ifs/fsck.h =================================================================== --- head/sbin/fsck_ifs/fsck.h (revision 34265) +++ head/sbin/fsck_ifs/fsck.h (revision 34266) @@ -1,281 +1,283 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fsck.h 8.4 (Berkeley) 5/9/95 */ #include #include #include #define MAXDUP 10 /* limit on dup blks (per inode) */ #define MAXBAD 10 /* limit on bad blks (per inode) */ #define MAXBUFSPACE 40*1024 /* maximum space to allocate to buffers */ #define INOBUFSIZE 56*1024 /* size of buffer to read inodes in pass1 */ #ifndef BUFSIZ #define BUFSIZ 1024 #endif #define USTATE 01 /* inode not allocated */ #define FSTATE 02 /* inode is file */ #define DSTATE 03 /* inode is directory */ #define DFOUND 04 /* directory found during descent */ #define DCLEAR 05 /* directory is to be cleared */ #define FCLEAR 06 /* file is to be cleared */ /* * buffer cache structure. */ struct bufarea { struct bufarea *b_next; /* free list queue */ struct bufarea *b_prev; /* free list queue */ ufs_daddr_t b_bno; int b_size; int b_errs; int b_flags; union { char *b_buf; /* buffer space */ ufs_daddr_t *b_indir; /* indirect block */ struct fs *b_fs; /* super block */ struct cg *b_cg; /* cylinder group */ struct dinode *b_dinode; /* inode block */ } b_un; char b_dirty; }; #define B_INUSE 1 #define MINBUFS 5 /* minimum number of buffers required */ struct bufarea bufhead; /* head of list of other blks in filesys */ struct bufarea sblk; /* file system superblock */ struct bufarea cgblk; /* cylinder group blocks */ struct bufarea *pdirbp; /* current directory contents */ struct bufarea *pbp; /* current inode block */ #define dirty(bp) (bp)->b_dirty = 1 #define initbarea(bp) \ (bp)->b_dirty = 0; \ (bp)->b_bno = (ufs_daddr_t)-1; \ (bp)->b_flags = 0; #define sbdirty() sblk.b_dirty = 1 #define cgdirty() cgblk.b_dirty = 1 #define sblock (*sblk.b_un.b_fs) #define cgrp (*cgblk.b_un.b_cg) enum fixstate {DONTKNOW, NOFIX, FIX, IGNORE}; struct inodesc { enum fixstate id_fix; /* policy on fixing errors */ int (*id_func)(); /* function to be applied to blocks of inode */ ino_t id_number; /* inode number described */ ino_t id_parent; /* for DATA nodes, their parent */ ufs_daddr_t id_blkno; /* current block number being examined */ int id_numfrags; /* number of frags contained in block */ quad_t id_filesize; /* for DATA nodes, the size of the directory */ int id_loc; /* for DATA nodes, current location in dir */ int id_entryno; /* for DATA nodes, current entry number */ struct direct *id_dirp; /* for DATA nodes, ptr to current entry */ char *id_name; /* for DATA nodes, name to find or enter */ char id_type; /* type of descriptor, DATA or ADDR */ }; /* file types */ #define DATA 1 #define ADDR 2 /* * Linked list of duplicate blocks. * * The list is composed of two parts. The first part of the * list (from duplist through the node pointed to by muldup) * contains a single copy of each duplicate block that has been * found. The second part of the list (from muldup to the end) * contains duplicate blocks that have been found more than once. * To check if a block has been found as a duplicate it is only * necessary to search from duplist through muldup. 
To find the * total number of times that a block has been found as a duplicate * the entire list must be searched for occurrences of the block * in question. The following diagram shows a sample list where * w (found twice), x (found once), y (found three times), and z * (found once) are duplicate block numbers: * * w -> y -> x -> z -> y -> w -> y * ^ ^ * | | * duplist muldup */ struct dups { struct dups *next; ufs_daddr_t dup; }; struct dups *duplist; /* head of dup list */ struct dups *muldup; /* end of unique duplicate dup block numbers */ /* * Linked list of inodes with zero link counts. */ struct zlncnt { struct zlncnt *next; ino_t zlncnt; }; struct zlncnt *zlnhead; /* head of zero link count list */ /* * Inode cache data structures. */ struct inoinfo { struct inoinfo *i_nexthash; /* next entry in hash chain */ ino_t i_number; /* inode number of this entry */ ino_t i_parent; /* inode number of parent */ ino_t i_dotdot; /* inode number of `..' */ size_t i_isize; /* size of inode */ u_int i_numblks; /* size of block array in bytes */ ufs_daddr_t i_blks[1]; /* actually longer */ } **inphead, **inpsort; long numdirs, listmax, inplast; char *cdevname; /* name of device being checked */ long dev_bsize; /* computed value of DEV_BSIZE */ long secsize; /* actual disk sector size */ char fflag; /* force fs check (ignore clean flag) */ char nflag; /* assume a no response */ char yflag; /* assume a yes response */ int bflag; /* location of alternate super block */ int debug; /* output debugging info */ int cvtlevel; /* convert to newer file system format */ int doinglevel1; /* converting to new cylinder group format */ int doinglevel2; /* converting to new inode format */ int newinofmt; /* filesystem has new inode format */ +char usedsoftdep; /* just fix soft dependency inconsistencies */ +char resolved; /* cleared if unresolved changes => not clean */ char preen; /* just fix normal inconsistencies */ char hotroot; /* checking root device */ char havesb; /* superblock has been read */ int fsmodified; /* 1 => write done to file system */ int fsreadfd; /* file descriptor for reading file system */ int fswritefd; /* file descriptor for writing file system */ int returntosingle; /* return to single user mode */ int rerun; /* rerun fsck. Only used in non-preen mode */ ufs_daddr_t maxfsblock; /* number of blocks in the file system */ char *blockmap; /* ptr to primary blk allocation map */ ino_t maxino; /* number of inodes in file system */ ino_t lastino; /* last inode in use */ char *statemap; /* ptr to inode state table */ u_char *typemap; /* ptr to inode type table */ short *lncntp; /* ptr to link count table */ ino_t lfdir; /* lost & found directory inode number */ char *lfname; /* lost & found directory name */ int lfmode; /* lost & found directory creation mode */ ufs_daddr_t n_blks; /* number of blocks in use */ ufs_daddr_t n_files; /* number of files in use */ #define clearinode(dp) (*(dp) = zino) struct dinode zino; #define setbmap(blkno) setbit(blockmap, blkno) #define testbmap(blkno) isset(blockmap, blkno) #define clrbmap(blkno) clrbit(blockmap, blkno) #define STOP 0x01 #define SKIP 0x02 #define KEEPON 0x04 #define ALTERED 0x08 #define FOUND 0x10 #define EEXIT 8 /* Standard error exit. */
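The duplist/muldup invariant described in the comment above admits a compact membership test. The helper below is an illustrative sketch only, not part of the original header: the name blkisdup is hypothetical, and it assumes nothing beyond the struct dups declarations above.

/*
 * Sketch: has blkno already been recorded as a duplicate?  Only the
 * unique prefix of the list (from duplist through the node that
 * muldup points at) needs to be scanned for membership.
 */
static int
blkisdup(blkno)
	ufs_daddr_t blkno;
{
	register struct dups *dlp;

	for (dlp = duplist; dlp != NULL; dlp = dlp->next) {
		if (dlp->dup == blkno)
			return (1);
		if (dlp == muldup)	/* end of unique entries */
			break;
	}
	return (0);
}

This is the same scan pass1check performs before deciding whether to advance muldup when a block recurs.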
struct fstab; void adjust __P((struct inodesc *, int lcnt)); ufs_daddr_t allocblk __P((long frags)); ino_t allocdir __P((ino_t parent, ino_t request, int mode)); ino_t allocino __P((ino_t request, int type)); void blkerror __P((ino_t ino, char *type, ufs_daddr_t blk)); char *blockcheck __P((char *name)); int bread __P((int fd, char *buf, ufs_daddr_t blk, long size)); void bufinit __P((void)); void bwrite __P((int fd, char *buf, ufs_daddr_t blk, long size)); void cacheino __P((struct dinode *dp, ino_t inumber)); void catch __P((int)); void catchquit __P((int)); int changeino __P((ino_t dir, char *name, ino_t newnum)); int checkfstab __P((int preen, int maxrun, int (*docheck)(struct fstab *), int (*chkit)(char *, char *, long, int))); int chkrange __P((ufs_daddr_t blk, int cnt)); void ckfini __P((int markclean)); int ckinode __P((struct dinode *dp, struct inodesc *)); void clri __P((struct inodesc *, char *type, int flag)); void direrror __P((ino_t ino, char *errmesg)); int dirscan __P((struct inodesc *)); int dofix __P((struct inodesc *, char *msg)); void ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_fragacct __P((struct fs *, int, int32_t [], int)); int ffs_isblock __P((struct fs *, u_char *, ufs_daddr_t)); void ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t)); void fileerror __P((ino_t cwd, ino_t ino, char *errmesg)); int findino __P((struct inodesc *)); int findname __P((struct inodesc *)); void flush __P((int fd, struct bufarea *bp)); void freeblk __P((ufs_daddr_t blkno, long frags)); void freeino __P((ino_t ino)); void freeinodebuf __P((void)); int ftypeok __P((struct dinode *dp)); void getblk __P((struct bufarea *bp, ufs_daddr_t blk, long size)); struct bufarea *getdatablk __P((ufs_daddr_t blkno, long size)); struct inoinfo *getinoinfo __P((ino_t inumber)); struct dinode *getnextinode __P((ino_t inumber)); void getpathname __P((char *namebuf, ino_t curdir, ino_t ino)); struct dinode *ginode __P((ino_t inumber)); void inocleanup __P((void)); void inodirty __P((void)); int linkup __P((ino_t orphan, ino_t parentdir)); int makeentry __P((ino_t parent, ino_t ino, char *name)); void panic __P((const char *fmt, ...)); void pass1 __P((void)); void pass1b __P((void)); int pass1check __P((struct inodesc *)); void pass2 __P((void)); void pass3 __P((void)); void pass4 __P((void)); int pass4check __P((struct inodesc *)); void pass5 __P((void)); void pfatal __P((const char *fmt, ...)); void pinode __P((ino_t ino)); void propagate __P((void)); void pwarn __P((const char *fmt, ...)); int reply __P((char *question)); void resetinodebuf __P((void)); int setup __P((char *dev)); void voidquit __P((int)); Index: head/sbin/fsck_ifs/inode.c =================================================================== --- head/sbin/fsck_ifs/inode.c (revision 34265) +++ head/sbin/fsck_ifs/inode.c (revision 34266) @@ -1,621 +1,632 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)inode.c 8.8 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include "fsck.h" static ino_t startinum; static int iblock __P((struct inodesc *, long ilevel, quad_t isize)); int ckinode(dp, idesc) struct dinode *dp; register struct inodesc *idesc; { ufs_daddr_t *ap; long ret, n, ndb, offset; struct dinode dino; quad_t remsize, sizepb; mode_t mode; char pathbuf[MAXPATHLEN + 1]; if (idesc->id_fix != IGNORE) idesc->id_fix = DONTKNOW; idesc->id_entryno = 0; idesc->id_filesize = dp->di_size; mode = dp->di_mode & IFMT; if (mode == IFBLK || mode == IFCHR || (mode == IFLNK && (dp->di_size < sblock.fs_maxsymlinklen || dp->di_blocks == 0))) return (KEEPON); dino = *dp; ndb = howmany(dino.di_size, sblock.fs_bsize); for (ap = &dino.di_db[0]; ap < &dino.di_db[NDADDR]; ap++) { if (--ndb == 0 && (offset = blkoff(&sblock, dino.di_size)) != 0) idesc->id_numfrags = numfrags(&sblock, fragroundup(&sblock, offset)); else idesc->id_numfrags = sblock.fs_frag; if (*ap == 0) { if (idesc->id_type == DATA && ndb >= 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size = (ap - &dino.di_db[0]) * sblock.fs_bsize; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); } } continue; } idesc->id_blkno = *ap; if (idesc->id_type == ADDR) ret = (*idesc->id_func)(idesc); else ret = dirscan(idesc); if (ret & STOP) return (ret); } idesc->id_numfrags = sblock.fs_frag; remsize = dino.di_size - sblock.fs_bsize * NDADDR; sizepb = sblock.fs_bsize; for (ap = &dino.di_ib[0], n = 1; n <= NIADDR; ap++, n++) { if (*ap) { idesc->id_blkno = *ap; ret = iblock(idesc, n, remsize); if (ret & STOP) return (ret); } else { if (idesc->id_type == DATA && remsize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= remsize; remsize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); break; } } } sizepb *= NINDIR(&sblock); remsize -= sizepb; } return (KEEPON); } static int 
iblock(idesc, ilevel, isize) struct inodesc *idesc; long ilevel; quad_t isize; { ufs_daddr_t *ap; ufs_daddr_t *aplim; struct bufarea *bp; int i, n, (*func)(), nif; quad_t sizepb; char buf[BUFSIZ]; char pathbuf[MAXPATHLEN + 1]; struct dinode *dp; if (idesc->id_type == ADDR) { func = idesc->id_func; if (((n = (*func)(idesc)) & KEEPON) == 0) return (n); } else func = dirscan; if (chkrange(idesc->id_blkno, idesc->id_numfrags)) return (SKIP); bp = getdatablk(idesc->id_blkno, sblock.fs_bsize); ilevel--; for (sizepb = sblock.fs_bsize, i = 0; i < ilevel; i++) sizepb *= NINDIR(&sblock); nif = howmany(isize, sizepb); if (nif > NINDIR(&sblock)) nif = NINDIR(&sblock); if (idesc->id_func == pass1check && nif < NINDIR(&sblock)) { aplim = &bp->b_un.b_indir[NINDIR(&sblock)]; for (ap = &bp->b_un.b_indir[nif]; ap < aplim; ap++) { if (*ap == 0) continue; (void)sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu", idesc->id_number); if (dofix(idesc, buf)) { *ap = 0; dirty(bp); } } flush(fswritefd, bp); } aplim = &bp->b_un.b_indir[nif]; for (ap = bp->b_un.b_indir; ap < aplim; ap++) { if (*ap) { idesc->id_blkno = *ap; if (ilevel == 0) n = (*func)(idesc); else n = iblock(idesc, ilevel, isize); if (n & STOP) { bp->b_flags &= ~B_INUSE; return (n); } } else { if (idesc->id_type == DATA && isize > 0) { /* An empty block in a directory XXX */ getpathname(pathbuf, idesc->id_number, idesc->id_number); pfatal("DIRECTORY %s: CONTAINS EMPTY BLOCKS", pathbuf); if (reply("ADJUST LENGTH") == 1) { dp = ginode(idesc->id_number); dp->di_size -= isize; isize = 0; printf( "YOU MUST RERUN FSCK AFTERWARDS\n"); rerun = 1; inodirty(); bp->b_flags &= ~B_INUSE; return(STOP); } } } isize -= sizepb; } bp->b_flags &= ~B_INUSE; return (KEEPON); } /* * Check that a block is a legal block number. * Return 0 if in range, 1 if out of range. */ int chkrange(blk, cnt) ufs_daddr_t blk; int cnt; { register int c; if (blk < 0 || blk >= maxfsblock || cnt < 0 || cnt > maxfsblock - blk) return (1); c = dtog(&sblock, blk); if (blk < cgdmin(&sblock, c)) { if ((blk + cnt) > cgsblock(&sblock, c)) { if (debug) { printf("blk %ld < cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > cgsbase %ld\n", blk + cnt, cgsblock(&sblock, c)); } return (1); } } else { if ((blk + cnt) > cgbase(&sblock, c+1)) { if (debug) { printf("blk %ld >= cgdmin %ld;", blk, cgdmin(&sblock, c)); printf(" blk + cnt %ld > sblock.fs_fpg %ld\n", blk+cnt, sblock.fs_fpg); } return (1); } } return (0); } /* * General purpose interface for reading inodes. */ struct dinode * ginode(inumber) ino_t inumber; { ufs_daddr_t iblk; if (inumber < ROOTINO || inumber > maxino) errx(EEXIT, "bad inode number %d to ginode", inumber); if (startinum == 0 || inumber < startinum || inumber >= startinum + INOPB(&sblock)) { iblk = ino_to_fsba(&sblock, inumber); if (pbp != 0) pbp->b_flags &= ~B_INUSE; pbp = getdatablk(iblk, sblock.fs_bsize); startinum = (inumber / INOPB(&sblock)) * INOPB(&sblock); } return (&pbp->b_un.b_dinode[inumber % INOPB(&sblock)]); } /* * Special purpose version of ginode used to optimize first pass * over all the inodes in numerical order. */
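A worked example may make the chunking below concrete. The figures are assumptions chosen only for illustration (128-byte dinodes, fs_ipg of 1888, and an 8K fs_bsize, under which blkroundup leaves the 56K INOBUFSIZE unchanged); they are not values taken from the source.

/*
 * Worked example of the buffering that resetinodebuf() sets up,
 * under the assumed parameters named above:
 *	fullcnt     = 57344 / 128 = 448    inodes per full read
 *	readpercg   = 1888 / 448  = 4      full reads per cylinder group
 *	partialcnt  = 1888 % 448  = 96     inodes left for a short read
 *	partialsize = 96 * 128    = 12288  bytes in that short read
 * Because partialcnt is nonzero, readpercg is bumped to 5, so
 * getnextinode() covers each cylinder group's 1888 inodes with five
 * bread() calls rather than one read per inode.
 */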
ino_t nextino, lastinum; long readcnt, readpercg, fullcnt, inobufsize, partialcnt, partialsize; struct dinode *inodebuf; struct dinode * getnextinode(inumber) ino_t inumber; { long size; ufs_daddr_t dblk; static struct dinode *dp; if (inumber != nextino++ || inumber > maxino) errx(EEXIT, "bad inode number %d to nextinode", inumber); if (inumber >= lastinum) { readcnt++; dblk = fsbtodb(&sblock, ino_to_fsba(&sblock, lastinum)); if (readcnt % readpercg == 0) { size = partialsize; lastinum += partialcnt; } else { size = inobufsize; lastinum += fullcnt; } (void)bread(fsreadfd, (char *)inodebuf, dblk, size); /* ??? */ dp = inodebuf; } return (dp++); } void resetinodebuf() { startinum = 0; nextino = 0; lastinum = 0; readcnt = 0; inobufsize = blkroundup(&sblock, INOBUFSIZE); fullcnt = inobufsize / sizeof(struct dinode); readpercg = sblock.fs_ipg / fullcnt; partialcnt = sblock.fs_ipg % fullcnt; partialsize = partialcnt * sizeof(struct dinode); if (partialcnt != 0) { readpercg++; } else { partialcnt = fullcnt; partialsize = inobufsize; } if (inodebuf == NULL && (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL) errx(EEXIT, "Cannot allocate space for inode buffer"); while (nextino < ROOTINO) (void)getnextinode(nextino); } void freeinodebuf() { if (inodebuf != NULL) free((char *)inodebuf); inodebuf = NULL; } /* * Routines to maintain information about directory inodes. * This is built during the first pass and used during the * second and third passes. * * Enter inodes into the cache. */ void cacheino(dp, inumber) register struct dinode *dp; ino_t inumber; { register struct inoinfo *inp; struct inoinfo **inpp; unsigned int blks; blks = howmany(dp->di_size, sblock.fs_bsize); if (blks > NDADDR) blks = NDADDR + NIADDR; inp = (struct inoinfo *) malloc(sizeof(*inp) + (blks - 1) * sizeof(ufs_daddr_t)); if (inp == NULL) return; inpp = &inphead[inumber % numdirs]; inp->i_nexthash = *inpp; *inpp = inp; if (inumber == ROOTINO) inp->i_parent = ROOTINO; else inp->i_parent = (ino_t)0; inp->i_dotdot = (ino_t)0; inp->i_number = inumber; inp->i_isize = dp->di_size; inp->i_numblks = blks * sizeof(ufs_daddr_t); memmove(&inp->i_blks[0], &dp->di_db[0], (size_t)inp->i_numblks); if (inplast == listmax) { listmax += 100; inpsort = (struct inoinfo **)realloc((char *)inpsort, (unsigned)listmax * sizeof(struct inoinfo *)); if (inpsort == NULL) errx(EEXIT, "cannot increase directory list"); } inpsort[inplast++] = inp; } /* * Look up an inode cache structure. */ struct inoinfo * getinoinfo(inumber) ino_t inumber; { register struct inoinfo *inp; for (inp = inphead[inumber % numdirs]; inp; inp = inp->i_nexthash) { if (inp->i_number != inumber) continue; return (inp); } errx(EEXIT, "cannot find inode %d", inumber); return ((struct inoinfo *)0); } /* * Clean up all the inode cache structures. */ void inocleanup() { register struct inoinfo **inpp; if (inphead == NULL) return; for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) free((char *)(*inpp)); free((char *)inphead); free((char *)inpsort); inphead = inpsort = NULL; } void inodirty() { dirty(pbp); } void clri(idesc, type, flag) register struct inodesc *idesc; char *type; int flag; { register struct dinode *dp; dp = ginode(idesc->id_number); if (flag == 1) { pwarn("%s %s", type, (dp->di_mode & IFMT) == IFDIR ?
"DIR" : "FILE"); pinode(idesc->id_number); } if (preen || reply("CLEAR") == 1) { if (preen) printf(" (CLEARED)\n"); n_files--; (void)ckinode(dp, idesc); clearinode(dp); statemap[idesc->id_number] = USTATE; inodirty(); } } int findname(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino != idesc->id_parent) return (KEEPON); memmove(idesc->id_name, dirp->d_name, (size_t)dirp->d_namlen + 1); return (STOP|FOUND); } int findino(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; if (dirp->d_ino == 0) return (KEEPON); if (strcmp(dirp->d_name, idesc->id_name) == 0 && dirp->d_ino >= ROOTINO && dirp->d_ino <= maxino) { idesc->id_parent = dirp->d_ino; return (STOP|FOUND); } return (KEEPON); } void pinode(ino) ino_t ino; { register struct dinode *dp; register char *p; struct passwd *pw; time_t t; printf(" I=%lu ", ino); if (ino < ROOTINO || ino > maxino) return; dp = ginode(ino); printf(" OWNER="); if ((pw = getpwuid((int)dp->di_uid)) != 0) printf("%s ", pw->pw_name); else printf("%u ", (unsigned)dp->di_uid); printf("MODE=%o\n", dp->di_mode); if (preen) printf("%s: ", cdevname); printf("SIZE=%qu ", dp->di_size); t = dp->di_mtime; p = ctime(&t); printf("MTIME=%12.12s %4.4s ", &p[4], &p[20]); } void blkerror(ino, type, blk) ino_t ino; char *type; ufs_daddr_t blk; { pfatal("%ld %s I=%lu", blk, type, ino); printf("\n"); switch (statemap[ino]) { case FSTATE: statemap[ino] = FCLEAR; return; case DSTATE: statemap[ino] = DCLEAR; return; case FCLEAR: case DCLEAR: return; default: errx(EEXIT, "BAD STATE %d TO BLKERR", statemap[ino]); /* NOTREACHED */ } } /* * allocate an unused inode */ ino_t allocino(request, type) ino_t request; int type; { register ino_t ino; register struct dinode *dp; + struct cg *cgp = &cgrp; + int cg; if (request == 0) request = ROOTINO; else if (statemap[request] != USTATE) return (0); for (ino = request; ino < maxino; ino++) if (statemap[ino] == USTATE) break; if (ino == maxino) return (0); + cg = ino_to_cg(&sblock, ino); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + setbit(cg_inosused(cgp), ino % sblock.fs_ipg); + cgp->cg_cs.cs_nifree--; switch (type & IFMT) { case IFDIR: statemap[ino] = DSTATE; + cgp->cg_cs.cs_ndir++; break; case IFREG: case IFLNK: statemap[ino] = FSTATE; break; default: return (0); } + cgdirty(); dp = ginode(ino); dp->di_db[0] = allocblk((long)1); if (dp->di_db[0] == 0) { statemap[ino] = USTATE; return (0); } + dp->di_flags = 0; dp->di_mode = type; dp->di_atime = time(NULL); dp->di_mtime = dp->di_ctime = dp->di_atime; dp->di_size = sblock.fs_fsize; dp->di_blocks = btodb(sblock.fs_fsize); n_files++; inodirty(); if (newinofmt) typemap[ino] = IFTODT(type); return (ino); } /* * deallocate an inode */ void freeino(ino) ino_t ino; { struct inodesc idesc; struct dinode *dp; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass4check; idesc.id_number = ino; dp = ginode(ino); (void)ckinode(dp, &idesc); clearinode(dp); inodirty(); statemap[ino] = USTATE; n_files--; } Index: head/sbin/fsck_ifs/main.c =================================================================== --- head/sbin/fsck_ifs/main.c (revision 34265) +++ head/sbin/fsck_ifs/main.c (revision 34266) @@ -1,353 +1,359 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1986, 1993\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)main.c 8.6 (Berkeley) 5/14/95"; #endif static const char rcsid[] = - "$Id$"; + "$Id: main.c,v 1.12 1997/12/20 22:24:32 bde Exp $"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include "fsck.h" int returntosingle; static int argtoi __P((int flag, char *req, char *str, int base)); static int docheck __P((struct fstab *fsp)); static int checkfilesys __P((char *filesys, char *mntpt, long auxdata, int child)); int main __P((int argc, char *argv[])); int main(argc, argv) int argc; char *argv[]; { int ch; int ret, maxrun = 0; sync(); while ((ch = getopt(argc, argv, "dfpnNyYb:c:l:m:")) != -1) { switch (ch) { case 'p': preen++; break; case 'b': bflag = argtoi('b', "number", optarg, 10); printf("Alternate super block location: %d\n", bflag); break; case 'c': cvtlevel = argtoi('c', "conversion level", optarg, 10); break; case 'd': debug++; break; case 'f': fflag++; break; case 'l': maxrun = argtoi('l', "number", optarg, 10); break; case 'm': lfmode = argtoi('m', "mode", optarg, 8); if (lfmode &~ 07777) errx(EEXIT, "bad mode to -m: %o", lfmode); printf("** lost+found creation mode %o\n", lfmode); break; case 'n': case 'N': nflag++; yflag = 0; break; case 'y': case 'Y': yflag++; nflag = 0; break; default: errx(EEXIT, "%c option?", ch); } } argc -= optind; argv += optind; if (signal(SIGINT, SIG_IGN) != SIG_IGN) (void)signal(SIGINT, catch); if (preen) (void)signal(SIGQUIT, catchquit); if (argc) { while (argc-- > 0) (void)checkfilesys(blockcheck(*argv++), 0, 0L, 0); exit(0); } ret = checkfstab(preen, maxrun, docheck, checkfilesys); if (returntosingle) exit(2); exit(ret); } static int argtoi(flag, req, str, base) int flag; char *req, *str; int base; { char *cp; int ret; ret = (int)strtol(str, &cp, base); if (cp == str || *cp) errx(EEXIT, "-%c flag requires a %s", flag, req); return (ret); } /* * Determine whether a filesystem should be checked. */ static int docheck(fsp) register struct fstab *fsp; { if (strcmp(fsp->fs_vfstype, "ufs") || (strcmp(fsp->fs_type, FSTAB_RW) && strcmp(fsp->fs_type, FSTAB_RO)) || fsp->fs_passno == 0) return (0); return (1); } /* * Check the specified filesystem. */ /* ARGSUSED */ static int checkfilesys(filesys, mntpt, auxdata, child) char *filesys, *mntpt; long auxdata; int child; { ufs_daddr_t n_ffree, n_bfree; struct dups *dp; struct zlncnt *zlnp; int cylno, flags; if (preen && child) (void)signal(SIGQUIT, voidquit); cdevname = filesys; if (debug && preen) pwarn("starting\n"); switch (setup(filesys)) { case 0: if (preen) pfatal("CAN'T CHECK FILE SYSTEM."); return (0); case -1: pwarn("clean, %ld free ", sblock.fs_cstotal.cs_nffree + sblock.fs_frag * sblock.fs_cstotal.cs_nbfree); printf("(%d frags, %d blocks, %.1f%% fragmentation)\n", sblock.fs_cstotal.cs_nffree, sblock.fs_cstotal.cs_nbfree, sblock.fs_cstotal.cs_nffree * 100.0 / sblock.fs_dsize); return (0); } /* + * Cleared if any questions answered no. Used to decide if + * the superblock should be marked clean. 
+ */ + resolved = 1; + /* * 1: scan inodes tallying blocks used */ if (preen == 0) { printf("** Last Mounted on %s\n", sblock.fs_fsmnt); if (hotroot) printf("** Root file system\n"); printf("** Phase 1 - Check Blocks and Sizes\n"); } pass1(); /* * 1b: locate first references to duplicates, if any */ if (duplist) { - if (preen) + if (preen || usedsoftdep) pfatal("INTERNAL ERROR: dups with -p"); printf("** Phase 1b - Rescan For More DUPS\n"); pass1b(); } /* * 2: traverse directories from root to mark all connected directories */ if (preen == 0) printf("** Phase 2 - Check Pathnames\n"); pass2(); /* * 3: scan inodes looking for disconnected directories */ if (preen == 0) printf("** Phase 3 - Check Connectivity\n"); pass3(); /* * 4: scan inodes looking for disconnected files; check reference counts */ if (preen == 0) printf("** Phase 4 - Check Reference Counts\n"); pass4(); /* * 5: check and repair resource counts in cylinder groups */ if (preen == 0) printf("** Phase 5 - Check Cyl groups\n"); pass5(); /* * print out summary statistics */ n_ffree = sblock.fs_cstotal.cs_nffree; n_bfree = sblock.fs_cstotal.cs_nbfree; pwarn("%ld files, %ld used, %ld free ", n_files, n_blks, n_ffree + sblock.fs_frag * n_bfree); printf("(%d frags, %d blocks, %.1f%% fragmentation)\n", n_ffree, n_bfree, n_ffree * 100.0 / sblock.fs_dsize); if (debug && (n_files -= maxino - ROOTINO - sblock.fs_cstotal.cs_nifree)) printf("%d files missing\n", n_files); if (debug) { n_blks += sblock.fs_ncg * (cgdmin(&sblock, 0) - cgsblock(&sblock, 0)); n_blks += cgsblock(&sblock, 0) - cgbase(&sblock, 0); n_blks += howmany(sblock.fs_cssize, sblock.fs_fsize); if (n_blks -= maxfsblock - (n_ffree + sblock.fs_frag * n_bfree)) printf("%d blocks missing\n", n_blks); if (duplist != NULL) { printf("The following duplicate blocks remain:"); for (dp = duplist; dp; dp = dp->next) printf(" %d,", dp->dup); printf("\n"); } if (zlnhead != NULL) { printf("The following zero link count inodes remain:"); for (zlnp = zlnhead; zlnp; zlnp = zlnp->next) printf(" %u,", zlnp->zlncnt); printf("\n"); } } zlnhead = (struct zlncnt *)0; duplist = (struct dups *)0; muldup = (struct dups *)0; inocleanup(); if (fsmodified) { (void)time(&sblock.fs_time); sbdirty(); } if (cvtlevel && sblk.b_dirty) { /* * Write out the duplicate super blocks */ for (cylno = 0; cylno < sblock.fs_ncg; cylno++) bwrite(fswritefd, (char *)&sblock, fsbtodb(&sblock, cgsblock(&sblock, cylno)), SBSIZE); } - if (!hotroot) { - ckfini(1); - } else { + if (rerun) + resolved = 0; + flags = 0; + if (hotroot) { struct statfs stfs_buf; /* * Check to see if root is mounted read-write. */ if (statfs("/", &stfs_buf) == 0) flags = stfs_buf.f_flags; - else - flags = 0; - ckfini(flags & MNT_RDONLY); + if ((flags & MNT_RDONLY) == 0) + resolved = 0; } + ckfini(resolved); free(blockmap); free(statemap); free((char *)lncntp); if (!fsmodified) return (0); if (!preen) printf("\n***** FILE SYSTEM WAS MODIFIED *****\n"); if (rerun) printf("\n***** PLEASE RERUN FSCK *****\n"); if (hotroot) { struct ufs_args args; int ret; /* * We modified the root. Do a mount update on * it, unless it is read-write, so we can continue. 
*/ if (flags & MNT_RDONLY) { args.fspec = 0; args.export.ex_flags = 0; args.export.ex_root = 0; flags |= MNT_UPDATE | MNT_RELOAD; ret = mount("ufs", "/", flags, &args); if (ret == 0) return (0); } if (!preen) printf("\n***** REBOOT NOW *****\n"); sync(); return (4); } return (0); } Index: head/sbin/fsck_ifs/pass1.c =================================================================== --- head/sbin/fsck_ifs/pass1.c (revision 34265) +++ head/sbin/fsck_ifs/pass1.c (revision 34266) @@ -1,322 +1,330 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)pass1.c 8.6 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" static ufs_daddr_t badblk; static ufs_daddr_t dupblk; static void checkinode __P((ino_t inumber, struct inodesc *)); void pass1() { ino_t inumber; int c, i, cgd; struct inodesc idesc; /* * Set file system reserved blocks in used block map. */ for (c = 0; c < sblock.fs_ncg; c++) { cgd = cgdmin(&sblock, c); if (c == 0) { i = cgbase(&sblock, c); cgd += howmany(sblock.fs_cssize, sblock.fs_fsize); } else i = cgsblock(&sblock, c); for (; i < cgd; i++) setbmap(i); } /* * Find all allocated blocks. 
*/ memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = ADDR; idesc.id_func = pass1check; inumber = 0; n_files = n_blks = 0; resetinodebuf(); for (c = 0; c < sblock.fs_ncg; c++) { for (i = 0; i < sblock.fs_ipg; i++, inumber++) { if (inumber < ROOTINO) continue; checkinode(inumber, &idesc); } } freeinodebuf(); } static void checkinode(inumber, idesc) ino_t inumber; register struct inodesc *idesc; { register struct dinode *dp; struct zlncnt *zlnp; int ndb, j; mode_t mode; char *symbuf; dp = getnextinode(inumber); mode = dp->di_mode & IFMT; if (mode == 0) { if (memcmp(dp->di_db, zino.di_db, NDADDR * sizeof(ufs_daddr_t)) || memcmp(dp->di_ib, zino.di_ib, NIADDR * sizeof(ufs_daddr_t)) || dp->di_mode || dp->di_size) { pfatal("PARTIALLY ALLOCATED INODE I=%lu", inumber); if (reply("CLEAR") == 1) { dp = ginode(inumber); clearinode(dp); inodirty(); } } statemap[inumber] = USTATE; return; } lastino = inumber; if (/* dp->di_size < 0 || */ dp->di_size + sblock.fs_bsize - 1 < dp->di_size || (mode == IFDIR && dp->di_size > MAXDIRSIZE)) { if (debug) printf("bad size %qu:", dp->di_size); goto unknown; } if (!preen && mode == IFMT && reply("HOLD BAD BLOCK") == 1) { dp = ginode(inumber); dp->di_size = sblock.fs_fsize; dp->di_mode = IFREG|0600; inodirty(); } ndb = howmany(dp->di_size, sblock.fs_bsize); if (ndb < 0) { if (debug) printf("bad size %qu ndb %d:", dp->di_size, ndb); goto unknown; } if (mode == IFBLK || mode == IFCHR) ndb++; if (mode == IFLNK) { if (doinglevel2 && dp->di_size > 0 && dp->di_size < MAXSYMLINKLEN && dp->di_blocks != 0) { symbuf = alloca(secsize); if (bread(fsreadfd, symbuf, fsbtodb(&sblock, dp->di_db[0]), (long)secsize) != 0) errx(EEXIT, "cannot read symlink"); if (debug) { symbuf[dp->di_size] = 0; printf("convert symlink %ld(%s) of size %ld\n", inumber, symbuf, (long)dp->di_size); } dp = ginode(inumber); memmove(dp->di_shortlink, symbuf, (long)dp->di_size); dp->di_blocks = 0; inodirty(); } /* * Fake ndb value so direct/indirect block checks below * will detect any garbage after symlink string. 
*/ if (dp->di_size < sblock.fs_maxsymlinklen || dp->di_blocks == 0) { ndb = howmany(dp->di_size, sizeof(ufs_daddr_t)); if (ndb > NDADDR) { j = ndb - NDADDR; for (ndb = 1; j > 1; j--) ndb *= NINDIR(&sblock); ndb += NDADDR; } } } for (j = ndb; j < NDADDR; j++) if (dp->di_db[j] != 0) { if (debug) printf("bad direct addr: %ld\n", dp->di_db[j]); goto unknown; } for (j = 0, ndb -= NDADDR; ndb > 0; j++) ndb /= NINDIR(&sblock); for (; j < NIADDR; j++) if (dp->di_ib[j] != 0) { if (debug) printf("bad indirect addr: %ld\n", dp->di_ib[j]); goto unknown; } if (ftypeok(dp) == 0) goto unknown; n_files++; lncntp[inumber] = dp->di_nlink; if (dp->di_nlink <= 0) { zlnp = (struct zlncnt *)malloc(sizeof *zlnp); if (zlnp == NULL) { pfatal("LINK COUNT TABLE OVERFLOW"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } } else { zlnp->zlncnt = inumber; zlnp->next = zlnhead; zlnhead = zlnp; } } if (mode == IFDIR) { if (dp->di_size == 0) statemap[inumber] = DCLEAR; else statemap[inumber] = DSTATE; cacheino(dp, inumber); } else statemap[inumber] = FSTATE; typemap[inumber] = IFTODT(mode); if (doinglevel2 && (dp->di_ouid != (u_short)-1 || dp->di_ogid != (u_short)-1)) { dp = ginode(inumber); dp->di_uid = dp->di_ouid; dp->di_ouid = -1; dp->di_gid = dp->di_ogid; dp->di_ogid = -1; inodirty(); } badblk = dupblk = 0; idesc->id_number = inumber; (void)ckinode(dp, idesc); idesc->id_entryno *= btodb(sblock.fs_fsize); if (dp->di_blocks != idesc->id_entryno) { pwarn("INCORRECT BLOCK COUNT I=%lu (%ld should be %ld)", inumber, dp->di_blocks, idesc->id_entryno); if (preen) printf(" (CORRECTED)\n"); else if (reply("CORRECT") == 0) return; dp = ginode(inumber); dp->di_blocks = idesc->id_entryno; inodirty(); } return; unknown: pfatal("UNKNOWN FILE TYPE I=%lu", inumber); statemap[inumber] = FCLEAR; if (reply("CLEAR") == 1) { statemap[inumber] = USTATE; dp = ginode(inumber); clearinode(dp); inodirty(); } } int pass1check(idesc) register struct inodesc *idesc; { int res = KEEPON; int anyout, nfrags; ufs_daddr_t blkno = idesc->id_blkno; register struct dups *dlp; struct dups *new; if ((anyout = chkrange(blkno, idesc->id_numfrags)) != 0) { blkerror(idesc->id_number, "BAD", blkno); if (badblk++ >= MAXBAD) { pwarn("EXCESSIVE BAD BLKS I=%lu", idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } } for (nfrags = idesc->id_numfrags; nfrags > 0; blkno++, nfrags--) { if (anyout && chkrange(blkno, 1)) { res = SKIP; } else if (!testbmap(blkno)) { n_blks++; setbmap(blkno); } else { blkerror(idesc->id_number, "DUP", blkno); if (dupblk++ >= MAXDUP) { pwarn("EXCESSIVE DUP BLKS I=%lu", idesc->id_number); if (preen) printf(" (SKIPPING)\n"); - else if (reply("CONTINUE") == 0) + else if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new = (struct dups *)malloc(sizeof(struct dups)); if (new == NULL) { pfatal("DUP TABLE OVERFLOW."); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } return (STOP); } new->dup = blkno; if (muldup == 0) { duplist = muldup = new; new->next = 0; } else { new->next = muldup->next; muldup->next = new; } for (dlp = duplist; dlp != muldup; dlp = dlp->next) if (dlp->dup == blkno) break; if (dlp == muldup && dlp->dup != blkno) muldup = new; } /* * count the number of blocks found in id_entryno */ idesc->id_entryno++; } return (res); } Index: head/sbin/fsck_ifs/pass2.c 
=================================================================== --- head/sbin/fsck_ifs/pass2.c (revision 34265) +++ head/sbin/fsck_ifs/pass2.c (revision 34266) @@ -1,467 +1,482 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)pass2.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include "fsck.h" #define MINDIRSIZE (sizeof (struct dirtemplate)) static int blksort __P((const void *, const void *)); static int pass2check __P((struct inodesc *)); void pass2() { register struct dinode *dp; register struct inoinfo **inpp, *inp; struct inoinfo **inpend; struct inodesc curino; struct dinode dino; char pathbuf[MAXPATHLEN + 1]; switch (statemap[ROOTINO]) { case USTATE: pfatal("ROOT INODE UNALLOCATED"); - if (reply("ALLOCATE") == 0) + if (reply("ALLOCATE") == 0) { + ckfini(0); exit(EEXIT); + } if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; case DCLEAR: pfatal("DUPS/BAD IN ROOT INODE"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } break; case FSTATE: case FCLEAR: pfatal("ROOT INODE NOT DIRECTORY"); if (reply("REALLOCATE")) { freeino(ROOTINO); if (allocdir(ROOTINO, ROOTINO, 0755) != ROOTINO) errx(EEXIT, "CANNOT ALLOCATE ROOT INODE"); break; } - if (reply("FIX") == 0) + if (reply("FIX") == 0) { + ckfini(0); exit(EEXIT); + } dp = ginode(ROOTINO); dp->di_mode &= ~IFMT; dp->di_mode |= IFDIR; inodirty(); break; case DSTATE: break; default: errx(EEXIT, "BAD STATE %d FOR ROOT INODE", statemap[ROOTINO]); } statemap[ROOTINO] = DFOUND; if (newinofmt) { statemap[WINO] = FSTATE; typemap[WINO] = DT_WHT; } /* * Sort the directory list into disk block order. */ qsort((char *)inpsort, (size_t)inplast, sizeof *inpsort, blksort); /* * Check the integrity of each directory. */ memset(&curino, 0, sizeof(struct inodesc)); curino.id_type = DATA; curino.id_func = pass2check; dp = &dino; inpend = &inpsort[inplast]; for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_isize == 0) continue; if (inp->i_isize < MINDIRSIZE) { direrror(inp->i_number, "DIRECTORY TOO SHORT"); inp->i_isize = roundup(MINDIRSIZE, DIRBLKSIZ); if (reply("FIX") == 1) { dp = ginode(inp->i_number); dp->di_size = inp->i_isize; inodirty(); dp = &dino; } } else if ((inp->i_isize & (DIRBLKSIZ - 1)) != 0) { getpathname(pathbuf, inp->i_number, inp->i_number); - pwarn("DIRECTORY %s: LENGTH %d NOT MULTIPLE OF %d", - pathbuf, inp->i_isize, DIRBLKSIZ); + if (usedsoftdep) + pfatal("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); + else + pwarn("%s %s: LENGTH %d NOT MULTIPLE OF %d", + "DIRECTORY", pathbuf, inp->i_isize, + DIRBLKSIZ); if (preen) printf(" (ADJUSTED)\n"); inp->i_isize = roundup(inp->i_isize, DIRBLKSIZ); if (preen || reply("ADJUST") == 1) { dp = ginode(inp->i_number); dp->di_size = roundup(inp->i_isize, DIRBLKSIZ); inodirty(); dp = &dino; } } memset(&dino, 0, sizeof(struct dinode)); dino.di_mode = IFDIR; dp->di_size = inp->i_isize; memmove(&dp->di_db[0], &inp->i_blks[0], (size_t)inp->i_numblks); curino.id_number = inp->i_number; curino.id_parent = inp->i_parent; (void)ckinode(dp, &curino); } /* * Now that the parents of all directories have been found, * make another pass to verify the value of `..' 
*/ for (inpp = inpsort; inpp < inpend; inpp++) { inp = *inpp; if (inp->i_parent == 0 || inp->i_isize == 0) continue; if (statemap[inp->i_parent] == DFOUND && statemap[inp->i_number] == DSTATE) statemap[inp->i_number] = DFOUND; if (inp->i_dotdot == inp->i_parent || inp->i_dotdot == (ino_t)-1) continue; if (inp->i_dotdot == 0) { inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, inp->i_number, "MISSING '..'"); if (reply("FIX") == 0) continue; (void)makeentry(inp->i_number, inp->i_parent, ".."); lncntp[inp->i_parent]--; continue; } fileerror(inp->i_parent, inp->i_number, "BAD INODE NUMBER FOR '..'"); if (reply("FIX") == 0) continue; lncntp[inp->i_dotdot]++; lncntp[inp->i_parent]--; inp->i_dotdot = inp->i_parent; (void)changeino(inp->i_number, "..", inp->i_parent); } /* * Mark all the directories that can be found from the root. */ propagate(); } static int pass2check(idesc) struct inodesc *idesc; { register struct direct *dirp = idesc->id_dirp; register struct inoinfo *inp; int n, entrysize, ret = 0; struct dinode *dp; char *errmsg; struct direct proto; char namebuf[MAXPATHLEN + 1]; char pathbuf[MAXPATHLEN + 1]; /* * If converting, set directory entry type. */ if (doinglevel2 && dirp->d_ino > 0 && dirp->d_ino < maxino) { dirp->d_type = typemap[dirp->d_ino]; ret |= ALTERED; } /* * check for "." */ if (idesc->id_entryno != 0) goto chk1; if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") == 0) { if (dirp->d_ino != idesc->id_number) { direrror(idesc->id_number, "BAD INODE NUMBER FOR '.'"); dirp->d_ino = idesc->id_number; if (reply("FIX") == 1) ret |= ALTERED; } if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '.'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk1; } direrror(idesc->id_number, "MISSING '.'"); proto.d_ino = idesc->id_number; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 1; (void)strcpy(proto.d_name, "."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (dirp->d_ino != 0 && strcmp(dirp->d_name, "..") != 0) { pfatal("CANNOT FIX, FIRST ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); } else if (dirp->d_reclen < entrysize) { pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '.'\n"); } else if (dirp->d_reclen < 2 * entrysize) { proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } else { n = dirp->d_reclen - entrysize; proto.d_reclen = entrysize; memmove(dirp, &proto, (size_t)entrysize); idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + entrysize); memset(dirp, 0, (size_t)n); dirp->d_reclen = n; if (reply("FIX") == 1) ret |= ALTERED; } chk1: if (idesc->id_entryno > 1) goto chk2; inp = getinoinfo(idesc->id_number); proto.d_ino = inp->i_parent; if (newinofmt) proto.d_type = DT_DIR; else proto.d_type = 0; proto.d_namlen = 2; (void)strcpy(proto.d_name, ".."); # if BYTE_ORDER == LITTLE_ENDIAN if (!newinofmt) { u_char tmp; tmp = proto.d_type; proto.d_type = proto.d_namlen; proto.d_namlen = tmp; } # endif entrysize = DIRSIZ(0, &proto); if (idesc->id_entryno == 0) { n = DIRSIZ(0, dirp); if (dirp->d_reclen < n + entrysize) goto chk2; proto.d_reclen = dirp->d_reclen - n; dirp->d_reclen = n; idesc->id_entryno++; lncntp[dirp->d_ino]--; dirp = (struct direct *)((char *)(dirp) + n); memset(dirp, 0, (size_t)proto.d_reclen); dirp->d_reclen = proto.d_reclen; } if (dirp->d_ino != 
0 && strcmp(dirp->d_name, "..") == 0) { inp->i_dotdot = dirp->d_ino; if (newinofmt && dirp->d_type != DT_DIR) { direrror(idesc->id_number, "BAD TYPE VALUE FOR '..'"); dirp->d_type = DT_DIR; if (reply("FIX") == 1) ret |= ALTERED; } goto chk2; } if (dirp->d_ino != 0 && strcmp(dirp->d_name, ".") != 0) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, SECOND ENTRY IN DIRECTORY CONTAINS %s\n", dirp->d_name); inp->i_dotdot = (ino_t)-1; } else if (dirp->d_reclen < entrysize) { fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); pfatal("CANNOT FIX, INSUFFICIENT SPACE TO ADD '..'\n"); inp->i_dotdot = (ino_t)-1; } else if (inp->i_parent != 0) { /* * We know the parent, so fix now. */ inp->i_dotdot = inp->i_parent; fileerror(inp->i_parent, idesc->id_number, "MISSING '..'"); proto.d_reclen = dirp->d_reclen; memmove(dirp, &proto, (size_t)entrysize); if (reply("FIX") == 1) ret |= ALTERED; } idesc->id_entryno++; if (dirp->d_ino != 0) lncntp[dirp->d_ino]--; return (ret|KEEPON); chk2: if (dirp->d_ino == 0) return (ret|KEEPON); if (dirp->d_namlen <= 2 && dirp->d_name[0] == '.' && idesc->id_entryno >= 2) { if (dirp->d_namlen == 1) { direrror(idesc->id_number, "EXTRA '.' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } if (dirp->d_name[1] == '.') { direrror(idesc->id_number, "EXTRA '..' ENTRY"); dirp->d_ino = 0; if (reply("FIX") == 1) ret |= ALTERED; return (KEEPON | ret); } } idesc->id_entryno++; n = 0; if (dirp->d_ino > maxino) { fileerror(idesc->id_number, dirp->d_ino, "I OUT OF RANGE"); n = reply("REMOVE"); } else if (newinofmt && ((dirp->d_ino == WINO && dirp->d_type != DT_WHT) || (dirp->d_ino != WINO && dirp->d_type == DT_WHT))) { fileerror(idesc->id_number, dirp->d_ino, "BAD WHITEOUT ENTRY"); dirp->d_ino = WINO; dirp->d_type = DT_WHT; if (reply("FIX") == 1) ret |= ALTERED; } else { again: switch (statemap[dirp->d_ino]) { case USTATE: if (idesc->id_entryno <= 2) break; fileerror(idesc->id_number, dirp->d_ino, "UNALLOCATED"); n = reply("REMOVE"); break; case DCLEAR: case FCLEAR: if (idesc->id_entryno <= 2) break; if (statemap[dirp->d_ino] == FCLEAR) errmsg = "DUP/BAD"; - else if (!preen) + else if (!preen && !usedsoftdep) errmsg = "ZERO LENGTH DIRECTORY"; else { n = 1; break; } fileerror(idesc->id_number, dirp->d_ino, errmsg); if ((n = reply("REMOVE")) == 1) break; dp = ginode(dirp->d_ino); statemap[dirp->d_ino] = (dp->di_mode & IFMT) == IFDIR ? 
DSTATE : FSTATE; lncntp[dirp->d_ino] = dp->di_nlink; goto again; case DSTATE: if (statemap[idesc->id_number] == DFOUND) statemap[dirp->d_ino] = DFOUND; /* fall through */ case DFOUND: inp = getinoinfo(dirp->d_ino); if (inp->i_parent != 0 && idesc->id_entryno > 2) { getpathname(pathbuf, idesc->id_number, idesc->id_number); getpathname(namebuf, dirp->d_ino, dirp->d_ino); pwarn("%s %s %s\n", pathbuf, "IS AN EXTRANEOUS HARD LINK TO DIRECTORY", namebuf); - if (preen) - printf(" (IGNORED)\n"); + if (preen) { + printf(" (REMOVED)\n"); + n = 1; + break; + } else if ((n = reply("REMOVE")) == 1) break; } if (idesc->id_entryno > 2) inp->i_parent = idesc->id_number; /* fall through */ case FSTATE: if (newinofmt && dirp->d_type != typemap[dirp->d_ino]) { fileerror(idesc->id_number, dirp->d_ino, "BAD TYPE VALUE"); dirp->d_type = typemap[dirp->d_ino]; if (reply("FIX") == 1) ret |= ALTERED; } lncntp[dirp->d_ino]--; break; default: errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[dirp->d_ino], dirp->d_ino); } } if (n == 0) return (ret|KEEPON); dirp->d_ino = 0; return (ret|KEEPON|ALTERED); } /* * Routine to sort disk blocks. */ static int blksort(arg1, arg2) const void *arg1, *arg2; { return ((*(struct inoinfo **)arg1)->i_blks[0] - (*(struct inoinfo **)arg2)->i_blks[0]); } Index: head/sbin/fsck_ifs/pass5.c =================================================================== --- head/sbin/fsck_ifs/pass5.c (revision 34265) +++ head/sbin/fsck_ifs/pass5.c (revision 34266) @@ -1,345 +1,375 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #ifndef lint static const char sccsid[] = "@(#)pass5.c 8.9 (Berkeley) 4/28/95"; #endif /* not lint */ #include #include #include #include #include #include #include "fsck.h" void pass5() { int c, blk, frags, basesize, sumsize, mapsize, savednrpos; + int inomapsize, blkmapsize; struct fs *fs = &sblock; struct cg *cg = &cgrp; ufs_daddr_t dbase, dmax; ufs_daddr_t d; - long i, j; + long i, j, k; struct csum *cs; struct csum cstotal; struct inodesc idesc[3]; char buf[MAXBSIZE]; register struct cg *newcg = (struct cg *)buf; struct ocg *ocg = (struct ocg *)buf; statemap[WINO] = USTATE; memset(newcg, 0, (size_t)fs->fs_cgsize); newcg->cg_niblk = fs->fs_ipg; if (cvtlevel >= 3) { if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) { if (preen) pwarn("DELETING CLUSTERING MAPS\n"); if (preen || reply("DELETE CLUSTERING MAPS")) { fs->fs_contigsumsize = 0; doinglevel1 = 1; sbdirty(); } } if (fs->fs_maxcontig > 1) { char *doit = 0; if (fs->fs_contigsumsize < 1) { doit = "CREAT"; } else if (fs->fs_contigsumsize < fs->fs_maxcontig && fs->fs_contigsumsize < FS_MAXCONTIG) { doit = "EXPAND"; } if (doit) { i = fs->fs_contigsumsize; fs->fs_contigsumsize = MIN(fs->fs_maxcontig, FS_MAXCONTIG); if (CGSIZE(fs) > fs->fs_bsize) { pwarn("CANNOT %s CLUSTER MAPS\n", doit); fs->fs_contigsumsize = i; } else if (preen || reply("CREATE CLUSTER MAPS")) { if (preen) pwarn("%sING CLUSTER MAPS\n", doit); fs->fs_cgsize = fragroundup(fs, CGSIZE(fs)); doinglevel1 = 1; sbdirty(); } } } } switch ((int)fs->fs_postblformat) { case FS_42POSTBLFMT: basesize = (char *)(&ocg->cg_btot[0]) - (char *)(&ocg->cg_firstfield); sumsize = &ocg->cg_iused[0] - (u_int8_t *)(&ocg->cg_btot[0]); mapsize = &ocg->cg_free[howmany(fs->fs_fpg, NBBY)] - (u_char *)&ocg->cg_iused[0]; + blkmapsize = howmany(fs->fs_fpg, NBBY); + inomapsize = &ocg->cg_free[0] - (u_char *)&ocg->cg_iused[0]; ocg->cg_magic = CG_MAGIC; savednrpos = fs->fs_nrpos; fs->fs_nrpos = 8; break; case FS_DYNAMICPOSTBLFMT: newcg->cg_btotoff = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); newcg->cg_boff = newcg->cg_btotoff + fs->fs_cpg * sizeof(long); newcg->cg_iusedoff = newcg->cg_boff + fs->fs_cpg * fs->fs_nrpos * sizeof(short); newcg->cg_freeoff = newcg->cg_iusedoff + howmany(fs->fs_ipg, NBBY); - if (fs->fs_contigsumsize <= 0) { - newcg->cg_nextfreeoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); - } else { - newcg->cg_clustersumoff = newcg->cg_freeoff + - howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY) - + inomapsize = newcg->cg_freeoff - newcg->cg_iusedoff; + newcg->cg_nextfreeoff = newcg->cg_freeoff + + howmany(fs->fs_cpg * fs->fs_spc / NSPF(fs), NBBY); + blkmapsize = newcg->cg_nextfreeoff - newcg->cg_freeoff; + if (fs->fs_contigsumsize > 0) { + newcg->cg_clustersumoff = newcg->cg_nextfreeoff - sizeof(long); newcg->cg_clustersumoff = roundup(newcg->cg_clustersumoff, sizeof(long)); newcg->cg_clusteroff = newcg->cg_clustersumoff + (fs->fs_contigsumsize + 1) * sizeof(long); newcg->cg_nextfreeoff = newcg->cg_clusteroff + howmany(fs->fs_cpg * fs->fs_spc / NSPB(fs), NBBY); } newcg->cg_magic = CG_MAGIC; basesize = &newcg->cg_space[0] - (u_char *)(&newcg->cg_firstfield); sumsize = newcg->cg_iusedoff - newcg->cg_btotoff; mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff; break; default: - sumsize = 0; /* keep lint happy */ + inomapsize = blkmapsize = sumsize = 0; /* keep lint happy */ errx(EEXIT, "UNKNOWN ROTATIONAL TABLE FORMAT %d", fs->fs_postblformat); } memset(&idesc[0], 0, sizeof idesc); for (i = 0; i < 3; i++) { idesc[i].id_type = ADDR; if 
(doinglevel2) idesc[i].id_fix = FIX; } memset(&cstotal, 0, sizeof(struct csum)); j = blknum(fs, fs->fs_size + fs->fs_frag - 1); for (i = fs->fs_size; i < j; i++) setbmap(i); for (c = 0; c < fs->fs_ncg; c++) { getblk(&cgblk, cgtod(fs, c), fs->fs_cgsize); if (!cg_chkmagic(cg)) pfatal("CG %d: BAD MAGIC NUMBER\n", c); dbase = cgbase(fs, c); dmax = dbase + fs->fs_fpg; if (dmax > fs->fs_size) dmax = fs->fs_size; newcg->cg_time = cg->cg_time; newcg->cg_cgx = c; if (c == fs->fs_ncg - 1) newcg->cg_ncyl = fs->fs_ncyl % fs->fs_cpg; else newcg->cg_ncyl = fs->fs_cpg; newcg->cg_ndblk = dmax - dbase; if (fs->fs_contigsumsize > 0) newcg->cg_nclusterblks = newcg->cg_ndblk / fs->fs_frag; newcg->cg_cs.cs_ndir = 0; newcg->cg_cs.cs_nffree = 0; newcg->cg_cs.cs_nbfree = 0; newcg->cg_cs.cs_nifree = fs->fs_ipg; if (cg->cg_rotor < newcg->cg_ndblk) newcg->cg_rotor = cg->cg_rotor; else newcg->cg_rotor = 0; if (cg->cg_frotor < newcg->cg_ndblk) newcg->cg_frotor = cg->cg_frotor; else newcg->cg_frotor = 0; if (cg->cg_irotor < newcg->cg_niblk) newcg->cg_irotor = cg->cg_irotor; else newcg->cg_irotor = 0; memset(&newcg->cg_frsum[0], 0, sizeof newcg->cg_frsum); memset(&cg_blktot(newcg)[0], 0, (size_t)(sumsize + mapsize)); if (fs->fs_postblformat == FS_42POSTBLFMT) ocg->cg_magic = CG_MAGIC; j = fs->fs_ipg * c; for (i = 0; i < fs->fs_ipg; j++, i++) { switch (statemap[j]) { case USTATE: break; case DSTATE: case DCLEAR: case DFOUND: newcg->cg_cs.cs_ndir++; /* fall through */ case FSTATE: case FCLEAR: newcg->cg_cs.cs_nifree--; setbit(cg_inosused(newcg), i); break; default: if (j < ROOTINO) break; errx(EEXIT, "BAD STATE %d FOR INODE I=%d", statemap[j], j); } } if (c == 0) for (i = 0; i < ROOTINO; i++) { setbit(cg_inosused(newcg), i); newcg->cg_cs.cs_nifree--; } for (i = 0, d = dbase; d < dmax; d += fs->fs_frag, i += fs->fs_frag) { frags = 0; for (j = 0; j < fs->fs_frag; j++) { if (testbmap(d + j)) continue; setbit(cg_blksfree(newcg), i + j); frags++; } if (frags == fs->fs_frag) { newcg->cg_cs.cs_nbfree++; j = cbtocylno(fs, i); cg_blktot(newcg)[j]++; cg_blks(fs, newcg, j)[cbtorpos(fs, i)]++; if (fs->fs_contigsumsize > 0) setbit(cg_clustersfree(newcg), i / fs->fs_frag); } else if (frags > 0) { newcg->cg_cs.cs_nffree += frags; blk = blkmap(fs, cg_blksfree(newcg), i); ffs_fragacct(fs, blk, newcg->cg_frsum, 1); } } if (fs->fs_contigsumsize > 0) { int32_t *sump = cg_clustersum(newcg); u_char *mapp = cg_clustersfree(newcg); int map = *mapp++; int bit = 1; int run = 0; for (i = 0; i < newcg->cg_nclusterblks; i++) { if ((map & bit) != 0) { run++; } else if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; run = 0; } if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (run != 0) { if (run > fs->fs_contigsumsize) run = fs->fs_contigsumsize; sump[run]++; } } cstotal.cs_nffree += newcg->cg_cs.cs_nffree; cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree; cstotal.cs_nifree += newcg->cg_cs.cs_nifree; cstotal.cs_ndir += newcg->cg_cs.cs_ndir; cs = &fs->fs_cs(fs, c); if (memcmp(&newcg->cg_cs, cs, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(cs, &newcg->cg_cs, sizeof *cs); sbdirty(); } if (doinglevel1) { memmove(cg, newcg, (size_t)fs->fs_cgsize); cgdirty(); continue; } - if (memcmp(cg_inosused(newcg), - cg_inosused(cg), mapsize) != 0 && - dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { - memmove(cg_inosused(cg), cg_inosused(newcg), - (size_t)mapsize); - cgdirty(); - } if ((memcmp(newcg, cg, basesize) != 0 || memcmp(&cg_blktot(newcg)[0], 
&cg_blktot(cg)[0], sumsize) != 0) && dofix(&idesc[2], "SUMMARY INFORMATION BAD")) { memmove(cg, newcg, (size_t)basesize); memmove(&cg_blktot(cg)[0], &cg_blktot(newcg)[0], (size_t)sumsize); + cgdirty(); + } + if (usedsoftdep) { + for (i = 0; i < inomapsize; i++) { + j = cg_inosused(newcg)[i]; + if ((cg_inosused(cg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_inosused(cg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED INODE %d MARKED FREE\n", + c * fs->fs_ipg + i * 8 + k); + } + } + for (i = 0; i < blkmapsize; i++) { + j = cg_blksfree(cg)[i]; + if ((cg_blksfree(newcg)[i] & j) == j) + continue; + for (k = 0; k < NBBY; k++) { + if ((j & (1 << k)) == 0) + continue; + if (cg_blksfree(newcg)[i] & (1 << k)) + continue; + pwarn("ALLOCATED FRAG %d MARKED FREE\n", + c * fs->fs_fpg + i * 8 + k); + } + } + } + if (memcmp(cg_inosused(newcg), cg_inosused(cg), mapsize) != 0 && + dofix(&idesc[1], "BLK(S) MISSING IN BIT MAPS")) { + memmove(cg_inosused(cg), cg_inosused(newcg), + (size_t)mapsize); cgdirty(); } } if (fs->fs_postblformat == FS_42POSTBLFMT) fs->fs_nrpos = savednrpos; if (memcmp(&cstotal, &fs->fs_cstotal, sizeof *cs) != 0 && dofix(&idesc[0], "FREE BLK COUNT(S) WRONG IN SUPERBLK")) { memmove(&fs->fs_cstotal, &cstotal, sizeof *cs); fs->fs_ronly = 0; sbdirty(); } if (fs->fs_fmod != 0) { pwarn("MODIFIED FLAG SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_fmod = 0; sbdirty(); } } if (fs->fs_clean == 0) { pwarn("CLEAN FLAG NOT SET IN SUPERBLOCK"); if (preen) printf(" (FIXED)\n"); if (preen || reply("FIX") == 1) { fs->fs_clean = 1; sbdirty(); } } } Index: head/sbin/fsck_ifs/setup.c =================================================================== --- head/sbin/fsck_ifs/setup.c (revision 34265) +++ head/sbin/fsck_ifs/setup.c (revision 34266) @@ -1,514 +1,520 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
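Soft updates guarantee that the only bitmap inconsistencies left behind by a crash are resources marked allocated that are really free, so the usedsoftdep block above only has to warn about the opposite, unexpected direction: bits free on disk that fsck's recomputed maps show as allocated. A condensed sketch of the comparison for one byte of the fragment map (helper name and parameters are illustrative):

	/*
	 * diskmap is one byte of cg_blksfree(cg), newmap the same byte
	 * of cg_blksfree(newcg); base is the fragment number of bit 0.
	 */
	static void
	warn_alloc_marked_free(diskmap, newmap, base)
		u_char diskmap, newmap;
		int base;
	{
		int k;

		if ((newmap & diskmap) == diskmap)
			return;		/* every disk-free bit agrees */
		for (k = 0; k < NBBY; k++)
			if ((diskmap & (1 << k)) && !(newmap & (1 << k)))
				pwarn("ALLOCATED FRAG %d MARKED FREE\n",
				    base + k);
	}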
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char sccsid[] = "@(#)setup.c 8.10 (Berkeley) 5/9/95"; #endif /* not lint */ #define DKTYPENAMES #include #include #include #include #include #include #include #include #include #include #include #include #include "fsck.h" struct bufarea asblk; #define altsblock (*asblk.b_un.b_fs) #define POWEROF2(num) (((num) & ((num) - 1)) == 0) static void badsb __P((int listerr, char *s)); static int calcsb __P((char *dev, int devfd, struct fs *fs)); static struct disklabel *getdisklabel __P((char *s, int fd)); static int readsb __P((int listerr)); /* * Read in a superblock finding an alternate if necessary. * Return 1 if successful, 0 if unsuccessful, -1 if filesystem * is already clean (preen mode only). */ int setup(dev) char *dev; { long cg, size, asked, i, j; long skipclean, bmapsize; struct disklabel *lp; off_t sizepb; struct stat statb; struct fs proto; havesb = 0; fswritefd = -1; skipclean = preen; if (stat(dev, &statb) < 0) { printf("Can't stat %s: %s\n", dev, strerror(errno)); return (0); } if ((statb.st_mode & S_IFMT) != S_IFCHR) { pfatal("%s is not a character device", dev); if (reply("CONTINUE") == 0) return (0); } if ((fsreadfd = open(dev, O_RDONLY)) < 0) { printf("Can't open %s: %s\n", dev, strerror(errno)); return (0); } if (preen == 0) printf("** %s", dev); if (nflag || (fswritefd = open(dev, O_WRONLY)) < 0) { fswritefd = -1; if (preen) pfatal("NO WRITE ACCESS"); printf(" (NO WRITE)"); } if (preen == 0) printf("\n"); fsmodified = 0; lfdir = 0; initbarea(&sblk); initbarea(&asblk); sblk.b_un.b_buf = malloc(SBSIZE); asblk.b_un.b_buf = malloc(SBSIZE); if (sblk.b_un.b_buf == NULL || asblk.b_un.b_buf == NULL) errx(EEXIT, "cannot allocate space for superblock"); lp = getdisklabel((char *)NULL, fsreadfd); if (lp) dev_bsize = secsize = lp->d_secsize; else dev_bsize = secsize = DEV_BSIZE; /* * Read in the superblock, looking for alternates if necessary */ if (readsb(1) == 0) { skipclean = 0; if (bflag || preen || calcsb(dev, fsreadfd, &proto) == 0) return(0); if (reply("LOOK FOR ALTERNATE SUPERBLOCKS") == 0) return (0); for (cg = 0; cg < proto.fs_ncg; cg++) { bflag = fsbtodb(&proto, cgsblock(&proto, cg)); if (readsb(0) != 0) break; } if (cg >= proto.fs_ncg) { printf("%s %s\n%s %s\n%s %s\n", "SEARCH FOR ALTERNATE SUPER-BLOCK", "FAILED. YOU MUST USE THE", "-b OPTION TO FSCK TO SPECIFY THE", "LOCATION OF AN ALTERNATE", "SUPER-BLOCK TO SUPPLY NEEDED", "INFORMATION; SEE fsck(8)."); bflag = 0; return(0); } pwarn("USING ALTERNATE SUPERBLOCK AT %d\n", bflag); bflag = 0; } maxfsblock = sblock.fs_size; maxino = sblock.fs_ncg * sblock.fs_ipg; /* * Check and potentially fix certain fields in the super block. 
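The alternate-superblock search above works because every cylinder group carries a spare copy of the superblock. For orientation, the FFS macros of this era locate that copy roughly as follows (paraphrased from <ufs/ffs/fs.h>; consult the header for the authoritative definitions):

	#define cgbase(fs, c)	((ufs_daddr_t)((fs)->fs_fpg * (c)))
	#define cgstart(fs, c) \
		(cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask)))
	#define cgsblock(fs, c)	(cgstart(fs, c) + (fs)->fs_sblkno)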
*/ if (sblock.fs_optim != FS_OPTTIME && sblock.fs_optim != FS_OPTSPACE) { pfatal("UNDEFINED OPTIMIZATION IN SUPERBLOCK"); if (reply("SET TO DEFAULT") == 1) { sblock.fs_optim = FS_OPTTIME; sbdirty(); } } if ((sblock.fs_minfree < 0 || sblock.fs_minfree > 99)) { pfatal("IMPOSSIBLE MINFREE=%d IN SUPERBLOCK", sblock.fs_minfree); if (reply("SET TO DEFAULT") == 1) { sblock.fs_minfree = 10; sbdirty(); } } if (sblock.fs_interleave < 1 || sblock.fs_interleave > sblock.fs_nsect) { pwarn("IMPOSSIBLE INTERLEAVE=%d IN SUPERBLOCK", sblock.fs_interleave); sblock.fs_interleave = 1; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_npsect < sblock.fs_nsect || sblock.fs_npsect > sblock.fs_nsect*2) { pwarn("IMPOSSIBLE NPSECT=%d IN SUPERBLOCK", sblock.fs_npsect); sblock.fs_npsect = sblock.fs_nsect; if (preen) printf(" (FIXED)\n"); if (preen || reply("SET TO DEFAULT") == 1) { sbdirty(); dirty(&asblk); } } if (sblock.fs_inodefmt >= FS_44INODEFMT) { newinofmt = 1; } else { sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; newinofmt = 0; } /* * Convert to new inode format. */ if (cvtlevel >= 2 && sblock.fs_inodefmt < FS_44INODEFMT) { if (preen) pwarn("CONVERTING TO NEW INODE FORMAT\n"); else if (!reply("CONVERT TO NEW INODE FORMAT")) return(0); doinglevel2++; sblock.fs_inodefmt = FS_44INODEFMT; sizepb = sblock.fs_bsize; sblock.fs_maxfilesize = sblock.fs_bsize * NDADDR - 1; for (i = 0; i < NIADDR; i++) { sizepb *= NINDIR(&sblock); sblock.fs_maxfilesize += sizepb; } sblock.fs_maxsymlinklen = MAXSYMLINKLEN; sblock.fs_qbmask = ~sblock.fs_bmask; sblock.fs_qfmask = ~sblock.fs_fmask; sbdirty(); dirty(&asblk); } /* * Convert to new cylinder group format. */ if (cvtlevel >= 1 && sblock.fs_postblformat == FS_42POSTBLFMT) { if (preen) pwarn("CONVERTING TO NEW CYLINDER GROUP FORMAT\n"); else if (!reply("CONVERT TO NEW CYLINDER GROUP FORMAT")) return(0); doinglevel1++; sblock.fs_postblformat = FS_DYNAMICPOSTBLFMT; sblock.fs_nrpos = 8; sblock.fs_postbloff = (char *)(&sblock.fs_opostbl[0][0]) - (char *)(&sblock.fs_firstfield); sblock.fs_rotbloff = &sblock.fs_space[0] - (u_char *)(&sblock.fs_firstfield); sblock.fs_cgsize = fragroundup(&sblock, CGSIZE(&sblock)); sbdirty(); dirty(&asblk); } if (asblk.b_dirty && !bflag) { memmove(&altsblock, &sblock, (size_t)sblock.fs_sbsize); flush(fswritefd, &asblk); } /* * read in the summary info. */ asked = 0; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { size = sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize; sblock.fs_csp[j] = (struct csum *)calloc(1, (unsigned)size); if (bread(fsreadfd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), size) != 0 && !asked) { pfatal("BAD SUMMARY INFORMATION"); - if (reply("CONTINUE") == 0) + if (reply("CONTINUE") == 0) { + ckfini(0); exit(EEXIT); + } asked++; } } /* * If we survive the above basic checks and are preening, * quit here unless forced. 
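The fs_maxfilesize value computed by the inode-format conversion above is the highest byte offset reachable through the direct and indirect block chains. A worked example with assumed parameters (8192-byte blocks, NDADDR = 12 direct pointers, 4-byte block addresses, hence NINDIR = 8192/4 = 2048, and NIADDR = 3 indirect levels):

	u_quad_t sizepb = 8192;			/* bytes mapped per pointer */
	u_quad_t maxsize = 8192 * 12 - 1;	/* the direct blocks */
	int level;

	for (level = 0; level < 3; level++) {
		sizepb *= 2048;		/* each level multiplies by NINDIR */
		maxsize += sizepb;
	}
	/* 8K*12 - 1 + 8K*(2048 + 2048^2 + 2048^3): a bit over 2^46 bytes */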
*/ if (skipclean && sblock.fs_clean && !fflag) return (-1); /* * allocate and initialize the necessary maps */ bmapsize = roundup(howmany(maxfsblock, NBBY), sizeof(short)); blockmap = calloc((unsigned)bmapsize, sizeof (char)); if (blockmap == NULL) { printf("cannot alloc %u bytes for blockmap\n", (unsigned)bmapsize); goto badsb; } statemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (statemap == NULL) { printf("cannot alloc %u bytes for statemap\n", (unsigned)(maxino + 1)); goto badsb; } typemap = calloc((unsigned)(maxino + 1), sizeof(char)); if (typemap == NULL) { printf("cannot alloc %u bytes for typemap\n", (unsigned)(maxino + 1)); goto badsb; } lncntp = (short *)calloc((unsigned)(maxino + 1), sizeof(short)); if (lncntp == NULL) { printf("cannot alloc %u bytes for lncntp\n", (unsigned)(maxino + 1) * sizeof(short)); goto badsb; } numdirs = sblock.fs_cstotal.cs_ndir; if (numdirs == 0) { printf("numdirs is zero, try using an alternate superblock\n"); goto badsb; } inplast = 0; listmax = numdirs + 10; inpsort = (struct inoinfo **)calloc((unsigned)listmax, sizeof(struct inoinfo *)); inphead = (struct inoinfo **)calloc((unsigned)numdirs, sizeof(struct inoinfo *)); if (inpsort == NULL || inphead == NULL) { printf("cannot alloc %u bytes for inphead\n", (unsigned)numdirs * sizeof(struct inoinfo *)); goto badsb; } bufinit(); + if (sblock.fs_flags & FS_DOSOFTDEP) + usedsoftdep = 1; + else + usedsoftdep = 0; return (1); badsb: ckfini(0); return (0); } /* * Read in the super block and its summary info. */ static int readsb(listerr) int listerr; { ufs_daddr_t super = bflag ? bflag : SBOFF / dev_bsize; if (bread(fsreadfd, (char *)&sblock, super, (long)SBSIZE) != 0) return (0); sblk.b_bno = super; sblk.b_size = SBSIZE; /* * run a few consistency checks of the super block */ if (sblock.fs_magic != FS_MAGIC) { badsb(listerr, "MAGIC NUMBER WRONG"); return (0); } if (sblock.fs_ncg < 1) { badsb(listerr, "NCG OUT OF RANGE"); return (0); } if (sblock.fs_cpg < 1) { badsb(listerr, "CPG OUT OF RANGE"); return (0); } if (sblock.fs_ncg * sblock.fs_cpg < sblock.fs_ncyl || (sblock.fs_ncg - 1) * sblock.fs_cpg >= sblock.fs_ncyl) { badsb(listerr, "NCYL LESS THAN NCG*CPG"); return (0); } if (sblock.fs_sbsize > SBSIZE) { badsb(listerr, "SIZE PREPOSTEROUSLY LARGE"); return (0); } /* * Compute block size that the filesystem is based on, * according to fsbtodb, and adjust superblock block number * so we can tell if this is an alternate later. */ super *= dev_bsize; dev_bsize = sblock.fs_fsize / fsbtodb(&sblock, 1); sblk.b_bno = super / dev_bsize; if (bflag) { havesb = 1; return (1); } /* * Set all possible fields that could differ, then do check * of whole super block against an alternate super block. * When an alternate super-block is specified this check is skipped. 
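The FS_DOSOFTDEP test added at the end of setup() above is the single switch that enables the softdep-aware behavior in pass 5, pfatal(), and the directory checks; the if/else pair is equivalent to the one-liner:

	/* equivalent form of the usedsoftdep assignment above */
	usedsoftdep = (sblock.fs_flags & FS_DOSOFTDEP) != 0;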
*/ getblk(&asblk, cgsblock(&sblock, sblock.fs_ncg - 1), sblock.fs_sbsize); if (asblk.b_errs) return (0); altsblock.fs_firstfield = sblock.fs_firstfield; altsblock.fs_unused_1 = sblock.fs_unused_1; altsblock.fs_time = sblock.fs_time; altsblock.fs_cstotal = sblock.fs_cstotal; altsblock.fs_cgrotor = sblock.fs_cgrotor; altsblock.fs_fmod = sblock.fs_fmod; altsblock.fs_clean = sblock.fs_clean; altsblock.fs_ronly = sblock.fs_ronly; altsblock.fs_flags = sblock.fs_flags; altsblock.fs_maxcontig = sblock.fs_maxcontig; altsblock.fs_minfree = sblock.fs_minfree; altsblock.fs_optim = sblock.fs_optim; altsblock.fs_rotdelay = sblock.fs_rotdelay; altsblock.fs_maxbpg = sblock.fs_maxbpg; memmove(altsblock.fs_csp, sblock.fs_csp, sizeof sblock.fs_csp); altsblock.fs_maxcluster = sblock.fs_maxcluster; memmove(altsblock.fs_fsmnt, sblock.fs_fsmnt, sizeof sblock.fs_fsmnt); memmove(altsblock.fs_sparecon, sblock.fs_sparecon, sizeof sblock.fs_sparecon); /* * The following should not have to be copied. */ altsblock.fs_fsbtodb = sblock.fs_fsbtodb; altsblock.fs_interleave = sblock.fs_interleave; altsblock.fs_npsect = sblock.fs_npsect; altsblock.fs_nrpos = sblock.fs_nrpos; altsblock.fs_state = sblock.fs_state; altsblock.fs_qbmask = sblock.fs_qbmask; altsblock.fs_qfmask = sblock.fs_qfmask; altsblock.fs_state = sblock.fs_state; altsblock.fs_maxfilesize = sblock.fs_maxfilesize; if (memcmp(&sblock, &altsblock, (int)sblock.fs_sbsize)) { if (debug) { long *nlp, *olp, *endlp; printf("superblock mismatches\n"); nlp = (long *)&altsblock; olp = (long *)&sblock; endlp = olp + (sblock.fs_sbsize / sizeof *olp); for ( ; olp < endlp; olp++, nlp++) { if (*olp == *nlp) continue; printf("offset %d, original %d, alternate %d\n", olp - (long *)&sblock, *olp, *nlp); } } badsb(listerr, "VALUES IN SUPER BLOCK DISAGREE WITH THOSE IN FIRST ALTERNATE"); return (0); } havesb = 1; return (1); } static void badsb(listerr, s) int listerr; char *s; { if (!listerr) return; if (preen) printf("%s: ", cdevname); pfatal("BAD SUPER BLOCK: %s\n", s); } /* * Calculate a prototype superblock based on information in the disk label. * When done the cgsblock macro can be calculated and the fs_ncg field * can be used. Do NOT attempt to use other macros without verifying that * their needed information is available! */ static int calcsb(dev, devfd, fs) char *dev; int devfd; register struct fs *fs; { register struct disklabel *lp; register struct partition *pp; register char *cp; int i; cp = strchr(dev, '\0') - 1; if (cp == (char *)-1 || ((*cp < 'a' || *cp > 'h') && !isdigit(*cp))) { pfatal("%s: CANNOT FIGURE OUT FILE SYSTEM PARTITION\n", dev); return (0); } lp = getdisklabel(dev, devfd); if (isdigit(*cp)) pp = &lp->d_partitions[0]; else pp = &lp->d_partitions[*cp - 'a']; if (pp->p_fstype != FS_BSDFFS) { pfatal("%s: NOT LABELED AS A BSD FILE SYSTEM (%s)\n", dev, pp->p_fstype < FSMAXTYPES ? 
fstypenames[pp->p_fstype] : "unknown"); return (0); } if (pp->p_fsize == 0 || pp->p_frag == 0) { pfatal("%s: LABELED AS A %s FILE SYSTEM, BUT BLOCK SIZE IS 0\n", dev, fstypenames[pp->p_fstype]); return (0); } memset(fs, 0, sizeof(struct fs)); fs->fs_fsize = pp->p_fsize; fs->fs_frag = pp->p_frag; fs->fs_cpg = pp->p_cpg; fs->fs_size = pp->p_size; fs->fs_ntrak = lp->d_ntracks; fs->fs_nsect = lp->d_nsectors; fs->fs_spc = lp->d_secpercyl; fs->fs_nspf = fs->fs_fsize / lp->d_secsize; fs->fs_sblkno = roundup( howmany(lp->d_bbsize + lp->d_sbsize, fs->fs_fsize), fs->fs_frag); fs->fs_cgmask = 0xffffffff; for (i = fs->fs_ntrak; i > 1; i >>= 1) fs->fs_cgmask <<= 1; if (!POWEROF2(fs->fs_ntrak)) fs->fs_cgmask <<= 1; fs->fs_cgoffset = roundup( howmany(fs->fs_nsect, NSPF(fs)), fs->fs_frag); fs->fs_fpg = (fs->fs_cpg * fs->fs_spc) / NSPF(fs); fs->fs_ncg = howmany(fs->fs_size / fs->fs_spc, fs->fs_cpg); for (fs->fs_fsbtodb = 0, i = NSPF(fs); i > 1; i >>= 1) fs->fs_fsbtodb++; dev_bsize = lp->d_secsize; return (1); } static struct disklabel * getdisklabel(s, fd) char *s; int fd; { static struct disklabel lab; if (ioctl(fd, DIOCGDINFO, (char *)&lab) < 0) { if (s == NULL) return ((struct disklabel *)NULL); pwarn("ioctl (GCINFO): %s\n", strerror(errno)); errx(EEXIT, "%s: can't read disk label", s); } return (&lab); } Index: head/sbin/fsck_ifs/utilities.c =================================================================== --- head/sbin/fsck_ifs/utilities.c (revision 34265) +++ head/sbin/fsck_ifs/utilities.c (revision 34266) @@ -1,625 +1,648 @@ /* * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
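calcsb() above rebuilds just enough superblock geometry from the disk label for the cgsblock() search to work. The core relations, shown with an assumed label (512-byte sectors, 1024-byte fragments so NSPF(fs) = 2, spc = 2048 sectors per cylinder, cpg = 16 cylinders per group, and a 1048576-sector partition; all values illustrative):

	int nspf = 1024 / 512;		/* fs_fsize / d_secsize = 2 */
	int fpg = (16 * 2048) / nspf;	/* cpg * spc / NSPF = 16384 frags */
	int ncyl = 1048576 / 2048;	/* partition sectors / spc = 512 */
	int ncg = howmany(ncyl, 16);	/* 32 groups; a tail rounds up */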
*/ #ifndef lint static const char sccsid[] = "@(#)utilities.c 8.6 (Berkeley) 5/19/95"; #endif /* not lint */ #include #include #include #include #include #include #include #include #include "fsck.h" long diskreads, totalreads; /* Disk cache statistics */ static void rwerror __P((char *mesg, ufs_daddr_t blk)); int ftypeok(dp) struct dinode *dp; { switch (dp->di_mode & IFMT) { case IFDIR: case IFREG: case IFBLK: case IFCHR: case IFLNK: case IFSOCK: case IFIFO: return (1); default: if (debug) printf("bad file type 0%o\n", dp->di_mode); return (0); } } int reply(question) char *question; { int persevere; char c; if (preen) pfatal("INTERNAL ERROR: GOT TO reply()"); persevere = !strcmp(question, "CONTINUE"); printf("\n"); if (!persevere && (nflag || fswritefd < 0)) { printf("%s? no\n\n", question); + resolved = 0; return (0); } if (yflag || (persevere && nflag)) { printf("%s? yes\n\n", question); return (1); } do { printf("%s? [yn] ", question); (void) fflush(stdout); c = getc(stdin); - while (c != '\n' && getc(stdin) != '\n') - if (feof(stdin)) + while (c != '\n' && getc(stdin) != '\n') { + if (feof(stdin)) { + resolved = 0; return (0); + } + } } while (c != 'y' && c != 'Y' && c != 'n' && c != 'N'); printf("\n"); if (c == 'y' || c == 'Y') return (1); + resolved = 0; return (0); } /* * Malloc buffers and set up cache. */ void bufinit() { register struct bufarea *bp; long bufcnt, i; char *bufp; pbp = pdirbp = (struct bufarea *)0; bufp = malloc((unsigned int)sblock.fs_bsize); if (bufp == 0) errx(EEXIT, "cannot allocate buffer pool"); cgblk.b_un.b_buf = bufp; initbarea(&cgblk); bufhead.b_next = bufhead.b_prev = &bufhead; bufcnt = MAXBUFSPACE / sblock.fs_bsize; if (bufcnt < MINBUFS) bufcnt = MINBUFS; for (i = 0; i < bufcnt; i++) { bp = (struct bufarea *)malloc(sizeof(struct bufarea)); bufp = malloc((unsigned int)sblock.fs_bsize); if (bp == NULL || bufp == NULL) { if (i >= MINBUFS) break; errx(EEXIT, "cannot allocate buffer pool"); } bp->b_un.b_buf = bufp; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; initbarea(bp); } bufhead.b_size = i; /* save number of buffers */ } /* * Manage a cache of directory blocks. */ struct bufarea * getdatablk(blkno, size) ufs_daddr_t blkno; long size; { register struct bufarea *bp; for (bp = bufhead.b_next; bp != &bufhead; bp = bp->b_next) if (bp->b_bno == fsbtodb(&sblock, blkno)) goto foundit; for (bp = bufhead.b_prev; bp != &bufhead; bp = bp->b_prev) if ((bp->b_flags & B_INUSE) == 0) break; if (bp == &bufhead) errx(EEXIT, "deadlocked buffer pool"); getblk(bp, blkno, size); /* fall through */ foundit: totalreads++; bp->b_prev->b_next = bp->b_next; bp->b_next->b_prev = bp->b_prev; bp->b_prev = &bufhead; bp->b_next = bufhead.b_next; bufhead.b_next->b_prev = bp; bufhead.b_next = bp; bp->b_flags |= B_INUSE; return (bp); } void getblk(bp, blk, size) register struct bufarea *bp; ufs_daddr_t blk; long size; { ufs_daddr_t dblk; dblk = fsbtodb(&sblock, blk); if (bp->b_bno != dblk) { flush(fswritefd, bp); diskreads++; bp->b_errs = bread(fsreadfd, bp->b_un.b_buf, dblk, size); bp->b_bno = dblk; bp->b_size = size; } } void flush(fd, bp) int fd; register struct bufarea *bp; { register int i, j; if (!bp->b_dirty) return; if (bp->b_errs != 0) pfatal("WRITING %sZERO'ED BLOCK %d TO DISK\n", (bp->b_errs == bp->b_size / dev_bsize) ? 
"" : "PARTIALLY ", bp->b_bno); bp->b_dirty = 0; bp->b_errs = 0; bwrite(fd, bp->b_un.b_buf, bp->b_bno, (long)bp->b_size); if (bp != &sblk) return; for (i = 0, j = 0; i < sblock.fs_cssize; i += sblock.fs_bsize, j++) { bwrite(fswritefd, (char *)sblock.fs_csp[j], fsbtodb(&sblock, sblock.fs_csaddr + j * sblock.fs_frag), sblock.fs_cssize - i < sblock.fs_bsize ? sblock.fs_cssize - i : sblock.fs_bsize); } } static void rwerror(mesg, blk) char *mesg; ufs_daddr_t blk; { if (preen == 0) printf("\n"); pfatal("CANNOT %s: BLK %ld", mesg, blk); if (reply("CONTINUE") == 0) exit(EEXIT); } void ckfini(markclean) int markclean; { register struct bufarea *bp, *nbp; int ofsmodified, cnt = 0; if (fswritefd < 0) { (void)close(fsreadfd); return; } flush(fswritefd, &sblk); if (havesb && sblk.b_bno != SBOFF / dev_bsize && !preen && reply("UPDATE STANDARD SUPERBLOCK")) { sblk.b_bno = SBOFF / dev_bsize; sbdirty(); flush(fswritefd, &sblk); } flush(fswritefd, &cgblk); free(cgblk.b_un.b_buf); for (bp = bufhead.b_prev; bp && bp != &bufhead; bp = nbp) { cnt++; flush(fswritefd, bp); nbp = bp->b_prev; free(bp->b_un.b_buf); free((char *)bp); } if (bufhead.b_size != cnt) errx(EEXIT, "Panic: lost %d buffers", bufhead.b_size - cnt); pbp = pdirbp = (struct bufarea *)0; if (markclean && sblock.fs_clean == 0) { sblock.fs_clean = 1; sbdirty(); ofsmodified = fsmodified; flush(fswritefd, &sblk); fsmodified = ofsmodified; if (!preen) printf("\n***** FILE SYSTEM MARKED CLEAN *****\n"); } if (debug) printf("cache missed %ld of %ld (%d%%)\n", diskreads, totalreads, (int)(diskreads * 100 / totalreads)); (void)close(fsreadfd); (void)close(fswritefd); } int bread(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { char *cp; int i, errs; off_t offset; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (read(fd, buf, (int)size) == size) return (0); rwerror("READ", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); errs = 0; memset(buf, 0, (size_t)size); printf("THE FOLLOWING DISK SECTORS COULD NOT BE READ:"); for (cp = buf, i = 0; i < size; i += secsize, cp += secsize) { if (read(fd, cp, (int)secsize) != secsize) { (void)lseek(fd, offset + i + secsize, 0); if (secsize != dev_bsize && dev_bsize != 1) printf(" %ld (%ld),", (blk * dev_bsize + i) / secsize, blk + i / dev_bsize); else printf(" %ld,", blk + i / dev_bsize); errs++; } } printf("\n"); return (errs); } void bwrite(fd, buf, blk, size) int fd; char *buf; ufs_daddr_t blk; long size; { int i; char *cp; off_t offset; if (fd < 0) return; offset = blk; offset *= dev_bsize; if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); else if (write(fd, buf, (int)size) == size) { fsmodified = 1; return; } rwerror("WRITE", blk); if (lseek(fd, offset, 0) < 0) rwerror("SEEK", blk); printf("THE FOLLOWING SECTORS COULD NOT BE WRITTEN:"); for (cp = buf, i = 0; i < size; i += dev_bsize, cp += dev_bsize) if (write(fd, cp, (int)dev_bsize) != dev_bsize) { (void)lseek(fd, offset + i + dev_bsize, 0); printf(" %ld,", blk + i / dev_bsize); } printf("\n"); return; } /* * allocate a data block with the specified number of fragments */ ufs_daddr_t allocblk(frags) long frags; { - register int i, j, k; + int i, j, k, cg, baseblk; + struct cg *cgp = &cgrp; if (frags <= 0 || frags > sblock.fs_frag) return (0); for (i = 0; i < maxfsblock - sblock.fs_frag; i += sblock.fs_frag) { for (j = 0; j <= sblock.fs_frag - frags; j++) { if (testbmap(i + j)) continue; for (k = 1; k < frags; k++) if (testbmap(i + j + k)) break; if (k < frags) { j += k; continue; } - for 
(k = 0; k < frags; k++) + cg = dtog(&sblock, i + j); + getblk(&cgblk, cgtod(&sblock, cg), sblock.fs_cgsize); + if (!cg_chkmagic(cgp)) + pfatal("CG %d: BAD MAGIC NUMBER\n", cg); + baseblk = dtogd(&sblock, i + j); + for (k = 0; k < frags; k++) { setbmap(i + j + k); + clrbit(cg_blksfree(cgp), baseblk + k); + } n_blks += frags; + if (frags == sblock.fs_frag) + cgp->cg_cs.cs_nbfree--; + else + cgp->cg_cs.cs_nffree -= frags; + cgdirty(); return (i + j); } } return (0); } /* * Free a previously allocated block */ void freeblk(blkno, frags) ufs_daddr_t blkno; long frags; { struct inodesc idesc; idesc.id_blkno = blkno; idesc.id_numfrags = frags; (void)pass4check(&idesc); } /* * Find a pathname */ void getpathname(namebuf, curdir, ino) char *namebuf; ino_t curdir, ino; { int len; register char *cp; struct inodesc idesc; static int busy = 0; if (curdir == ino && ino == ROOTINO) { (void)strcpy(namebuf, "/"); return; } if (busy || (statemap[curdir] != DSTATE && statemap[curdir] != DFOUND)) { (void)strcpy(namebuf, "?"); return; } busy = 1; memset(&idesc, 0, sizeof(struct inodesc)); idesc.id_type = DATA; idesc.id_fix = IGNORE; cp = &namebuf[MAXPATHLEN - 1]; *cp = '\0'; if (curdir != ino) { idesc.id_parent = curdir; goto namelookup; } while (ino != ROOTINO) { idesc.id_number = ino; idesc.id_func = findino; idesc.id_name = ".."; if ((ckinode(ginode(ino), &idesc) & FOUND) == 0) break; namelookup: idesc.id_number = idesc.id_parent; idesc.id_parent = ino; idesc.id_func = findname; idesc.id_name = namebuf; if ((ckinode(ginode(idesc.id_number), &idesc)&FOUND) == 0) break; len = strlen(namebuf); cp -= len; memmove(cp, namebuf, (size_t)len); *--cp = '/'; if (cp < &namebuf[MAXNAMLEN]) break; ino = idesc.id_number; } busy = 0; if (ino != ROOTINO) *--cp = '?'; memmove(namebuf, cp, (size_t)(&namebuf[MAXPATHLEN] - cp)); } void catch(sig) int sig; { if (!doinglevel2) ckfini(0); exit(12); } /* * When preening, allow a single quit to signal * a special exit after filesystem checks complete * so that reboot sequence may be interrupted. */ void catchquit(sig) int sig; { printf("returning to single-user after filesystem check\n"); returntosingle = 1; (void)signal(SIGQUIT, SIG_DFL); } /* * Ignore a single quit signal; wait and flush just in case. * Used by child processes in preen. */ void voidquit(sig) int sig; { sleep(1); (void)signal(SIGQUIT, SIG_IGN); (void)signal(SIGQUIT, SIG_DFL); } /* * determine whether an inode should be fixed. */ int dofix(idesc, msg) register struct inodesc *idesc; char *msg; { switch (idesc->id_fix) { case DONTKNOW: if (idesc->id_type == DATA) direrror(idesc->id_number, msg); else pwarn(msg); if (preen) { printf(" (SALVAGED)\n"); idesc->id_fix = FIX; return (ALTERED); } if (reply("SALVAGE") == 0) { idesc->id_fix = NOFIX; return (0); } idesc->id_fix = FIX; return (ALTERED); case FIX: return (ALTERED); case NOFIX: case IGNORE: return (0); default: errx(EEXIT, "UNKNOWN INODESC FIX MODE %d", idesc->id_fix); } /* NOTREACHED */ return (0); } #if __STDC__ #include #else #include #endif /* * An unexpected inconsistency occurred. - * Die if preening, otherwise just print message and continue. + * Die if preening or the filesystem is running with the soft dependency + * protocol, otherwise just print message and continue. */ void #if __STDC__ pfatal(const char *fmt, ...)
#else pfatal(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (!preen) { (void)vfprintf(stderr, fmt, ap); va_end(ap); + if (usedsoftdep) + (void)fprintf(stderr, + "\nUNEXPECTED SOFTDEP INCONSISTENCY\n"); return; } (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); (void)fprintf(stderr, - "\n%s: UNEXPECTED INCONSISTENCY; RUN fsck MANUALLY.\n", - cdevname); + "\n%s: UNEXPECTED%sINCONSISTENCY; RUN fsck MANUALLY.\n", + cdevname, usedsoftdep ? " SOFTDEP " : " "); + ckfini(0); exit(EEXIT); } /* - * Pwarn just prints a message when not preening, - * or a warning (preceded by filename) when preening. + * Pwarn just prints a message when not preening or running soft dependency + * protocol, or a warning (preceded by filename) when preening. */ void #if __STDC__ pwarn(const char *fmt, ...) #else pwarn(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif if (preen) (void)fprintf(stderr, "%s: ", cdevname); (void)vfprintf(stderr, fmt, ap); va_end(ap); } /* * Stub for routines from kernel. */ void #if __STDC__ panic(const char *fmt, ...) #else panic(fmt, va_alist) char *fmt; va_dcl #endif { va_list ap; #if __STDC__ va_start(ap, fmt); #else va_start(ap); #endif pfatal("INTERNAL INCONSISTENCY:"); (void)vfprintf(stderr, fmt, ap); va_end(ap); exit(EEXIT); } Index: head/sbin/mount/mount.c =================================================================== --- head/sbin/mount/mount.c (revision 34265) +++ head/sbin/mount/mount.c (revision 34266) @@ -1,614 +1,618 @@ /*- * Copyright (c) 1980, 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1989, 1993, 1994\n\ The Regents of the University of California. 
All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)mount.c 8.25 (Berkeley) 5/8/95"; #else static const char rcsid[] = - "$Id: mount.c,v 1.21 1997/11/13 00:28:49 julian Exp $"; + "$Id: mount.c,v 1.22 1998/02/13 04:54:27 bde Exp $"; #endif #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "extern.h" #include "pathnames.h" int debug, fstab_style, verbose; char *catopt __P((char *, const char *)); struct statfs *getmntpt __P((const char *)); int hasopt __P((const char *, const char *)); int ismounted __P((struct fstab *, struct statfs *, int)); int isremountable __P((const char *)); void mangle __P((char *, int *, const char **)); int mountfs __P((const char *, const char *, const char *, int, const char *, const char *)); void prmount __P((struct statfs *)); void putfsent __P((const struct statfs *)); void usage __P((void)); /* Map from mount options to printable formats. */ static struct opt { int o_opt; const char *o_name; } optnames[] = { { MNT_ASYNC, "asynchronous" }, { MNT_EXPORTED, "NFS exported" }, { MNT_LOCAL, "local" }, { MNT_NOATIME, "noatime" }, { MNT_NODEV, "nodev" }, { MNT_NOEXEC, "noexec" }, { MNT_NOSUID, "nosuid" }, { MNT_QUOTA, "with quotas" }, { MNT_RDONLY, "read-only" }, { MNT_SYNCHRONOUS, "synchronous" }, { MNT_UNION, "union" }, { MNT_NOCLUSTERR, "noclusterr" }, { MNT_NOCLUSTERW, "noclusterw" }, { MNT_SUIDDIR, "suiddir" }, + { MNT_SOFTDEP, "soft-updates" }, { NULL } }; /* * List of VFS types that can be remounted without becoming mounted on top * of each other. * XXX Is this list correct? */ static const char * remountable_fs_names[] = { "ufs", "ffs", "lfs", "ext2fs", 0 }; int main(argc, argv) int argc; char * const argv[]; { const char *mntfromname, **vfslist, *vfstype; struct fstab *fs; struct statfs *mntbuf; FILE *mountdfp; pid_t pid; int all, ch, i, init_flags, mntsize, rval; char *options; all = init_flags = 0; options = NULL; vfslist = NULL; vfstype = "ufs"; while ((ch = getopt(argc, argv, "adfo:prwt:uv")) != -1) switch (ch) { case 'a': all = 1; break; case 'd': debug = 1; break; case 'f': init_flags |= MNT_FORCE; break; case 'o': if (*optarg) options = catopt(options, optarg); break; case 'p': fstab_style = 1; verbose = 1; break; case 'r': init_flags |= MNT_RDONLY; break; case 't': if (vfslist != NULL) errx(1, "only one -t option may be specified."); vfslist = makevfslist(optarg); vfstype = optarg; break; case 'u': init_flags |= MNT_UPDATE; break; case 'v': verbose = 1; break; case 'w': init_flags &= ~MNT_RDONLY; break; case '?': default: usage(); /* NOTREACHED */ } argc -= optind; argv += optind; #define BADTYPE(type) \ (strcmp(type, FSTAB_RO) && \ strcmp(type, FSTAB_RW) && strcmp(type, FSTAB_RQ)) rval = 0; switch (argc) { case 0: if ((mntsize = getmntinfo(&mntbuf, MNT_NOWAIT)) == 0) err(1, "getmntinfo"); if (all) { while ((fs = getfsent()) != NULL) { if (BADTYPE(fs->fs_type)) continue; if (checkvfsname(fs->fs_vfstype, vfslist)) continue; if (hasopt(fs->fs_mntops, "noauto")) continue; if (ismounted(fs, mntbuf, mntsize)) continue; if (mountfs(fs->fs_vfstype, fs->fs_spec, fs->fs_file, init_flags, options, fs->fs_mntops)) rval = 1; } } else if (fstab_style) { for (i = 0; i < mntsize; i++) { if (checkvfsname(mntbuf[i].f_fstypename, vfslist)) continue; putfsent(&mntbuf[i]); } } else { for (i = 0; i < mntsize; i++) { if (checkvfsname(mntbuf[i].f_fstypename, vfslist)) continue; prmount(&mntbuf[i]); } } exit(rval); case 1: if (vfslist
!= NULL) usage(); if (init_flags & MNT_UPDATE) { if ((mntbuf = getmntpt(*argv)) == NULL) errx(1, "unknown special file or file system %s.", *argv); if ((fs = getfsfile(mntbuf->f_mntonname)) != NULL) mntfromname = fs->fs_spec; else mntfromname = mntbuf->f_mntfromname; rval = mountfs(mntbuf->f_fstypename, mntfromname, mntbuf->f_mntonname, init_flags, options, 0); break; } if ((fs = getfsfile(*argv)) == NULL && (fs = getfsspec(*argv)) == NULL) errx(1, "%s: unknown special file or file system.", *argv); if (BADTYPE(fs->fs_type)) errx(1, "%s has unknown file system type.", *argv); rval = mountfs(fs->fs_vfstype, fs->fs_spec, fs->fs_file, init_flags, options, fs->fs_mntops); break; case 2: /* * If -t flag has not been specified, and spec contains either * a ':' or a '@' then assume that an NFS filesystem is being * specified ala Sun. */ if (vfslist == NULL && strpbrk(argv[0], ":@") != NULL) vfstype = "nfs"; rval = mountfs(vfstype, argv[0], argv[1], init_flags, options, NULL); break; default: usage(); /* NOTREACHED */ } /* * If the mount was successful, and done by root, tell mountd the * good news. Pid checks are probably unnecessary, but don't hurt. */ if (rval == 0 && getuid() == 0 && (mountdfp = fopen(_PATH_MOUNTDPID, "r")) != NULL) { if (fscanf(mountdfp, "%d", &pid) == 1 && pid > 0 && kill(pid, SIGHUP) == -1 && errno != ESRCH) err(1, "signal mountd"); (void)fclose(mountdfp); } exit(rval); } int ismounted(fs, mntbuf, mntsize) struct fstab *fs; struct statfs *mntbuf; int mntsize; { int i; if (fs->fs_file[0] == '/' && fs->fs_file[1] == '\0') /* the root file system can always be remounted */ return (0); for (i = mntsize - 1; i >= 0; --i) if (strcmp(fs->fs_file, mntbuf[i].f_mntonname) == 0 && (!isremountable(fs->fs_vfstype) || strcmp(fs->fs_spec, mntbuf[i].f_mntfromname) == 0)) return (1); return (0); } int isremountable(vfsname) const char *vfsname; { const char **cp; for (cp = remountable_fs_names; *cp; cp++) if (strcmp(*cp, vfsname) == 0) return (1); return (0); } int hasopt(mntopts, option) const char *mntopts, *option; { int negative, found; char *opt, *optbuf; if (option[0] == 'n' && option[1] == 'o') { negative = 1; option += 2; } else negative = 0; optbuf = strdup(mntopts); found = 0; for (opt = optbuf; (opt = strtok(opt, ",")) != NULL; opt = NULL) { if (opt[0] == 'n' && opt[1] == 'o') { if (!strcasecmp(opt + 2, option)) found = negative; } else if (!strcasecmp(opt, option)) found = !negative; } free(optbuf); return (found); } int mountfs(vfstype, spec, name, flags, options, mntopts) const char *vfstype, *spec, *name, *options, *mntopts; int flags; { /* List of directories containing mount_xxx subcommands.
*/ static const char *edirs[] = { _PATH_SBIN, _PATH_USRSBIN, NULL }; const char *argv[100], **edir; struct stat sb; struct statfs sf; pid_t pid; int argc, i, status; char *optbuf, execname[MAXPATHLEN + 1], mntpath[MAXPATHLEN]; #if __GNUC__ (void)&optbuf; (void)&name; #endif if (realpath(name, mntpath) != NULL && stat(mntpath, &sb) == 0) { if (!S_ISDIR(sb.st_mode)) { warnx("%s: Not a directory", mntpath); return (1); } } else { warn("%s", mntpath); return (1); } name = mntpath; if (mntopts == NULL) mntopts = ""; if (options == NULL) { if (*mntopts == '\0') { options = "rw"; } else { options = mntopts; mntopts = ""; } } optbuf = catopt(strdup(mntopts), options); if (strcmp(name, "/") == 0) flags |= MNT_UPDATE; if (flags & MNT_FORCE) optbuf = catopt(optbuf, "force"); if (flags & MNT_RDONLY) optbuf = catopt(optbuf, "ro"); /* * XXX * The mount_mfs (newfs) command uses -o to select the * optimisation mode. We don't pass the default "-o rw" * for that reason. */ if (flags & MNT_UPDATE) optbuf = catopt(optbuf, "update"); argc = 0; argv[argc++] = vfstype; mangle(optbuf, &argc, argv); argv[argc++] = spec; argv[argc++] = name; argv[argc] = NULL; if (debug) { (void)printf("exec: mount_%s", vfstype); for (i = 1; i < argc; i++) (void)printf(" %s", argv[i]); (void)printf("\n"); return (0); } switch (pid = fork()) { case -1: /* Error. */ warn("fork"); free(optbuf); return (1); case 0: /* Child. */ if (strcmp(vfstype, "ufs") == 0) exit(mount_ufs(argc, (char * const *) argv)); /* Go find an executable. */ for (edir = edirs; *edir; edir++) { (void)snprintf(execname, sizeof(execname), "%s/mount_%s", *edir, vfstype); execv(execname, (char * const *)argv); } if (errno == ENOENT) { int len = 0; char *cp; for (edir = edirs; *edir; edir++) len += strlen(*edir) + 2; /* ", " */ if ((cp = malloc(len)) == NULL) { warn(NULL); exit(1); } cp[0] = '\0'; for (edir = edirs; *edir; edir++) { strcat(cp, *edir); if (edir[1] != NULL) strcat(cp, ", "); } warn("exec mount_%s not found in %s", vfstype, cp); } exit(1); /* NOTREACHED */ default: /* Parent. */ free(optbuf); if (waitpid(pid, &status, 0) < 0) { warn("waitpid"); return (1); } if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 0) return (WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { warnx("%s: %s", name, sys_siglist[WTERMSIG(status)]); return (1); } if (verbose) { if (statfs(name, &sf) < 0) { warn("statfs %s", name); return (1); } if (fstab_style) putfsent(&sf); else prmount(&sf); } break; } return (0); } void prmount(sfp) struct statfs *sfp; { int flags; struct opt *o; struct passwd *pw; int f; (void)printf("%s on %s", sfp->f_mntfromname, sfp->f_mntonname); flags = sfp->f_flags & MNT_VISFLAGMASK; for (f = 0, o = optnames; flags && o->o_opt; o++) if (flags & o->o_opt) { (void)printf("%s%s", !f++ ? " (" : ", ", o->o_name); flags &= ~o->o_opt; } if (sfp->f_owner) { (void)printf("%smounted by ", !f++ ? " (" : ", "); if ((pw = getpwuid(sfp->f_owner)) != NULL) (void)printf("%s", pw->pw_name); else (void)printf("%d", sfp->f_owner); } - (void)printf(f ? ")\n" : "\n"); + (void)printf("%swrites: sync %d async %d)\n", !f++ ? 
" (" : ", ", + sfp->f_syncwrites, sfp->f_asyncwrites); } struct statfs * getmntpt(name) const char *name; { struct statfs *mntbuf; int i, mntsize; mntsize = getmntinfo(&mntbuf, MNT_NOWAIT); for (i = 0; i < mntsize; i++) if (strcmp(mntbuf[i].f_mntfromname, name) == 0 || strcmp(mntbuf[i].f_mntonname, name) == 0) return (&mntbuf[i]); return (NULL); } char * catopt(s0, s1) char *s0; const char *s1; { size_t i; char *cp; if (s0 && *s0) { i = strlen(s0) + strlen(s1) + 1 + 1; if ((cp = malloc(i)) == NULL) err(1, NULL); (void)snprintf(cp, i, "%s,%s", s0, s1); } else cp = strdup(s1); if (s0) free(s0); return (cp); } void mangle(options, argcp, argv) char *options; int *argcp; const char **argv; { char *p, *s; int argc; argc = *argcp; for (s = options; (p = strsep(&s, ",")) != NULL;) if (*p != '\0') if (*p == '-') { argv[argc++] = p; p = strchr(p, '='); if (p) { *p = '\0'; argv[argc++] = p+1; } } else if (strcmp(p, "rw") != 0) { argv[argc++] = "-o"; argv[argc++] = p; } *argcp = argc; } void usage() { (void)fprintf(stderr, "usage: mount %s %s\n mount %s\n mount %s\n", "[-dfpruvw] [-o options] [-t ufs | external_type]", "special node", "[-adfpruvw] [-t ufs | external_type]", "[-dfpruvw] special | node"); exit(1); } void putfsent(ent) const struct statfs *ent; { struct fstab *fst; printf("%s\t%s\t%s %s", ent->f_mntfromname, ent->f_mntonname, ent->f_fstypename, (ent->f_flags & MNT_RDONLY) ? "ro" : "rw"); /* XXX should use optnames[] - put shorter names in it. */ if (ent->f_flags & MNT_SYNCHRONOUS) printf(",sync"); if (ent->f_flags & MNT_NOEXEC) printf(",noexec"); if (ent->f_flags & MNT_NOSUID) printf(",nosuid"); if (ent->f_flags & MNT_NODEV) printf(",nodev"); if (ent->f_flags & MNT_UNION) printf(",union"); if (ent->f_flags & MNT_ASYNC) printf(",async"); if (ent->f_flags & MNT_NOATIME) printf(",noatime"); if (ent->f_flags & MNT_NOCLUSTERR) printf(",noclusterr"); if (ent->f_flags & MNT_NOCLUSTERW) printf(",noclusterw"); + if (ent->f_flags & MNT_SUIDDIR) + printf(",suiddir"); if ((fst = getfsspec(ent->f_mntfromname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); else if ((fst = getfsfile(ent->f_mntonname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); else if (strcmp(ent->f_fstypename, "ufs") == 0) printf("\t1 1\n"); else printf("\t0 0\n"); } Index: head/sbin/mount_ifs/mount.c =================================================================== --- head/sbin/mount_ifs/mount.c (revision 34265) +++ head/sbin/mount_ifs/mount.c (revision 34266) @@ -1,614 +1,618 @@ /*- * Copyright (c) 1980, 1989, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static const char copyright[] = "@(#) Copyright (c) 1980, 1989, 1993, 1994\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint #if 0 static char sccsid[] = "@(#)mount.c 8.25 (Berkeley) 5/8/95"; #else static const char rcsid[] = - "$Id: mount.c,v 1.21 1997/11/13 00:28:49 julian Exp $"; + "$Id: mount.c,v 1.22 1998/02/13 04:54:27 bde Exp $"; #endif #endif /* not lint */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include "extern.h" #include "pathnames.h" int debug, fstab_style, verbose; char *catopt __P((char *, const char *)); struct statfs *getmntpt __P((const char *)); int hasopt __P((const char *, const char *)); int ismounted __P((struct fstab *, struct statfs *, int)); int isremountable __P((const char *)); void mangle __P((char *, int *, const char **)); int mountfs __P((const char *, const char *, const char *, int, const char *, const char *)); void prmount __P((struct statfs *)); void putfsent __P((const struct statfs *)); void usage __P((void)); /* Map from mount options to printable formats. */ static struct opt { int o_opt; const char *o_name; } optnames[] = { { MNT_ASYNC, "asynchronous" }, { MNT_EXPORTED, "NFS exported" }, { MNT_LOCAL, "local" }, { MNT_NOATIME, "noatime" }, { MNT_NODEV, "nodev" }, { MNT_NOEXEC, "noexec" }, { MNT_NOSUID, "nosuid" }, { MNT_QUOTA, "with quotas" }, { MNT_RDONLY, "read-only" }, { MNT_SYNCHRONOUS, "synchronous" }, { MNT_UNION, "union" }, { MNT_NOCLUSTERR, "noclusterr" }, { MNT_NOCLUSTERW, "noclusterw" }, { MNT_SUIDDIR, "suiddir" }, + { MNT_SOFTDEP, "soft-updates" }, { NULL } }; /* * List of VFS types that can be remounted without becoming mounted on top * of each other. * XXX Is this list correct?
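Adding { MNT_SOFTDEP, "soft-updates" } to optnames[] is all that is needed for the new flag to appear in mount -v output, since prmount() below walks the table against f_flags; and because the new writes counter line is unconditional, it also supplies the closing parenthesis that the old f ? ")\n" : "\n" epilogue provided. A reduced sketch (sfp assumed to point at the struct statfs being printed):

	struct opt *o;
	int f = 0, flags = sfp->f_flags & MNT_VISFLAGMASK;

	for (o = optnames; flags && o->o_opt; o++)
		if (flags & o->o_opt) {
			/* first item opens the list, later ones separate */
			(void)printf("%s%s", !f++ ? " (" : ", ", o->o_name);
			flags &= ~o->o_opt;	/* report each bit once */
		}
	(void)printf("%swrites: sync %d async %d)\n", !f++ ? " (" : ", ",
	    sfp->f_syncwrites, sfp->f_asyncwrites);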
*/ static const char * remountable_fs_names[] = { "ufs", "ffs", "lfs", "ext2fs", 0 }; int main(argc, argv) int argc; char * const argv[]; { const char *mntfromname, **vfslist, *vfstype; struct fstab *fs; struct statfs *mntbuf; FILE *mountdfp; pid_t pid; int all, ch, i, init_flags, mntsize, rval; char *options; all = init_flags = 0; options = NULL; vfslist = NULL; vfstype = "ufs"; while ((ch = getopt(argc, argv, "adfo:prwt:uv")) != -1) switch (ch) { case 'a': all = 1; break; case 'd': debug = 1; break; case 'f': init_flags |= MNT_FORCE; break; case 'o': if (*optarg) options = catopt(options, optarg); break; case 'p': fstab_style = 1; verbose = 1; break; case 'r': init_flags |= MNT_RDONLY; break; case 't': if (vfslist != NULL) errx(1, "only one -t option may be specified."); vfslist = makevfslist(optarg); vfstype = optarg; break; case 'u': init_flags |= MNT_UPDATE; break; case 'v': verbose = 1; break; case 'w': init_flags &= ~MNT_RDONLY; break; case '?': default: usage(); /* NOTREACHED */ } argc -= optind; argv += optind; #define BADTYPE(type) \ (strcmp(type, FSTAB_RO) && \ strcmp(type, FSTAB_RW) && strcmp(type, FSTAB_RQ)) rval = 0; switch (argc) { case 0: if ((mntsize = getmntinfo(&mntbuf, MNT_NOWAIT)) == 0) err(1, "getmntinfo"); if (all) { while ((fs = getfsent()) != NULL) { if (BADTYPE(fs->fs_type)) continue; if (checkvfsname(fs->fs_vfstype, vfslist)) continue; if (hasopt(fs->fs_mntops, "noauto")) continue; if (ismounted(fs, mntbuf, mntsize)) continue; if (mountfs(fs->fs_vfstype, fs->fs_spec, fs->fs_file, init_flags, options, fs->fs_mntops)) rval = 1; } } else if (fstab_style) { for (i = 0; i < mntsize; i++) { if (checkvfsname(mntbuf[i].f_fstypename, vfslist)) continue; putfsent(&mntbuf[i]); } } else { for (i = 0; i < mntsize; i++) { if (checkvfsname(mntbuf[i].f_fstypename, vfslist)) continue; prmount(&mntbuf[i]); } } exit(rval); case 1: if (vfslist != NULL) usage(); if (init_flags & MNT_UPDATE) { if ((mntbuf = getmntpt(*argv)) == NULL) errx(1, "unknown special file or file system %s.", *argv); if ((fs = getfsfile(mntbuf->f_mntonname)) != NULL) mntfromname = fs->fs_spec; else mntfromname = mntbuf->f_mntfromname; rval = mountfs(mntbuf->f_fstypename, mntfromname, mntbuf->f_mntonname, init_flags, options, 0); break; } if ((fs = getfsfile(*argv)) == NULL && (fs = getfsspec(*argv)) == NULL) errx(1, "%s: unknown special file or file system.", *argv); if (BADTYPE(fs->fs_type)) errx(1, "%s has unknown file system type.", *argv); rval = mountfs(fs->fs_vfstype, fs->fs_spec, fs->fs_file, init_flags, options, fs->fs_mntops); break; case 2: /* * If -t flag has not been specified, and spec contains either * a ':' or a '@' then assume that an NFS filesystem is being * specified ala Sun. */ if (vfslist == NULL && strpbrk(argv[0], ":@") != NULL) vfstype = "nfs"; rval = mountfs(vfstype, argv[0], argv[1], init_flags, options, NULL); break; default: usage(); /* NOTREACHED */ } /* * If the mount was successful, and done by root, tell mountd the * good news. Pid checks are probably unnecessary, but don't hurt.
*/ if (rval == 0 && getuid() == 0 && (mountdfp = fopen(_PATH_MOUNTDPID, "r")) != NULL) { if (fscanf(mountdfp, "%d", &pid) == 1 && pid > 0 && kill(pid, SIGHUP) == -1 && errno != ESRCH) err(1, "signal mountd"); (void)fclose(mountdfp); } exit(rval); } int ismounted(fs, mntbuf, mntsize) struct fstab *fs; struct statfs *mntbuf; int mntsize; { int i; if (fs->fs_file[0] == '/' && fs->fs_file[1] == '\0') /* the root file system can always be remounted */ return (0); for (i = mntsize - 1; i >= 0; --i) if (strcmp(fs->fs_file, mntbuf[i].f_mntonname) == 0 && (!isremountable(fs->fs_vfstype) || strcmp(fs->fs_spec, mntbuf[i].f_mntfromname) == 0)) return (1); return (0); } int isremountable(vfsname) const char *vfsname; { const char **cp; for (cp = remountable_fs_names; *cp; cp++) if (strcmp(*cp, vfsname) == 0) return (1); return (0); } int hasopt(mntopts, option) const char *mntopts, *option; { int negative, found; char *opt, *optbuf; if (option[0] == 'n' && option[1] == 'o') { negative = 1; option += 2; } else negative = 0; optbuf = strdup(mntopts); found = 0; for (opt = optbuf; (opt = strtok(opt, ",")) != NULL; opt = NULL) { if (opt[0] == 'n' && opt[1] == 'o') { if (!strcasecmp(opt + 2, option)) found = negative; } else if (!strcasecmp(opt, option)) found = !negative; } free(optbuf); return (found); } int mountfs(vfstype, spec, name, flags, options, mntopts) const char *vfstype, *spec, *name, *options, *mntopts; int flags; { /* List of directories containing mount_xxx subcommands. */ static const char *edirs[] = { _PATH_SBIN, _PATH_USRSBIN, NULL }; const char *argv[100], **edir; struct stat sb; struct statfs sf; pid_t pid; int argc, i, status; char *optbuf, execname[MAXPATHLEN + 1], mntpath[MAXPATHLEN]; #if __GNUC__ (void)&optbuf; (void)&name; #endif if (realpath(name, mntpath) != NULL && stat(mntpath, &sb) == 0) { if (!S_ISDIR(sb.st_mode)) { warnx("%s: Not a directory", mntpath); return (1); } } else { warn("%s", mntpath); return (1); } name = mntpath; if (mntopts == NULL) mntopts = ""; if (options == NULL) { if (*mntopts == '\0') { options = "rw"; } else { options = mntopts; mntopts = ""; } } optbuf = catopt(strdup(mntopts), options); if (strcmp(name, "/") == 0) flags |= MNT_UPDATE; if (flags & MNT_FORCE) optbuf = catopt(optbuf, "force"); if (flags & MNT_RDONLY) optbuf = catopt(optbuf, "ro"); /* * XXX * The mount_mfs (newfs) command uses -o to select the * optimisation mode. We don't pass the default "-o rw" * for that reason. */ if (flags & MNT_UPDATE) optbuf = catopt(optbuf, "update"); argc = 0; argv[argc++] = vfstype; mangle(optbuf, &argc, argv); argv[argc++] = spec; argv[argc++] = name; argv[argc] = NULL; if (debug) { (void)printf("exec: mount_%s", vfstype); for (i = 1; i < argc; i++) (void)printf(" %s", argv[i]); (void)printf("\n"); return (0); } switch (pid = fork()) { case -1: /* Error. */ warn("fork"); free(optbuf); return (1); case 0: /* Child. */ if (strcmp(vfstype, "ufs") == 0) exit(mount_ufs(argc, (char * const *) argv)); /* Go find an executable. */ for (edir = edirs; *edir; edir++) { (void)snprintf(execname, sizeof(execname), "%s/mount_%s", *edir, vfstype); execv(execname, (char * const *)argv); } if (errno == ENOENT) { int len = 0; char *cp; for (edir = edirs; *edir; edir++) len += strlen(*edir) + 2; /* ", " */ if ((cp = malloc(len)) == NULL) { warn(NULL); exit(1); } cp[0] = '\0'; for (edir = edirs; *edir; edir++) { strcat(cp, *edir); if (edir[1] != NULL) strcat(cp, ", "); } warn("exec mount_%s not found in %s", vfstype, cp); } exit(1); /* NOTREACHED */ default: /* Parent. 
*/ free(optbuf); if (waitpid(pid, &status, 0) < 0) { warn("waitpid"); return (1); } if (WIFEXITED(status)) { if (WEXITSTATUS(status) != 0) return (WEXITSTATUS(status)); } else if (WIFSIGNALED(status)) { warnx("%s: %s", name, sys_siglist[WTERMSIG(status)]); return (1); } if (verbose) { if (statfs(name, &sf) < 0) { warn("statfs %s", name); return (1); } if (fstab_style) putfsent(&sf); else prmount(&sf); } break; } return (0); } void prmount(sfp) struct statfs *sfp; { int flags; struct opt *o; struct passwd *pw; int f; (void)printf("%s on %s", sfp->f_mntfromname, sfp->f_mntonname); flags = sfp->f_flags & MNT_VISFLAGMASK; for (f = 0, o = optnames; flags && o->o_opt; o++) if (flags & o->o_opt) { (void)printf("%s%s", !f++ ? " (" : ", ", o->o_name); flags &= ~o->o_opt; } if (sfp->f_owner) { (void)printf("%smounted by ", !f++ ? " (" : ", "); if ((pw = getpwuid(sfp->f_owner)) != NULL) (void)printf("%s", pw->pw_name); else (void)printf("%d", sfp->f_owner); } - (void)printf(f ? ")\n" : "\n"); + (void)printf("%swrites: sync %d async %d)\n", !f++ ? " (" : ", ", + sfp->f_syncwrites, sfp->f_asyncwrites); } struct statfs * getmntpt(name) const char *name; { struct statfs *mntbuf; int i, mntsize; mntsize = getmntinfo(&mntbuf, MNT_NOWAIT); for (i = 0; i < mntsize; i++) if (strcmp(mntbuf[i].f_mntfromname, name) == 0 || strcmp(mntbuf[i].f_mntonname, name) == 0) return (&mntbuf[i]); return (NULL); } char * catopt(s0, s1) char *s0; const char *s1; { size_t i; char *cp; if (s0 && *s0) { i = strlen(s0) + strlen(s1) + 1 + 1; if ((cp = malloc(i)) == NULL) err(1, NULL); (void)snprintf(cp, i, "%s,%s", s0, s1); } else cp = strdup(s1); if (s0) free(s0); return (cp); } void mangle(options, argcp, argv) char *options; int *argcp; const char **argv; { char *p, *s; int argc; argc = *argcp; for (s = options; (p = strsep(&s, ",")) != NULL;) if (*p != '\0') if (*p == '-') { argv[argc++] = p; p = strchr(p, '='); if (p) { *p = '\0'; argv[argc++] = p+1; } } else if (strcmp(p, "rw") != 0) { argv[argc++] = "-o"; argv[argc++] = p; } *argcp = argc; } void usage() { (void)fprintf(stderr, "usage: mount %s %s\n mount %s\n mount %s\n", "[-dfpruvw] [-o options] [-t ufs | external_type]", "special node", "[-adfpruvw] [-t ufs | external_type]", "[-dfpruvw] special | node"); exit(1); } void putfsent(ent) const struct statfs *ent; { struct fstab *fst; printf("%s\t%s\t%s %s", ent->f_mntfromname, ent->f_mntonname, ent->f_fstypename, (ent->f_flags & MNT_RDONLY) ? "ro" : "rw"); /* XXX should use optnames[] - put shorter names in it. 
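For illustration only (this sketch is not in the original source): such a table could pair each flag with its fstab keyword, e.g. { MNT_SYNCHRONOUS, "sync" }, { MNT_NOEXEC, "noexec" }, { MNT_NOSUID, "nosuid" }, { MNT_NODEV, "nodev" }, { MNT_ASYNC, "async" }, { MNT_NOATIME, "noatime" }, and the chain of if statements below would then collapse into a single loop over that table.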
*/ if (ent->f_flags & MNT_SYNCHRONOUS) printf(",sync"); if (ent->f_flags & MNT_NOEXEC) printf(",noexec"); if (ent->f_flags & MNT_NOSUID) printf(",nosuid"); if (ent->f_flags & MNT_NODEV) printf(",nodev"); if (ent->f_flags & MNT_UNION) printf(",union"); if (ent->f_flags & MNT_ASYNC) printf(",async"); if (ent->f_flags & MNT_NOATIME) printf(",noatime"); if (ent->f_flags & MNT_NOCLUSTERR) printf(",noclusterr"); if (ent->f_flags & MNT_NOCLUSTERW) printf(",noclusterw"); + if (ent->f_flags & MNT_SUIDDIR) + printf(",suiddir"); if ((fst = getfsspec(ent->f_mntfromname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); else if ((fst = getfsfile(ent->f_mntonname))) printf("\t%u %u\n", fst->fs_freq, fst->fs_passno); else if (strcmp(ent->f_fstypename, "ufs") == 0) printf("\t1 1\n"); else printf("\t0 0\n"); } Index: head/sbin/tunefs/tunefs.8 =================================================================== --- head/sbin/tunefs/tunefs.8 (revision 34265) +++ head/sbin/tunefs/tunefs.8 (revision 34266) @@ -1,150 +1,151 @@ .\" Copyright (c) 1983, 1991, 1993 .\" The Regents of the University of California. All rights reserved. .\" .\" Redistribution and use in source and binary forms, with or without .\" modification, are permitted provided that the following conditions .\" are met: .\" 1. Redistributions of source code must retain the above copyright .\" notice, this list of conditions and the following disclaimer. .\" 2. Redistributions in binary form must reproduce the above copyright .\" notice, this list of conditions and the following disclaimer in the .\" documentation and/or other materials provided with the distribution. .\" 3. All advertising materials mentioning features or use of this software .\" must display the following acknowledgement: .\" This product includes software developed by the University of .\" California, Berkeley and its contributors. .\" 4. Neither the name of the University nor the names of its contributors .\" may be used to endorse or promote products derived from this software .\" without specific prior written permission. .\" .\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE .\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" .\" @(#)tunefs.8 8.2 (Berkeley) 12/11/93 .\" .Dd December 11, 1993 .Dt TUNEFS 8 .Os BSD 4.2 .Sh NAME .Nm tunefs .Nd tune up an existing file system .Sh SYNOPSIS .Nm tunefs .Op Fl A .Op Fl a Ar maxcontig .Op Fl d Ar rotdelay .Op Fl e Ar maxbpg .Op Fl m Ar minfree .Op Fl p .Bk -words +.Op Fl n Ar soft_dependency_enabling .Op Fl o Ar optimize_preference .Ek .Op Ar special | Ar filesys .Sh DESCRIPTION .Nm Tunefs is designed to change the dynamic parameters of a file system which affect the layout policies. The parameters which are to be changed are indicated by the flags given below: .Bl -tag -width Ds .It Fl A The file system has several backups of the super-block. 
Specifying this option will cause all backups to be modified as well as the primary super-block. This is potentially dangerous - use with caution. .It Fl a Ar maxcontig This specifies the maximum number of contiguous blocks that will be laid out before forcing a rotational delay (see .Fl d below). The default value is one, since most device drivers require an interrupt per disk transfer. Device drivers that can chain several buffers together in a single transfer should set this to the maximum chain length. .It Fl d Ar rotdelay This specifies the expected time (in milliseconds) to service a transfer completion interrupt and initiate a new transfer on the same disk. It is used to decide how much rotational spacing to place between successive blocks in a file. .It Fl e Ar maxbpg This indicates the maximum number of blocks any single file can allocate out of a cylinder group before it is forced to begin allocating blocks from another cylinder group. Typically this value is set to about one quarter of the total blocks in a cylinder group. The intent is to prevent any single file from using up all the blocks in a single cylinder group, thus degrading access times for all files subsequently allocated in that cylinder group. The effect of this limit is to cause big files to do long seeks more frequently than if they were allowed to allocate all the blocks in a cylinder group before seeking elsewhere. For file systems with exclusively large files, this parameter should be set higher. .It Fl m Ar minfree This value specifies the percentage of space held back from normal users; the minimum free space threshold. The default value used is 8%. This value can be set to zero; however, up to a factor of three in throughput will be lost over the performance obtained at a 10% threshold. Settings of 5% and less force space optimization to always be used, which will greatly increase the overhead for file writes. Note that if the value is raised above the current usage level, users will be unable to allocate files until enough files have been deleted to get under the higher threshold. +.It Fl n Ar soft_dependency_enabling +Values of `enable' or `disable' turn the soft updates flag on or +off for the file system. .It Fl o Ar optimize_preference The file system can either try to minimize the time spent allocating blocks, or it can attempt to minimize the space fragmentation on the disk. Optimization for space has much higher overhead for file writes. The kernel normally changes the preference automatically as the percent fragmentation changes on the file system. .It Fl p This option shows a summary of what the current tuneable settings are on the selected file system. More detailed information can be obtained in the .Xr dumpfs 8 manual page. .El .Sh SEE ALSO .Xr fs 5 , .Xr dumpfs 8 , .Xr newfs 8 .Rs .%A M. McKusick .%A W. Joy .%A S. Leffler .%A R. Fabry .%T "A Fast File System for UNIX" .%J "ACM Transactions on Computer Systems 2" .%N 3 .%P pp 181-197 .%D August 1984 .%O "(reprinted in the BSD System Manager's Manual, SMM:5)" .Re .Sh BUGS This program should work on mounted and active file systems. Because the super-block is not kept in the buffer cache, the changes will only take effect if the program is run on dismounted file systems. To change the root file system, the system must be rebooted after the file system is tuned. .\" Take this out and a Unix Demon will dog your steps from now until .\" the time_t's wrap around. .Pp You can tune a file system, but you can't tune a fish. .Sh HISTORY The .Nm command appeared in .Bx 4.2 .
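A minimal sketch of the mechanism, assuming only that FS_DOSOFTDEP is one bit in the superblock's fs_flags word (the value below is a stand-in for illustration; the real definition comes from the ufs/ffs headers this commit relies on): the -n handling reduces to this set/clear/test pattern.

#include <stdio.h>

#define FS_DOSOFTDEP 0x02	/* stand-in value, for illustration only */

static long fs_flags;		/* stands in for sblock.fs_flags */

int
main(void)
{
	fs_flags |= FS_DOSOFTDEP;	/* what `tunefs -n enable' does */
	printf("soft updates: %s\n",
	    (fs_flags & FS_DOSOFTDEP) ? "enabled" : "disabled");
	fs_flags &= ~FS_DOSOFTDEP;	/* what `tunefs -n disable' does */
	printf("soft updates: %s\n",
	    (fs_flags & FS_DOSOFTDEP) ? "enabled" : "disabled");
	return (0);
}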
Index: head/sbin/tunefs/tunefs.c =================================================================== --- head/sbin/tunefs/tunefs.c (revision 34265) +++ head/sbin/tunefs/tunefs.c (revision 34266) @@ -1,311 +1,332 @@ /* * Copyright (c) 1983, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifndef lint static char copyright[] = "@(#) Copyright (c) 1983, 1993\n\ The Regents of the University of California. All rights reserved.\n"; #endif /* not lint */ #ifndef lint static char sccsid[] = "@(#)tunefs.c 8.2 (Berkeley) 4/19/94"; #endif /* not lint */ /* * tunefs: change layout parameters to an existing file system. 
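* * Illustrative usage (an editorial note, not in the original header): with * the -n option added in this revision, `tunefs -n enable /dev/rwd0s1a' * turns soft updates on and `tunefs -n disable /dev/rwd0s1a' turns them * off again; `tunefs -p /dev/rwd0s1a' prints the current settings. The * device name is only an example.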
*/ #include #include #include #include #include #include #include #include #include #include #include /* the optimization warning string template */ #define OPTWARN "should optimize for %s with minfree %s %d%%" union { struct fs sb; char pad[MAXBSIZE]; } sbun; #define sblock sbun.sb int fi; long dev_bsize = 1; void bwrite(daddr_t, char *, int); int bread(daddr_t, char *, int); void getsb(struct fs *, char *); void usage __P((void)); void printfs __P((void)); int main(argc, argv) int argc; char *argv[]; { - char *cp, *special, *name; + char *cp, *special, *name, *action; struct stat st; int i; int Aflag = 0; struct fstab *fs; char *chg[2], device[MAXPATHLEN]; argc--, argv++; if (argc < 2) usage(); special = argv[argc - 1]; fs = getfsfile(special); if (fs) special = fs->fs_spec; again: if (stat(special, &st) < 0) { if (*special != '/') { if (*special == 'r') special++; (void)sprintf(device, "%s/%s", _PATH_DEV, special); special = device; goto again; } err(1, "%s", special); } if ((st.st_mode & S_IFMT) != S_IFBLK && (st.st_mode & S_IFMT) != S_IFCHR) errx(10, "%s: not a block or character device", special); getsb(&sblock, special); for (; argc > 0 && argv[0][0] == '-'; argc--, argv++) { for (cp = &argv[0][1]; *cp; cp++) switch (*cp) { case 'A': Aflag++; continue; case 'p': printfs(); exit(0); case 'a': name = "maximum contiguous block count"; if (argc < 1) errx(10, "-a: missing %s", name); argc--, argv++; i = atoi(*argv); if (i < 1) errx(10, "%s must be >= 1 (was %s)", name, *argv); warnx("%s changes from %d to %d", name, sblock.fs_maxcontig, i); sblock.fs_maxcontig = i; continue; case 'd': name = "rotational delay between contiguous blocks"; if (argc < 1) errx(10, "-d: missing %s", name); argc--, argv++; i = atoi(*argv); warnx("%s changes from %dms to %dms", name, sblock.fs_rotdelay, i); sblock.fs_rotdelay = i; continue; case 'e': name = "maximum blocks per file in a cylinder group"; if (argc < 1) errx(10, "-e: missing %s", name); argc--, argv++; i = atoi(*argv); if (i < 1) errx(10, "%s must be >= 1 (was %s)", name, *argv); warnx("%s changes from %d to %d", name, sblock.fs_maxbpg, i); sblock.fs_maxbpg = i; continue; case 'm': name = "minimum percentage of free space"; if (argc < 1) errx(10, "-m: missing %s", name); argc--, argv++; i = atoi(*argv); if (i < 0 || i > 99) errx(10, "bad %s (%s)", name, *argv); warnx("%s changes from %d%% to %d%%", name, sblock.fs_minfree, i); sblock.fs_minfree = i; if (i >= MINFREE && sblock.fs_optim == FS_OPTSPACE) warnx(OPTWARN, "time", ">=", MINFREE); if (i < MINFREE && sblock.fs_optim == FS_OPTTIME) warnx(OPTWARN, "space", "<", MINFREE); continue; + case 'n': + name = "soft updates"; + if (argc < 1) + errx(10, "-n: missing %s", name); + argc--, argv++; + if (strcmp(*argv, "enable") == 0) { + sblock.fs_flags |= FS_DOSOFTDEP; + action = "set"; + } else if (strcmp(*argv, "disable") == 0) { + sblock.fs_flags &= ~FS_DOSOFTDEP; + action = "cleared"; + } else { + errx(10, "bad %s (options are %s)", + name, "`enable' or `disable'"); + } + warnx("%s %s", name, action); + continue; + case 'o': name = "optimization preference"; if (argc < 1) errx(10, "-o: missing %s", name); argc--, argv++; chg[FS_OPTSPACE] = "space"; chg[FS_OPTTIME] = "time"; if (strcmp(*argv, chg[FS_OPTSPACE]) == 0) i = FS_OPTSPACE; else if (strcmp(*argv, chg[FS_OPTTIME]) == 0) i = FS_OPTTIME; else errx(10, "bad %s (options are `space' or `time')", name); if (sblock.fs_optim == i) { warnx("%s remains unchanged as %s", name, chg[i]); continue; } warnx("%s changes from %s to %s", name,
chg[sblock.fs_optim], chg[i]); sblock.fs_optim = i; if (sblock.fs_minfree >= MINFREE && i == FS_OPTSPACE) warnx(OPTWARN, "time", ">=", MINFREE); if (sblock.fs_minfree < MINFREE && i == FS_OPTTIME) warnx(OPTWARN, "space", "<", MINFREE); continue; default: usage(); } } if (argc != 1) usage(); bwrite((daddr_t)SBOFF / dev_bsize, (char *)&sblock, SBSIZE); if (Aflag) for (i = 0; i < sblock.fs_ncg; i++) bwrite(fsbtodb(&sblock, cgsblock(&sblock, i)), (char *)&sblock, SBSIZE); close(fi); exit(0); } void usage() { fprintf(stderr, "usage: tunefs tuneup-options special-device\n"); fprintf(stderr, "where tuneup-options are:\n"); fprintf(stderr, "\t-a maximum contiguous blocks\n"); fprintf(stderr, "\t-d rotational delay between contiguous blocks\n"); fprintf(stderr, "\t-e maximum blocks per file in a cylinder group\n"); fprintf(stderr, "\t-m minimum percentage of free space\n"); + fprintf(stderr, "\t-n soft updates (`enable' or `disable')\n"); fprintf(stderr, "\t-o optimization preference (`space' or `time')\n"); fprintf(stderr, "\t-p no change - just prints current tuneable settings\n"); exit(2); } void getsb(fs, file) register struct fs *fs; char *file; { fi = open(file, 2); if (fi < 0) err(3, "cannot open %s", file); if (bread((daddr_t)SBOFF, (char *)fs, SBSIZE)) err(4, "%s: bad super block", file); if (fs->fs_magic != FS_MAGIC) err(5, "%s: bad magic number", file); dev_bsize = fs->fs_fsize / fsbtodb(fs, 1); } void printfs() { + warnx("soft updates: (-n) %s", + (sblock.fs_flags & FS_DOSOFTDEP)? "enabled" : "disabled"); warnx("maximum contiguous block count: (-a) %d", sblock.fs_maxcontig); warnx("rotational delay between contiguous blocks: (-d) %d ms", sblock.fs_rotdelay); warnx("maximum blocks per file in a cylinder group: (-e) %d", sblock.fs_maxbpg); warnx("minimum percentage of free space: (-m) %d%%", sblock.fs_minfree); warnx("optimization preference: (-o) %s", sblock.fs_optim == FS_OPTSPACE ? "space" : "time"); if (sblock.fs_minfree >= MINFREE && sblock.fs_optim == FS_OPTSPACE) warnx(OPTWARN, "time", ">=", MINFREE); if (sblock.fs_minfree < MINFREE && sblock.fs_optim == FS_OPTTIME) warnx(OPTWARN, "space", "<", MINFREE); } void bwrite(blk, buf, size) daddr_t blk; char *buf; int size; { if (lseek(fi, (off_t)blk * dev_bsize, SEEK_SET) < 0) err(6, "FS SEEK"); if (write(fi, buf, size) != size) err(7, "FS WRITE"); } int bread(bno, buf, cnt) daddr_t bno; char *buf; int cnt; { int i; if (lseek(fi, (off_t)bno * dev_bsize, SEEK_SET) < 0) return(1); if ((i = read(fi, buf, cnt)) != cnt) { for (i = 0; i < cnt; i++) buf[i] = 0; return (1); } return (0); } Index: head/sys/i386/conf/LINT =================================================================== --- head/sys/i386/conf/LINT (revision 34265) +++ head/sys/i386/conf/LINT (revision 34266) # This allows you to actually store this configuration file into # the kernel binary itself, where it may be later read by saying: # strings -n 3 /kernel | sed -n 's/^___//p' > MYKERNEL # options INCLUDE_CONFIG_FILE # Include this file in kernel # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' # - The root filesystem might be on partition wd0a # - Crash dumps will be written to wd0b, if possible. Specifying the # dump device here is not recommended. Use dumpon(8). # config kernel root on wd0 dumps on wd0 ##################################################################### # SMP OPTIONS: # # SMP enables building of a Symmetric MultiProcessor Kernel. # APIC_IO enables the use of the IO APIC for Symmetric I/O. # NCPU sets the number of CPUs, defaults to 2. # NBUS sets the number of busses, defaults to 4. # NAPIC sets the number of IO APICs on the motherboard, defaults to 1. # NINTR sets the total number of INTs provided by the motherboard. # # Notes: # # An SMP kernel will ONLY run on an Intel MP spec. qualified motherboard. # # Be sure to disable 'cpu "I386_CPU"' && 'cpu "I486_CPU"' for SMP kernels.
# # Check the 'Rogue SMP hardware' section to see if additional options # are required by your hardware. # # Mandatory: options SMP # Symmetric MultiProcessor Kernel options APIC_IO # Symmetric (APIC) I/O # Optional, these are the defaults plus 1: options NCPU=5 # number of CPUs options NBUS=5 # number of busses options NAPIC=2 # number of IO APICs options NINTR=25 # number of INTs # # Rogue SMP hardware: # # Bridged PCI cards: # # The MP tables of most of the current generation MP motherboards # do NOT properly support bridged PCI cards. To use one of these # cards you should refer to ??? ##################################################################### # CPU OPTIONS # # You must specify at least one CPU (the one you intend to run on); # deleting the specification for CPUs you don't need to use may make # parts of the system run faster. This is especially true when removing # I386_CPU. # cpu "I386_CPU" cpu "I486_CPU" cpu "I586_CPU" # aka Pentium(tm) cpu "I686_CPU" # aka Pentium Pro(tm) # # Options for CPU features. # # CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM # BlueLightning CPU. It works only with Cyrix FPU, and this option # should not be used with Intel FPU. # # CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning # CPU if CPU supports it. The default is double-clock mode on # BlueLightning CPU box. # # CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1). # # CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct # mapped mode. Default is 2-way set associative mode. # # CPU_CYRIX_NO_LOCK enables weak locking for the entire address space # of Cyrix 6x86 and 6x86MX CPUs. If this option is not set and # FAILESAFE is defined, NO_LOCK bit of CCR1 is cleared. (NOTE 3) # # CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables # reorder). This option should not be used if you use memory mapped # I/O device(s). # # CPU_FASTER_5X86_FPU enables a faster FPU exception handler. # # CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products # for i386 machines. # # CPU_IORT defines I/O clock delay time (NOTE 1). Default values of # I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7, respectively # (no clock delay). # # CPU_LOOP_EN prevents flushing the prefetch buffer if the destination # of a jump is already present in the prefetch buffer on Cyrix 5x86 (NOTE # 1). # # CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1). # # CPU_SUSP_HLT enables suspend on HALT. If this option is set, the CPU # enters suspend mode following execution of the HALT instruction. # # CPU_WT_ALLOC enables write-through allocation. # # CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache # flush at hold state. # # CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs # without cache flush at hold state, and (2) write-back CPU cache on # Cyrix 6x86 whose revision < 2.7 (NOTE 2). # # NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY # Pentiums) from locking up when a LOCK CMPXCHG8B instruction is # executed. This should be included for ALL kernels that won't run # on a Pentium. # # NOTE 1: The options CPU_BTB_EN, CPU_LOOP_EN, CPU_IORT, # and CPU_RSTK_EN should not be used because of CPU bugs. # These options may crash your system. # # NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled # in write-through mode when revision < 2.7. If revision of Cyrix # 6x86 >= 2.7, CPU cache is always enabled in write-back mode.
# # NOTE 3: This option may cause failures for software that requires # locked cycles in order to operate correctly. # options "CPU_BLUELIGHTNING_FPU_OP_CACHE" options "CPU_BLUELIGHTNING_3X" options "CPU_BTB_EN" options "CPU_DIRECT_MAPPED_CACHE" options "CPU_DISABLE_5X86_LSSER" options "CPU_FASTER_5X86_FPU" options "CPU_I486_ON_386" options "CPU_IORT" options "CPU_LOOP_EN" options "CPU_RSTK_EN" options "CPU_SUSP_HLT" options "CYRIX_CACHE_WORKS" options "CYRIX_CACHE_REALLY_WORKS" #options "NO_F00F_HACK" # # A math emulator is mandatory if you wish to run on hardware which # does not have a floating-point processor. Pick either the original, # bogus (but freely-distributable) math emulator, or a much more # fully-featured but GPL-licensed emulator taken from Linux. # options MATH_EMULATE #Support for x87 emulation # Don't enable both of these in a real config. options GPL_MATH_EMULATE #Support for x87 emulation via #new math emulator ##################################################################### # COMPATIBILITY OPTIONS # # Implement system calls compatible with 4.3BSD and older versions of # FreeBSD. You probably do NOT want to remove this as much current code # still relies on the 4.3 emulation. # options "COMPAT_43" # # Allow user-mode programs to manipulate their local descriptor tables. # This option is required for the WINE Windows(tm) emulator, and is # not used by anything else (that we know of). # options USER_LDT #allow user-level control of i386 ldt # # These three options provide support for System V Interface # Definition-style interprocess communication, in the form of shared # memory, semaphores, and message queues, respectively. # options SYSVSHM options SYSVSEM options SYSVMSG # # This option includes an MD5 routine in the kernel; this is used for # various authentication and privacy uses. # options "MD5" # # Allow processes to switch to vm86 mode, as well as enabling direct # user-mode access to the I/O port space. This option is necessary for # the doscmd emulator to run. # options "VM86" ##################################################################### # DEBUGGING OPTIONS # # Enable the kernel debugger. # options DDB # # Don't drop into DDB for a panic. Intended for unattended operation # where you may want to drop to DDB from the console, but still want # the machine to recover from a panic. # options DDB_UNATTENDED # # If using GDB remote mode to debug the kernel, there's a non-standard # extension to the remote protocol that can be used to use the serial # port as both the debugging port and the system console. It's non- # standard and you're on your own if you enable it. See also the # "remotechat" variables in the FreeBSD specific version of gdb. # options GDB_REMOTE_CHAT # # KTRACE enables the system-call tracing facility ktrace(2). # options KTRACE #kernel tracing # # The DIAGNOSTIC option is used in a number of source files to enable # extra sanity checking of internal structures. This support is not # enabled by default because of the extra time it would take to check # for these conditions, which can only occur as a result of # programming errors. # options DIAGNOSTIC # # PERFMON causes the driver for Pentium/Pentium Pro performance counters # to be compiled. See perfmon(4) for more information. # options PERFMON # # This option lets some drivers co-exist that can't co-exist in a running # system. It is used to compile all kernel code in one go for # quality assurance purposes (like this file, which the option takes its name # from.)
# options COMPILING_LINT # XXX - this doesn't belong here. # Allow ordinary users to take the console - this is useful for X. options UCONSOLE # XXX - this doesn't belong here either options USERCONFIG #boot -c editor options USERCONFIG_BOOT #imply -c and parse info area options VISUAL_USERCONFIG #visual boot -c editor ##################################################################### # NETWORKING OPTIONS # # Protocol families: # Only the INET (Internet) family is officially supported in FreeBSD. # Source code for the NS (Xerox Network Service) is provided for amusement # value. # options INET #Internet communications protocols options IPX #IPX/SPX communications protocols options IPXIP #IPX in IP encapsulation (not available) options IPTUNNEL #IP in IPX encapsulation (not available) options NETATALK #Appletalk communications protocols # These are currently broken but are shipped due to interest. #options NS #Xerox NS protocols # These are currently broken and are no longer shipped due to lack # of interest. #options CCITT #X.25 network layer #options ISO #options TPIP #ISO TP class 4 over IP #options TPCONS #ISO TP class 0 over X.25 #options LLC #X.25 link layer for Ethernets #options HDLC #X.25 link layer for serial lines #options EON #ISO CLNP over IP #options NSIP #XNS over IP # # Network interfaces: # The `loop' pseudo-device is MANDATORY when networking is enabled. # The `ether' pseudo-device provides generic code to handle # Ethernets; it is MANDATORY when an Ethernet device driver is # configured. # The 'fddi' pseudo-device provides generic code to support FDDI. # The `sppp' pseudo-device serves a similar role for certain types # of synchronous PPP links (like `cx', `ar'). # The `sl' pseudo-device implements the Serial Line IP (SLIP) service. # The `ppp' pseudo-device implements the Point-to-Point Protocol. # The `bpfilter' pseudo-device enables the Berkeley Packet Filter. Be # aware of the legal and administrative consequences of enabling this # option. The number of devices determines the maximum number of # simultaneous BPF client programs runnable. # The `disc' pseudo-device implements a minimal network interface, # which throws away all packets sent and never receives any. It is # included for testing purposes. # The `tun' pseudo-device implements the User Process PPP (iijppp). # # The PPP_BSDCOMP option enables support for compress(1) style entire # packet compression; PPP_DEFLATE is for zlib/gzip style compression. # PPP_FILTER enables code for filtering the ppp data stream and selecting # events for resetting the demand dial activity timer - requires bpfilter. # See pppd(8) for more details. # pseudo-device ether #Generic Ethernet pseudo-device fddi #Generic FDDI pseudo-device sppp #Generic Synchronous PPP pseudo-device loop #Network loopback device pseudo-device bpfilter 4 #Berkeley packet filter pseudo-device disc #Discard device pseudo-device tun 1 #Tunnel driver (user process ppp(8)) pseudo-device sl 2 #Serial Line IP pseudo-device ppp 2 #Point-to-point protocol options PPP_BSDCOMP #PPP BSD-compress support options PPP_DEFLATE #PPP zlib/deflate/gzip support options PPP_FILTER #enable bpf filtering (needs bpfilter) # # Internet family options: # # TCP_COMPAT_42 causes the TCP code to emulate certain bugs present in # 4.2BSD. This option should not be used unless you have a 4.2BSD # machine and TCP connections fail. # # MROUTING enables the kernel multicast packet forwarder, which works # with mrouted(8).
# # IPFIREWALL enables support for IP firewall construction, in # conjunction with the `ipfw' program. IPFIREWALL_VERBOSE sends # logged packets to the system logger. IPFIREWALL_VERBOSE_LIMIT # limits the number of times a matching entry can be logged. # # WARNING: IPFIREWALL defaults to a policy of "deny ip from any to any" # and if you do not add other rules during startup to allow access, # YOU WILL LOCK YOURSELF OUT. It is suggested that you set firewall=open # in /etc/rc.conf when first enabling this feature, then refine the # firewall rules in /etc/rc.firewall after you've tested that the new kernel # feature works properly. # # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to # allow everything. Use with care: if a cracker can crash your # firewall machine, they can get to your protected machines. However, # if you are using it as an as-needed filter for specific problems as # they arise, then this may be for you. Changing the default to 'allow' # means that you won't get stuck if the kernel and /sbin/ipfw binary get # out of sync. # # IPDIVERT enables the divert IP sockets, used by ``ipfw divert'' # # TCPDEBUG is undocumented. # options "TCP_COMPAT_42" #emulate 4.2BSD TCP bugs options MROUTING # Multicast routing options IPFIREWALL #firewall options IPFIREWALL_VERBOSE #print information about # dropped packets options "IPFIREWALL_VERBOSE_LIMIT=100" #limit verbosity options IPFIREWALL_DEFAULT_TO_ACCEPT #allow everything by default options IPDIVERT #divert sockets options TCPDEBUG ##################################################################### # FILESYSTEM OPTIONS # # Only the root, /usr, and /tmp filesystems need be statically # compiled; everything else will be automatically loaded at mount # time. (Exception: the UFS family --- FFS and MFS --- cannot # currently be demand-loaded.) Some people still prefer to statically # compile other filesystems as well. # # NB: The NULL, PORTAL, UMAP and UNION filesystems are known to be # buggy, and WILL panic your system if you attempt to do anything with # them. They are included here as an incentive for some enterprising # soul to sit down and fix them. # # One of these is mandatory: options FFS #Fast filesystem options NFS #Network File System # The rest are optional: # options NFS_NOSERVER #Disable the NFS-server code. options "CD9660" #ISO 9660 filesystem options FDESC #File descriptor filesystem options KERNFS #Kernel filesystem options MFS #Memory File System options MSDOSFS #MS DOS File System options NULLFS #NULL filesystem options PORTAL #Portal filesystem options PROCFS #Process filesystem options UMAPFS #UID map filesystem options UNION #Union filesystem options "CD9660_ROOT" #CD-ROM usable as root device options FFS_ROOT #FFS usable as root device options NFS_ROOT #NFS usable as root device # This DEVFS is experimental but seems to work options DEVFS #devices filesystem + +# Allow the FFS to use Softupdates technology. +# To do this you need to fetch the two files +# /sys/ufs/ffs/softdep.h and /sys/ufs/ffs/ffs_softdep.c +# from freebsd.org and understand the licensing restrictions. +#options SOFTUPDATES +# (we can't actually enable it because the files may not be present) # Make space in the kernel for an MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. options MFS_ROOT=10 # Allow the MFS_ROOT code to load the MFS image from floppy if it is missing. options MFS_AUTOLOAD # Allow this many swap-devices.
options NSWAPDEV=20 # Disk quotas are supported when this option is enabled. If you # change the value of this option, you must do a `make clean' in your # kernel compile directory in order to get a working kernel. # options QUOTA #enable disk quotas # Add more checking code to various filesystems #options NULLFS_DIAGNOSTIC #options KERNFS_DIAGNOSTIC #options UMAPFS_DIAGNOSTIC #options UNION_DIAGNOSTIC # In particular, multi-session CD-Rs might require a huge amount of # time in order to "settle". If we are about to mount them as the # root f/s, we gotta wait a little. # # The number is supposed to be in seconds. options "CD9660_ROOTDELAY=20" # If you are running a machine just as a fileserver for PC and MAC users # (using SAMBA or Netatalk), then you may consider setting this option # and keeping all those users' directories on a partition that is mounted # with the suiddir option. This gives new files the same ownership as # the directory (similar to group). It's a security hole if you let # these users run programs, so confine it to file-servers (but it'll save you # lots of headaches in that case). Root-owned directories are exempt and X bits # are cleared. The suid bit must be set on the directory as well; see chmod(1). # PC owners can't see/set ownerships so they keep getting their toes # trodden on. This saves you all the support calls as the filesystem # it's used on will act as they expect. ("It's my dir so it must be my file"). # options SUIDDIR # Add some error checking code to the null_bypass routine # in the NULL filesystem #options SAFETY ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. # # Beginning with FreeBSD 2.0.5 you can wire down your SCSI devices so # that a given bus, target, and LUN always come on line as the same # device unit. In earlier versions the unit numbers were assigned # in the order that the devices were probed on the SCSI bus. This # means that if you removed a disk drive, you may have had to rewrite # your /etc/fstab file, and also that you had to be careful when adding # a new disk as it may have been probed earlier and moved your device # configuration around. # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "sd3" then the first # non-wired disk will be assigned sd4. # The syntax for wiring down devices is: # controller scbus0 at ahc0 # Single bus device # controller scbus1 at ahc1 bus 0 # Single bus device # controller scbus3 at ahc2 bus 0 # Twin bus device # controller scbus2 at ahc2 bus 1 # Twin bus device # disk sd0 at scbus0 target 0 unit 0 # disk sd1 at scbus3 target 1 # disk sd2 at scbus2 target 3 # tape st1 at scbus1 target 6 # device cd0 at scbus? # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI # configuration and doesn't have to be explicitly configured.
controller scbus0 #base SCSI code device ch0 #SCSI media changers device sd0 #SCSI disks device st0 #SCSI tapes device cd0 #SCSI CD-ROMs device od0 #SCSI optical disk # The previous devices (ch, sd, st, cd) are recognized by config. # config doesn't (and shouldn't) know about these newer ones, # so we have to specify that they are on a SCSI bus with the "at scbus?" # clause. device worm0 at scbus? # SCSI worm device pt0 at scbus? # SCSI processor type device sctarg0 at scbus? # SCSI target # SCSI OPTIONS: # SCSIDEBUG: When defined enables debugging macros # NO_SCSI_SENSE: When defined disables sense descriptions (about 4k) # SCSI_REPORT_GEOMETRY: Always report disk geometry at boot up instead # of only when booting verbosely. options SCSIDEBUG #options NO_SCSI_SENSE options SCSI_REPORT_GEOMETRY # Options for the `od' optical disk driver: # # If the drive returns a sense key of 0x02 with a vendor-specific additional # sense code (ASC) and additional sense code qualifier (ASCQ), or an # illegal ASC and ASCQ, this causes an error (NOT READY) and a retry. # To suppress this, use the following option. # options OD_BOGUS_NOT_READY # # For an automatic spindown, try this. Again, preferably as an # option in your config file. # WARNING! Use at your own risk. Joerg's ancient SONY SMO drive # groks it fine, while Shunsuke's Fujitsu chokes on it and times # out. # options OD_AUTO_TURNOFF ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS # The `pty' device usually turns out to be ``effectively mandatory'', # as it is required for `telnetd', `rlogind', `screen', `emacs', and # `xterm', among others. pseudo-device pty 16 #Pseudo ttys - can go as high as 256 pseudo-device speaker #Play IBM BASIC-style noises out your speaker pseudo-device gzip #Exec gzipped a.out's pseudo-device vn #Vnode driver (turns a file into a device) pseudo-device snp 3 #Snoop device - to look at pty/vty/etc.. pseudo-device ccd 4 #Concatenated disk driver # These are only for watching for bitrot in old tty code. # broken #pseudo-device tb # These are only for watching for bitrot in old SCSI code. pseudo-device su #scsi user pseudo-device ssc #super scsi ##################################################################### # HARDWARE DEVICE CONFIGURATION # ISA and EISA devices: # EISA support is available for some devices, so they can be auto-probed. # Micro Channel is not supported at all. # # Mandatory ISA devices: isa, npx # controller isa0 # # Options for `isa': # # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # This option breaks suspend/resume on some portables. # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # Automatic EOI is documented not to work for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # BOUNCE_BUFFERS provides support for ISA DMA on machines with more # than 16 megabytes of memory. It doesn't hurt on other machines. # Some broken EISA and VLB hardware may need this, too. # # MAXMEM specifies the amount of RAM on the machine; if this is not # specified, FreeBSD will first read the amount of memory from the CMOS # RAM, so the amount of memory will initially be limited to 64MB or 16MB # depending on the BIOS. If the BIOS reports 64MB, a memory probe will # then attempt to detect the installed amount of RAM.
If this probe # fails to detect >64MB RAM you will have to use the MAXMEM option. # The amount is in kilobytes, so for a machine with 128MB of RAM, it would # be 131072 (128 * 1024). # # TUNE_1542 enables the automatic ISA bus speed selection for the # Adaptec 1542 boards. Does not work for all boards; use it with caution. # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. # # PAS_JOYSTICK_ENABLE enables the gameport on the ProAudio Spectrum options "AUTO_EOI_1" #options "AUTO_EOI_2" options BOUNCE_BUFFERS options "MAXMEM=(128*1024)" options "TUNE_1542" #options BROKEN_KEYBOARD_RESET #options PAS_JOYSTICK_ENABLE # Enable support for the kernel PLL to use an external PPS signal, # under supervision of [x]ntpd(8) # More info in ftp://ftp.udel.edu/pub/ntp/kernel.tar.Z options PPS_SYNC # Enable PnP support in the kernel. This allows you to automatically # attach to PnP cards for drivers that support it and allows you to # configure cards from USERCONFIG. See pnp(4) for more info. controller pnp0 # The pcvt console driver (vt220 compatible). device vt0 at isa? port "IO_KBD" tty irq 1 vector pcrint options XSERVER # support for running an X server. options FAT_CURSOR # start with block cursor # This PCVT option is for keyboards such as those used on IBM ThinkPad laptops options PCVT_SCANSET=2 # IBM keyboards are non-std # The syscons console driver (sco color console compatible). device sc0 at isa? port "IO_KBD" tty irq 1 vector scintr options MAXCONS=16 # number of virtual consoles options SLOW_VGA # do byte-wide i/o's to TS and GDC regs options "STD8X16FONT" # Compile font in makeoptions "STD8X16FONT"="cp850" options SC_HISTORY_SIZE=200 # number of history buffer lines # # `flags' for sc0: # 0x01 Use a 'visual' bell # 0x02 Use a 'blink' cursor # 0x04 Use an 'underline' cursor # 0x06 Use a 'blinking underline' (destructive) cursor # 0x08 Force detection of keyboard, else we always assume a keyboard # 0x10 Old-style (XT) keyboard support, useful for older ThinkPads # 0x20 Don't reset keyboard, useful for some newer ThinkPads # # The Numeric Processing eXtension driver. This should be configured if # your machine has a math co-processor, unless the coprocessor is very # buggy. If it is not configured then you *must* configure math emulation # (see above). If both npx0 and emulation are configured, then only npx0 # is used (provided it works). device npx0 at isa? port "IO_NPX" iosiz 0x0 flags 0x0 irq 13 vector npxintr # # `flags' for npx0: # 0x01 don't use the npx registers to optimize bcopy # 0x02 don't use the npx registers to optimize bzero # 0x04 don't use the npx registers to optimize copyin or copyout. # The npx registers are normally used to optimize copying and zeroing when # all of the following conditions are satisfied: # "I586_CPU" is an option # the cpu is an i586 (perhaps not a Pentium) # the probe for npx0 succeeds # INT 16 exception handling works. # Then copying and zeroing using the npx registers is normally 30-100% faster. # The flags can be used to control cases where it doesn't work or is slower. # Setting them at boot time using userconfig works right (the optimizations # are not used until later in the bootstrap when npx0 is attached). # # # `iosiz' for npx0: # This can be used instead of the MAXMEM option to set the memory size. If # it is nonzero, then it overrides both the MAXMEM option and the memory # size reported by the BIOS.
Setting it at boot time using userconfig takes # effect on the next reboot after the change has been recorded in the kernel # binary (the size is used early in the boot before userconfig has a chance # to change it). # # # Optional ISA and EISA devices: # # # SCSI host adapters: `aha', `aic', `bt', `nca' # # aha: Adaptec 154x # ahc: Adaptec 274x/284x/294x # aic: Adaptec 152x and sound cards using the Adaptec AIC-6360 (slow!) # bt: Most Buslogic controllers # nca: ProAudioSpectrum cards using the NCR 5380 or Trantor T130 # uha: UltraStore 14F and 34F # sea: Seagate ST01/02 8 bit controller (slow!) # wds: Western Digital WD7000 controller (no scatter/gather!). # # Note that the order is important for Buslogic cards to be # probed correctly. # controller bt0 at isa? port "IO_BT0" bio irq ? vector bt_isa_intr controller aha0 at isa? port "IO_AHA0" bio irq ? drq 5 vector ahaintr controller uha0 at isa? port "IO_UHA0" bio irq ? drq 5 vector uhaintr controller aic0 at isa? port 0x340 bio irq 11 vector aicintr controller nca0 at isa? port 0x1f88 bio irq 10 vector ncaintr controller nca1 at isa? port 0x1f84 controller nca2 at isa? port 0x1f8c controller nca3 at isa? port 0x1e88 controller nca4 at isa? port 0x350 bio irq 5 vector ncaintr controller sea0 at isa? bio irq 5 iomem 0xdc000 iosiz 0x2000 vector seaintr controller wds0 at isa? port 0x350 bio irq 15 drq 6 vector wdsintr # # ST-506, ESDI, and IDE hard disks: `wdc' and `wd' # # The flags fields are used to enable the multi-sector I/O and # the 32BIT I/O modes. The flags may be used in either the controller # definition or in the individual disk definitions. The controller # definition is supported for the boot configuration stuff. # # Each drive has a 16 bit flags value defined: # The low 8 bits are the maximum value for the multi-sector I/O, # where 0xff defaults to the maximum that the drive can handle. # The high bit of the 16 bit flags (0x8000) allows probing for # 32 bit transfers. Bit 14 (0x4000) enables a hack to wake # up powered-down laptop drives. Bit 13 (0x2000) allows # probing for PCI IDE DMA controllers, such as Intel's PIIX # south bridges. See the wd.4 man page. # # The flags field for the drives can be specified in the controller # specification with the low 16 bits for drive 0, and the high 16 bits # for drive 1. # e.g.: #controller wdc0 at isa? port "IO_WD1" bio irq 14 flags 0x00ff8004 vector wdintr # # specifies that drive 0 will be allowed to probe for 32 bit transfers and # a maximum multi-sector transfer of 4 sectors, and drive 1 will not be # allowed to probe for 32 bit transfers, but will allow multi-sector # transfers up to the maximum that the drive supports. # # If you are using a PCI controller that is not running in compatibility # mode (for example, it is a 2nd IDE PCI interface), then use config line(s) # such as: # #controller wdc2 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd4 at wdc2 drive 0 #disk wd5 at wdc2 drive 1 # #controller wdc3 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd6 at wdc3 drive 0 #disk wd7 at wdc3 drive 1 # # Note that the above config would be useful for a Promise card, when used # on an MB that already has a PIIX controller. Note the bogus irq and port # entries. These are automatically filled in by the IDE/PCI support. # controller wdc0 at isa? port "IO_WD1" bio irq 14 vector wdintr disk wd0 at wdc0 drive 0 disk wd1 at wdc0 drive 1 controller wdc1 at isa?
port "IO_WD2" bio irq 15 vector wdintr disk wd2 at wdc1 drive 0 disk wd3 at wdc1 drive 1 # # Options for `wdc': # # CMD640 enables serializing access to primary and secondary channel # of the CMD640B IDE Chip. The serializing will only take place # if this option is set *and* the chip is probed by the pci-system. # options "CMD640" #Enable work around for CMD640 h/w bug # # ATAPI enables the support for ATAPI-compatible IDE devices # options ATAPI #Enable ATAPI support for IDE bus options ATAPI_STATIC #Don't do it as an LKM # IDE CD-ROM driver - requires wdc controller and ATAPI option device wcd0 # IDE floppy driver - requires wdc controller and ATAPI option device wfd0 # # Standard floppy disk controllers and floppy tapes: `fdc', `fd', and `ft' # controller fdc0 at isa? port "IO_FD1" bio irq 6 drq 2 vector fdintr # # FDC_DEBUG enables floppy debugging. Since the debug output is huge, you # gotta turn it actually on by setting the variable fd_debug with DDB, # however. options FDC_DEBUG # This option is undocumented on purpose. options FDC_PRINT_BOGUS_CHIPTYPE # # Activate this line instead of the fdc0 line above if you happen to # have an Insight floppy tape. Probing them proved to be dangerous # for people with floppy disks only, so it's "hidden" behind a flag: #controller fdc0 at isa? port "IO_FD1" bio flags 1 irq 6 drq 2 vector fdintr disk fd0 at fdc0 drive 0 disk fd1 at fdc0 drive 1 tape ft0 at fdc0 drive 2 # # Other standard PC hardware: `lpt', `mse', `psm', `sio', etc. # # lpt: printer port # lpt specials: # port can be specified as ?, this will cause the driver to scan # the BIOS port list; # the irq and vector clauses may be omitted, this # will force the port into polling mode. # mse: Logitech and ATI InPort bus mouse ports # psm: PS/2 mouse port [note: conflicts with sc0/vt0, thus "conflicts" keywd] # sio: serial ports (see sio(4)) device lpt0 at isa? port? tty irq 7 vector lptintr device lpt1 at isa? port "IO_LPT3" tty irq 5 vector lptintr device mse0 at isa? port 0x23c tty irq 5 vector mseintr device psm0 at isa? port "IO_KBD" conflicts tty irq 12 vector psmintr # Options for psm: options PSM_HOOKAPM #hook the APM resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event device sio0 at isa? port "IO_COM1" tty flags 0x10 irq 4 vector siointr # # `flags' for serial drivers that support consoles (only for sio now): # 0x10 enable console support for this unit. The other console flags # are ignored unless this is set. Enabling console support does # not make the unit the preferred console - boot with -h or set # the 0x20 flag for that. Currently, at most one unit can have # console support; the first one (in config file order) with # this flag set is preferred. Setting this flag for sio0 gives # the old behaviour. # 0x20 force this unit to be the console (unless there is another # higher priority console). This replaces the COMCONSOLE option. # 0x40 reserve this unit for low level console operations. Do not # # PnP `flags' (set via userconfig using pnp x flags y) # 0x1 disable probing of this device. Used to prevent your modem # from being attached as a PnP modem. # # Options for serial drivers that support consoles (only for sio now): options BREAK_TO_DEBUGGER #a BREAK on a comconsole goes to #DDB, if available. 
options CONSPEED=9600 #default speed for serial console (default 9600) # Options for sio: options COM_ESP #code for Hayes ESP options COM_MULTIPORT #code for some cards with shared IRQs options DSI_SOFT_MODEM #code for DSI Softmodems options "EXTRA_SIO=2" #number of extra sio ports to allocate # Other flags for sio that aren't documented in the man page. # 0x20000 enable hardware RTS/CTS and larger FIFOs. Only works for # ST16650A-compatible UARTs. # # Network interfaces: `cx', `ed', `el', `ep', `ie', `is', `le', `lnc' # # ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver (requires sppp) # cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing) # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # el: 3Com 3C501 (slow!) # ep: 3Com 3C509 (buggy) # fe: Fujitsu MB86960A/MB86965A Ethernet # ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210; Intel EtherExpress # le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100, # DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422) # lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL) # sr: RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp) # wl: Lucent Wavelan (ISA card only). # ze: IBM/National Semiconductor PCMCIA ethernet controller. # zp: 3Com PCMCIA Etherlink III (It does not require shared memory for # send/receive operation, but it needs 'iomem' to read/write the # attribute memory) # device ar0 at isa? port 0x300 net irq 10 iomem 0xd0000 vector arintr device cx0 at isa? port 0x240 net irq 15 drq 7 vector cxintr device ed0 at isa? port 0x280 net irq 5 iomem 0xd8000 vector edintr device eg0 at isa? port 0x310 net irq 5 vector egintr device el0 at isa? port 0x300 net irq 9 vector elintr device ep0 at isa? port 0x300 net irq 10 vector epintr device ex0 at isa? port? net irq? vector exintr device fe0 at isa? port 0x300 net irq ? vector feintr device ie0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector ieintr device ie1 at isa? port 0x360 net irq 7 iomem 0xd0000 vector ieintr device le0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector le_intr device lnc0 at isa? port 0x300 net irq 10 drq 0 vector lncintr device sr0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector srintr options WLCACHE # enables the signal-strength cache options WLDEBUG # enables verbose debugging output device wl0 at isa? port 0x300 net irq ? vector wlintr # We can (bogusly) include both the dedicated PCCARD drivers and the generic # support when COMPILING_LINT. device ze0 at isa? port 0x300 net irq 5 iomem 0xd8000 vector zeintr device zp0 at isa? port 0x300 net irq 10 iomem 0xd8000 vector zpintr # # ATM related options # # The `en' device provides support for Efficient Networks (ENI) # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0). # # atm pseudo-device provides generic atm functions and is required for # atm devices. # NATM enables the netnatm protocol family that can be used to # bypass TCP/IP. # # the current driver supports only PVC operations (no atm-arp, no multicast). 
# for more details, please read the original documents at # http://www.ccrc.wustl.edu/pub/chuck/bsdatm/wucs.html # pseudo-device atm device en0 device en1 options NATM #native ATM # # Audio drivers: `snd', `sb', `pas', `gus', `pca' # # snd: Voxware sound support code # sb: SoundBlaster PCM - SoundBlaster, SB Pro, SB16, ProAudioSpectrum # sbxvi: SoundBlaster 16 # sbmidi: SoundBlaster 16 MIDI interface # pas: ProAudioSpectrum PCM and MIDI # gus: Gravis Ultrasound - Ultrasound, Ultrasound 16, Ultrasound MAX # gusxvi: Gravis Ultrasound 16-bit PCM (do not use) # mss: Microsoft Sound System # css: Crystal Sound System (CSS 423x PnP) # sscape: Ensoniq Soundscape MIDI interface # sscape_mss: Ensoniq Soundscape PCM (requires sscape) # opl: Yamaha OPL-2 and OPL-3 FM - SB, SB Pro, SB 16, ProAudioSpectrum # uart: stand-alone 6850 UART for MIDI # mpu: Roland MPU-401 stand-alone card # # Beware! The addresses specified below are also hard-coded in # i386/isa/sound/sound_config.h. If you change the values here, you # must also change the values in the include file. # # pcm: PCM audio through various sound cards. # # This is work in progress from Luigi Rizzo. It has support for # CS423x based cards, OPTi931, SB16 PnP, GusPnP. For more information # about this driver, take a look at sys/i386/isa/snd/README. # # The flags of the device tell the driver a bit more info about the # device that normally is obtained through the PnP interface. # bit 2..0 secondary DMA channel; # bit 4 set if the board uses two dma channels; # bit 15..8 board type, overrides autodetection; leave it # zero if you don't know what to put in (and you don't, # since this is unsupported at the moment...). # # This driver will use the new PnP code if it's available. # # pca: PCM audio through your PC speaker # # If you have a GUS-MAX card and want to use the CS4231 codec on the # card, the drqs for the gus max must be 8 bit (1, 2, or 3). # # If you would like to use the full duplex option on the gus, then define # flags to be the ``read dma channel''. # # options BROKEN_BUS_CLOCK #PAS-16 isn't working and OPTI chipset # options SYMPHONY_PAS #PAS-16 isn't working and SYMPHONY chipset # options EXCLUDE_SBPRO #PAS-16 # options SBC_IRQ=5 #PAS-16. Must match irq on sb0 line. # PAS16: The order of the pas0/sb0/opl0 is important since the # sb emulation is enabled in the pas-16 attach. # # The i386/isa/sound/sound.doc has more information. # Controls all "VOXWARE" driver sound devices. See Luigi's driver # below for an alternate which may work better for some cards. # controller snd0 device pas0 at isa? port 0x388 irq 10 drq 6 vector pasintr device sb0 at isa? port 0x220 irq 5 drq 1 vector sbintr device sbxvi0 at isa? drq 5 device sbmidi0 at isa? port 0x330 device awe0 at isa? port 0x620 device gus0 at isa? port 0x220 irq 12 drq 1 vector gusintr #device gus0 at isa? port 0x220 irq 12 drq 1 flags 0x3 vector gusintr device mss0 at isa? port 0x530 irq 10 drq 1 vector adintr device css0 at isa? port 0x534 irq 5 drq 1 flags 0x08 vector adintr device sscape0 at isa? port 0x330 irq 9 drq 0 vector sscapeintr device trix0 at isa? port 0x330 irq 6 drq 0 vector sscapeintr device sscape_mss0 at isa? port 0x534 irq 5 drq 1 vector sndintr device opl0 at isa? port 0x388 device mpu0 at isa? port 0x330 irq 6 drq 0 device uart0 at isa? port 0x330 irq 5 vector "m6850intr" # Luigi's snd code (use INSTEAD of snd0 and all VOXWARE drivers!). # You may also wish to enable the pnp controller with this, for pnp # sound cards. # #device pcm0 at isa? port ?
tty irq 10 drq 1 flags 0x0 vector pcmintr # Not controlled by `snd' device pca0 at isa? port IO_TIMER1 tty # # Miscellaneous hardware: # # mcd: Mitsumi CD-ROM # scd: Sony CD-ROM # matcd: Matsushita/Panasonic CD-ROM # wt: Wangtek and Archive QIC-02/QIC-36 tape drives # ctx: Cortex-I frame grabber # apm: Laptop Advanced Power Management (experimental) # spigot: The Creative Labs Video Spigot video-acquisition board # meteor: Matrox Meteor video capture board # alog: Industrial Computer Source AIO8-P driver # bktr: Bt848 capture boards (http://www.freebsd.org/~fsmp/HomeAuto/Bt848.html) # cy: Cyclades serial driver # dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!) # gp: National Instruments AT-GPIB and AT-GPIB/TNT board # asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey # gsc: Genius GS-4500 hand scanner. # joy: joystick # labpc: National Instrument's Lab-PC and Lab-PC+ # rc: RISCom/8 multiport card # rp: Comtrol Rocketport(ISA) - single card # tw: TW-523 power line interface for use with X-10 home control products # si: Specialix SI/XIO 4-32 port terminal multiplexor # stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based) # stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent) # # Notes on APM # The flags takes the following meaning for apm0: # 0x0020 Statclock is broken. # 0x0011 Limit APM protocol to 1.1 or 1.0 # 0x0010 Limit APM protocol to 1.0 # # # Notes on the spigot: # The video spigot is at 0xad6. This port address can not be changed. # The irq values may only be 10, 11, or 15 # I/O memory is an 8kb region. Possible values are: # 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff # The start address must be on an even boundary. # Add the following option if you want to allow non-root users to be able # to access the spigot. This option is not secure because it allows users # direct access to the I/O page. # options SPIGOT_UNSECURE # # Notes on the Comtrol Rocketport driver: # # The exact values used for rp0 depend on how many boards you have # in the system. The manufacturer's sample configs are listed as: # # Comtrol Rocketport ISA single card # device rp0 at isa? port 0x280 tty # # If instead you have two ISA cards, one installed at 0x100 and the # second installed at 0x180, then you should add the following to # your kernel configuration file: # # device rp0 at isa? port 0x100 tty # device rp1 at isa? port 0x180 tty # # For 4 ISA cards, it might be something like this: # # device rp0 at isa? port 0x180 tty # device rp1 at isa? port 0x100 tty # device rp2 at isa? port 0x340 tty # device rp3 at isa? port 0x240 tty # # And for PCI cards, you only need say: # # device rp0 # device rp1 # ... # Note: Make sure that any Rocketport PCI devices are specified BEFORE the # ISA Rocketport devices. # Notes on the Digiboard driver: # # The following flag values have special meanings: # 0x01 - alternate layout of pins # 0x02 - use the windowed PC/Xe in 64K mode # Notes on the Specialix SI/XIO driver: # **This is NOT a Specialix supported Driver!** # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. # Notes on the Stallion stl and stli drivers: # See src/i386/isa/README.stl for complete instructions. # This is version 0.0.5alpha, unsupported by Stallion. # The stl driver has a secondary IO port hard coded at 0x280. You need # to change src/i386/isa/stallion.c if you reconfigure this on the boards. 
# The "flags" and "iosiz" settings on the stli driver depend on the board: # EasyConnection 8/64 ISA: flags 23 iosiz 0x1000 # EasyConnection 8/64 EISA: flags 24 iosiz 0x10000 # EasyConnection 8/64 MCA: flags 25 iosiz 0x1000 # ONboard ISA: flags 4 iosiz 0x10000 # ONboard EISA: flags 7 iosiz 0x10000 # ONboard MCA: flags 3 iosiz 0x10000 # Brumby: flags 2 iosiz 0x4000 # Stallion: flags 1 iosiz 0x10000 device mcd0 at isa? port 0x300 bio irq 10 vector mcdintr # for the Sony CDU31/33A CDROM device scd0 at isa? port 0x230 bio # for the SoundBlaster 16 multicd - up to 4 devices controller matcd0 at isa? port 0x230 bio device wt0 at isa? port 0x300 bio irq 5 drq 1 vector wtintr device ctx0 at isa? port 0x230 iomem 0xd0000 device spigot0 at isa? port 0xad6 irq 15 iomem 0xee000 vector spigintr device apm0 at isa? device gp0 at isa? port 0x2c0 tty device gsc0 at isa? port "IO_GSC1" tty drq 3 device joy0 at isa? port "IO_GAME" device alog0 at isa? port 0x260 tty irq 5 vector alogintr device cy0 at isa? tty irq 10 iomem 0xd4000 iosiz 0x2000 vector cyintr device dgb0 at isa? port 0x220 iomem 0xfc0000 iosiz ? tty device labpc0 at isa? port 0x260 tty irq 5 vector labpcintr device rc0 at isa? port 0x220 tty irq 12 vector rcintr device rp0 at isa? port 0x280 tty # the port and irq for tw0 are fictitious device tw0 at isa? port 0x380 tty irq 11 vector twintr device si0 at isa? iomem 0xd0000 tty irq 12 vector siintr device asc0 at isa? port IO_ASC1 tty drq 3 irq 10 vector ascintr device bqu0 at isa? port 0x150 device stl0 at isa? port 0x2a0 tty irq 10 vector stlintr device stli0 at isa? port 0x2a0 tty iomem 0xcc000 flags 23 iosiz 0x1000 device loran0 at isa? port ? tty irq 5 vector loranintr # # EISA devices: # # The EISA bus device is eisa0. It provides auto-detection and # configuration support for all devices on the EISA bus. # # The `ahb' device provides support for the Adaptec 174X adapter. # # The `ahc' device provides support for the Adaptec 274X and 284X # adapters. The 284X, although a VLB card responds to EISA probes. # # fea: DEC DEFEA EISA FDDI adapter # controller eisa0 controller ahb0 controller ahc0 device fea0 # enable tagged command queuing, which is a major performance win on # devices that support it (and controllers with enough SCB's) options AHC_TAGENABLE # enable SCB paging - See the ahc.4 man page options AHC_SCBPAGING_ENABLE # The aic7xxx driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. Unfortunately, # this doesn't work on some motherboards, which prevents it from being the # default. options AHC_ALLOW_MEMIO # By default, only 10 EISA slots are probed, since the slot numbers # above clash with the configuration address space of the PCI subsystem, # and the EISA probe is not very smart about this. This is sufficient # for most machines, but in particular the HP NetServer LC series comes # with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11, # thus you need to bump this figure to 12 for them. options "EISA_SLOTS=12" # # PCI devices: # # The main PCI bus device is `pci'. It provides auto-detection and # configuration support for all devices on the PCI bus, using either # configuration mode defined in the PCI specification. # # The `ahc' device provides support for the Adaptec 29/3940(U)(W) # and motherboard based AIC7870/AIC7880 adapters. # # The `ncr' device provides support for the NCR 53C810 and 53C825 # self-contained SCSI host adapters. 
# # The `amd' device provides support for the Tekram DC-390 and 390T # SCSI host adapters, but is expected to work with any AMD 53c974 # PCI SCSI chip and the AMD Ethernet+SCSI Combo chip, after some # local patches were applied to the sources (that had originally # been written by Tekram and limited to work with their SCSI cards). # # The `de' device provides support for the Digital Equipment DC21040 # self-contained Ethernet adapter. # # The `fxp' device provides support for the Intel EtherExpress Pro/100B # PCI Fast Ethernet adapters. # # The `tx' device provides support for the SMC 9432TX cards. # # The `vx' device provides support for the 3Com 3C590 and 3C595 # cards (early support). # # The `fpa' device provides support for the Digital DEFPA PCI FDDI # adapter. pseudo-device fddi is also needed. # # The `meteor' device is a PCI video capture board. It can also have the # following options: # options METEOR_ALLOC_PAGES=xxx preallocate kernel pages for data entry # figure (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE # (e.g. a single 640x480 frame at 2 bytes/pixel with 4k pages needs # (640*480*2*1+4095)/4096 = 150 pages) # options METEOR_DEALLOC_PAGES remove all allocated pages on close(2) # options METEOR_DEALLOC_ABOVE=xxx remove all allocated pages above the # specified amount. If this value is below the allocated amount no action is # taken # options METEOR_SYSTEM_DEFAULT={METEOR_PAL|METEOR_NTSC|METEOR_SECAM}, used # for initialization of fps routine when a signal is not present. # # The 'bktr' device is a PCI video capture board. It also has a TV tuner # on board. # controller pci0 controller ahc1 controller ncr0 controller amd0 device de0 device fxp0 device tx0 device vx0 device fpa0 device meteor0 device bktr0 # # PCCARD/PCMCIA # # card: slot controller # pcic: slots controller card0 controller pcic0 at card? controller pcic1 at card? # # Laptop/Notebook options: # # See also: # apm under `Miscellaneous hardware' # above. # For older notebooks that signal a powerfail condition (external # power supply dropped, or battery state low) by issuing an NMI: options POWERFAIL_NMI # make it beep instead of panicking # # Parallel-Port Bus # # Parallel port bus support is provided by the `ppbus' device. # Multiple devices may be attached to the parallel port; devices # are automatically probed and attached when found. # # Supported devices: # vpo Iomega Zip Drive # Requires SCSI disk support ('scbus' and 'sd'), best # performance is achieved with ports in EPP 1.9 mode. # nlpt Parallel Printer # ppi General-purpose I/O ("Geek Port") # # Supported interfaces: # ppc ISA-bus parallel port interfaces. # controller ppbus0 controller vpo0 at ppbus? device nlpt0 at ppbus? device ppi0 at ppbus? device pps0 at ppbus? controller ppc0 at isa? disable port ? irq 7 vector ppcintr # Kernel BOOTP support options BOOTP # Use BOOTP to obtain IP address/hostname options BOOTP_NFSROOT # NFS mount root filesystem using BOOTP info options "BOOTP_NFSV3" # Use NFS v3 to NFS mount root options BOOTP_COMPAT # Workaround for broken bootp daemons. # # An obsolete option to test kern_opt.c. # options GATEWAY # If you want to disable loadable kernel modules (LKM), you # might want to use this option. #options NO_LKM # # Add tie-ins for a hardware watchdog. This only enables the hooks; # the user must still supply the actual driver. # options HW_WDOG # More undocumented options for linting.
options CLK_CALIBRATION_LOOP options "CLK_USE_I8254_CALIBRATION" options CLK_USE_TSC_CALIBRATION options CLUSTERDEBUG options COMPAT_LINUX options CPU_UPGRADE_HW_CACHE options DEBUG options "DEBUG_1284" options DEVFS_ROOT #options DISABLE_PSE options "EXT2FS" options "I586_PMC_GUPROF=0x70000" options "IBCS2" # broken: #options IPFILTER options KEY options KEY_DEBUG options LOCKF_DEBUG options LOUTB options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options KBDIO_DEBUG=2 options MSGMNB=2049 options MSGMNI=41 options MSGSEG=2049 options MSGSSZ=16 options MSGTQL=41 options NBUF=512 options NETATALKDEBUG options NMBCLUSTERS=1024 options NPX_DEBUG options NULLFS_DIAGNOSTIC options PANIC_REBOOT_WAIT_TIME=16 options "PCVT_24LINESDEF" options PCVT_CTRL_ALT_DEL options PCVT_EMU_MOUSE options PCVT_FREEBSD=211 options PCVT_META_ESC options PCVT_NSCREENS=9 options PCVT_PRETTYSCRNS options PCVT_SCANSET=2 options PCVT_SCREENSAVER options PCVT_USEKBDSEC options "PCVT_VT220KEYB" options PSM_DEBUG=1 options "SCSI_2_DEF" options SCSI_DELAY=8 # Be pessimistic about Joe SCSI device options SCSI_NCR_DEBUG options SCSI_NCR_DFLT_TAGS=4 options SCSI_NCR_MAX_SYNC=10000 options SCSI_NCR_MAX_WIDE=1 options SCSI_NCR_MYADDR=7 options SEMMAP=31 options SEMMNI=11 options SEMMNS=61 options SEMMNU=31 options SEMMSL=61 options SEMOPM=101 options SEMUME=11 options SHOW_BUSYBUFS # List buffers that prevent root unmount options SHMALL=1025 options "SHMMAX=(SHMMAXPGS*PAGE_SIZE+1)" options SHMMAXPGS=1025 options SHMMIN=2 options SHMMNI=33 options SHMSEG=9 options SI_DEBUG options SIMPLELOCK_DEBUG options SPX_HACK # The 'dpt' driver provides hardware RAID-{0,1,5} support, multi-initiator I/O # See sys/dev/dpt for debugging and other subtle options. # DPT_VERIFY_HINTR Performs some strict hardware interrupt testing. # Only use if you suspect PCI bus corruption problems # DPT_RESTRICTED_FREELIST Normally, the freelist used by the DPT for queueing # will grow to accommodate increased use. This growth # will NOT shrink. To restrict the number of queue # slots to exactly what the DPT can hold at one time, # enable this option. # DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various # instruments are enabled. Assumed to be enabled by # /usr/sbin/dpt_* tools. # DPT_FREELIST_IS_STACK For optimal L{1,2} CPU cache utilization, enable # this option. Otherwise, the transaction queue is # a FIFO. I cannot measure the performance gain; # a sketch contrasting the two disciplines follows # this block. # DPT_HANDLE_TIMEOUTS Normally device timeouts are handled by the DPT. # If you want the driver to handle timeouts, enable # this option. If your system is very busy, this # option will create more trouble than it solves. # DPT_TIMEOUT_FACTOR Used to compute the excessive amount of time to # wait when timing out with the above option. # DPT_DEBUG_xxxx These are controllable from sys/dev/dpt/dpt.h # DPT_LOST_IRQ When enabled, will try, once per second, to catch # any interrupt that got lost. Seems to help in some # DPT-firmware/Motherboard combinations. Minimal # cost, great benefit.
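The stack-versus-queue trade-off behind DPT_FREELIST_IS_STACK is easy to see in miniature. The following is a minimal, hypothetical C sketch (invented names, not code from sys/dev/dpt): freeing to the head and allocating from the head (a stack) hands back the most recently completed command block, the one most likely to still sit in the L1/L2 cache, while freeing to the tail (a queue) always returns the coldest block.

#include <stddef.h>
#include <stdio.h>

struct ccb {				/* stand-in for a DPT command block */
	struct ccb	*cb_next;
	int		cb_id;
};

static struct ccb *cb_head;		/* shared list head */
static struct ccb *cb_tail;		/* used only by the queue discipline */

/* Stack discipline: the just-completed block is reused first (cache-warm). */
static void
cb_free_stack(struct ccb *cb)
{
	cb->cb_next = cb_head;
	cb_head = cb;
	if (cb_tail == NULL)
		cb_tail = cb;
}

/* Queue discipline: the block is reused last (cache-cold by then). */
static void
cb_free_queue(struct ccb *cb)
{
	cb->cb_next = NULL;
	if (cb_tail != NULL)
		cb_tail->cb_next = cb;
	else
		cb_head = cb;
	cb_tail = cb;
}

/* Both disciplines allocate from the head. */
static struct ccb *
cb_alloc(void)
{
	struct ccb *cb = cb_head;

	if (cb != NULL) {
		cb_head = cb->cb_next;
		if (cb_head == NULL)
			cb_tail = NULL;
	}
	return (cb);
}

int
main(void)
{
	static struct ccb pool[4];
	struct ccb *cb;
	int i;

	for (i = 0; i < 4; i++) {
		pool[i].cb_id = i;
		cb_free_queue(&pool[i]);
	}
	cb = cb_alloc();		/* block 0 comes off the head */
	cb_free_stack(cb);		/* stack free: 0 goes back on top... */
	printf("stack reuses ccb %d\n", cb_alloc()->cb_id);	/* ...so 0 again */
	return (0);
}

With cb_free_queue() in place of the cb_free_stack() call, the final allocation would return block 1, the coldest one, instead; the description above suggests the cache effect is real but hard to measure, which matches the author's own caveat.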
controller dpt0 # DPT options options DPT_VERIFY_HINTR options DPT_RESTRICTED_FREELIST options DPT_MEASURE_PERFORMANCE options DPT_FREELIST_IS_STACK options DPT_HANDLE_TIMEOUTS options DPT_TIMEOUT_FACTOR=4 options DPT_INTR_DELAY=200 # Some motherboards need that options DPT_LOST_IRQ Index: head/sys/conf/files =================================================================== --- head/sys/conf/files (revision 34265) +++ head/sys/conf/files (revision 34266) @@ -1,456 +1,458 @@ aicasm optional ahc device-driver \ dependency "$S/dev/aic7xxx/*.[chyl]" \ compile-with "make -f $S/dev/aic7xxx/Makefile MAKESRCPATH=$S/dev/aic7xxx" \ no-obj no-implicit-rule \ clean "aicasm" # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and # dependency lines other than the first are silently ignored. # aic7xxx_{seq,reg}.h optional ahc device-driver \ compile-with "./aicasm ${INCLUDES} -o aic7xxx_seq.h -r aic7xxx_reg.h $S/dev/aic7xxx/aic7xxx.seq" \ no-obj no-implicit-rule before-depend \ clean "aic7xxx_seq.h aic7xxx_reg.h" \ dependency "$S/dev/aic7xxx/aic7xxx.{reg,seq} $S/scsi/scsi_message.h aicasm" cfs/cfs_namecache.c optional vcfs cfs/cfs_nbsd.c optional vcfs cfs/cfs_fbsd.c optional vcfs cfs/cfs_psdev.c optional vcfs cfs/cfs_subr.c optional vcfs cfs/cfs_venus.c optional vcfs cfs/cfs_vfsops.c optional vcfs cfs/cfs_vnodeops.c optional vcfs ddb/db_access.c optional ddb ddb/db_aout.c optional ddb ddb/db_break.c optional ddb ddb/db_command.c optional ddb ddb/db_examine.c optional ddb ddb/db_expr.c optional ddb ddb/db_input.c optional ddb ddb/db_lex.c optional ddb ddb/db_output.c optional ddb ddb/db_print.c optional ddb ddb/db_ps.c optional ddb ddb/db_run.c optional ddb ddb/db_sym.c optional ddb ddb/db_trap.c optional ddb ddb/db_variables.c optional ddb ddb/db_watch.c optional ddb ddb/db_write_cmd.c optional ddb dev/ccd/ccd.c optional ccd device-driver dev/dpt/dpt_control.c optional dpt device-driver dev/dpt/dpt_scsi.c optional dpt device-driver dev/en/midway.c optional en device-driver dev/pdq/pdq.c optional fea device-driver dev/pdq/pdq_ifsubr.c optional fea device-driver dev/pdq/pdq.c optional fpa device-driver dev/pdq/pdq_ifsubr.c optional fpa device-driver dev/ppbus/nlpt.c optional nlpt dev/ppbus/ppb_base.c optional ppbus dev/ppbus/ppb_1284.c optional ppbus dev/ppbus/ppbconf.c optional ppbus dev/ppbus/ppi.c optional ppi dev/ppbus/pps.c optional pps dev/ppbus/vpo.c optional vpo dev/vn/vn.c optional vn dev/vx/if_vx.c optional vx device-driver gnu/ext2fs/ext2_alloc.c optional ext2fs gnu/ext2fs/ext2_balloc.c optional ext2fs gnu/ext2fs/ext2_inode.c optional ext2fs gnu/ext2fs/ext2_inode_cnv.c optional ext2fs gnu/ext2fs/ext2_linux_balloc.c optional ext2fs gnu/ext2fs/ext2_linux_ialloc.c optional ext2fs gnu/ext2fs/ext2_lookup.c optional ext2fs gnu/ext2fs/ext2_subr.c optional ext2fs gnu/ext2fs/ext2_vfsops.c optional ext2fs gnu/ext2fs/ext2_vnops.c optional ext2fs isofs/cd9660/cd9660_bmap.c optional cd9660 isofs/cd9660/cd9660_lookup.c optional cd9660 isofs/cd9660/cd9660_node.c optional cd9660 isofs/cd9660/cd9660_rrip.c optional cd9660 isofs/cd9660/cd9660_util.c optional cd9660 isofs/cd9660/cd9660_vfsops.c optional cd9660 isofs/cd9660/cd9660_vnops.c optional cd9660 kern/imgact_aout.c standard kern/imgact_elf.c standard kern/imgact_gzip.c optional gzip kern/imgact_shell.c standard kern/inflate.c optional gzip kern/init_main.c standard kern/init_sysent.c standard kern/init_sysvec.c standard kern/kern_intr.c standard kern/kern_module.c 
standard kern/kern_linker.c standard kern/link_aout.c standard kern/kern_acct.c standard kern/kern_clock.c standard kern/kern_conf.c standard kern/kern_descrip.c standard kern/kern_exec.c standard kern/kern_exit.c standard kern/kern_fork.c standard kern/kern_ktrace.c standard kern/kern_lkm.c standard kern/kern_lock.c standard kern/kern_lockf.c standard kern/kern_malloc.c standard kern/kern_mib.c standard kern/kern_ntptime.c standard kern/kern_opt.c standard kern/kern_physio.c standard kern/kern_proc.c standard kern/kern_prot.c standard kern/kern_resource.c standard kern/kern_shutdown.c standard kern/kern_sig.c standard kern/kern_subr.c standard kern/kern_synch.c standard kern/kern_sysctl.c standard kern/kern_time.c standard kern/kern_timeout.c standard kern/kern_xxx.c standard kern/md5c.c optional md5 kern/md5c.c optional sppp kern/subr_diskslice.c standard kern/subr_autoconf.c standard kern/subr_dkbad.c standard kern/subr_log.c standard kern/subr_prf.c standard kern/subr_prof.c standard kern/subr_rlist.c standard kern/subr_xxx.c standard kern/sys_generic.c standard kern/sys_pipe.c standard kern/sys_process.c standard kern/sys_socket.c standard kern/sysv_ipc.c standard kern/sysv_msg.c optional sysvmsg kern/sysv_sem.c optional sysvsem kern/sysv_shm.c optional sysvshm kern/tty.c standard kern/tty_compat.c standard kern/tty_conf.c standard kern/tty_pty.c optional pty kern/tty_snoop.c optional snp kern/tty_subr.c standard kern/tty_tb.c optional tb kern/tty_tty.c standard kern/uipc_domain.c standard kern/uipc_mbuf.c standard kern/uipc_proto.c standard kern/uipc_socket.c standard kern/uipc_socket2.c standard kern/uipc_syscalls.c standard kern/uipc_usrreq.c standard kern/vfs_bio.c standard kern/vfs_cache.c standard kern/vfs_cluster.c standard kern/vfs_conf.c standard kern/vfs_default.c standard kern/vfs_init.c standard kern/vfs_lookup.c standard kern/vfs_subr.c standard kern/vfs_syscalls.c standard kern/vfs_vnops.c standard kern/kern_threads.c standard kern/vfs_aio.c standard miscfs/deadfs/dead_vnops.c standard miscfs/devfs/devfs_tree.c optional devfs miscfs/devfs/devfs_vfsops.c optional devfs miscfs/devfs/devfs_vnops.c optional devfs miscfs/fdesc/fdesc_vfsops.c optional fdesc miscfs/fdesc/fdesc_vnops.c optional fdesc miscfs/fifofs/fifo_vnops.c standard miscfs/kernfs/kernfs_vfsops.c optional kernfs miscfs/kernfs/kernfs_vnops.c optional kernfs miscfs/nullfs/null_subr.c optional nullfs miscfs/nullfs/null_vfsops.c optional nullfs miscfs/nullfs/null_vnops.c optional nullfs miscfs/portal/portal_vfsops.c optional portal miscfs/portal/portal_vnops.c optional portal miscfs/procfs/procfs_ctl.c optional procfs miscfs/procfs/procfs_fpregs.c standard miscfs/procfs/procfs_map.c optional procfs miscfs/procfs/procfs_mem.c standard miscfs/procfs/procfs_note.c optional procfs miscfs/procfs/procfs_regs.c standard miscfs/procfs/procfs_status.c optional procfs miscfs/procfs/procfs_subr.c optional procfs miscfs/procfs/procfs_type.c optional procfs miscfs/procfs/procfs_vfsops.c optional procfs miscfs/procfs/procfs_vnops.c optional procfs miscfs/specfs/spec_vnops.c standard miscfs/umapfs/umap_subr.c optional umapfs miscfs/umapfs/umap_vfsops.c optional umapfs miscfs/umapfs/umap_vnops.c optional umapfs miscfs/union/union_subr.c optional union miscfs/union/union_vfsops.c optional union miscfs/union/union_vnops.c optional union msdosfs/msdosfs_conv.c optional msdosfs msdosfs/msdosfs_denode.c optional msdosfs msdosfs/msdosfs_fat.c optional msdosfs msdosfs/msdosfs_lookup.c optional msdosfs msdosfs/msdosfs_vfsops.c optional 
msdosfs msdosfs/msdosfs_vnops.c optional msdosfs net/bpf.c optional bpfilter net/bpf_filter.c optional bpfilter net/bsd_comp.c optional ppp_bsdcomp net/hostcache.c standard net/if.c standard net/if_atmsubr.c optional atm net/if_disc.c optional disc net/if_ethersubr.c optional ether net/if_fddisubr.c optional fddi net/if_loop.c optional loop net/if_media.c standard net/if_mib.c standard net/if_ppp.c optional ppp net/if_sl.c optional sl net/if_spppsubr.c optional sppp net/if_tun.c optional tun net/ppp_deflate.c optional ppp_deflate net/ppp_tty.c optional ppp net/radix.c standard net/raw_cb.c standard net/raw_usrreq.c standard net/route.c standard net/rtsock.c standard net/slcompress.c optional ppp net/slcompress.c optional sl net/zlib.c optional ppp_deflate netatalk/aarp.c optional netatalk netatalk/at_control.c optional netatalk netatalk/at_proto.c optional netatalk netatalk/at_rmx.c optional netatalkdebug netatalk/ddp_input.c optional netatalk netatalk/ddp_output.c optional netatalk netatalk/ddp_usrreq.c optional netatalk #netccitt/ccitt_proto.c optional ccitt #netccitt/hd_debug.c optional hdlc #netccitt/hd_input.c optional hdlc #netccitt/hd_output.c optional hdlc #netccitt/hd_subr.c optional hdlc #netccitt/hd_timer.c optional hdlc #netccitt/if_x25subr.c optional ccitt #netccitt/llc_input.c optional llc #netccitt/llc_output.c optional llc #netccitt/llc_subr.c optional llc #netccitt/llc_timer.c optional llc #netccitt/pk_acct.c optional ccitt #netccitt/pk_debug.c optional ccitt #netccitt/pk_input.c optional ccitt #netccitt/pk_llcsubr.c optional hdlc #netccitt/pk_llcsubr.c optional llc #netccitt/pk_output.c optional ccitt #netccitt/pk_subr.c optional ccitt #netccitt/pk_timer.c optional ccitt #netccitt/pk_usrreq.c optional ccitt #netimp/if_imp.c optional imp #netimp/if_imphost.c optional imp #netimp/raw_imp.c optional imp netinet/if_atm.c optional atm netinet/if_ether.c optional ether netinet/igmp.c optional inet netinet/in.c optional inet netinet/in_hostcache.c optional inet netinet/in_pcb.c optional inet netinet/in_proto.c optional inet netinet/in_rmx.c optional inet netinet/ip_divert.c optional ipdivert netinet/ip_fw.c optional ipfirewall netinet/ip_icmp.c optional inet netinet/ip_input.c optional inet netinet/ip_mroute.c optional inet netinet/ip_output.c optional inet netinet/raw_ip.c optional inet netinet/tcp_debug.c optional tcpdebug netinet/tcp_input.c optional inet netinet/tcp_output.c optional inet netinet/tcp_subr.c optional inet netinet/tcp_timer.c optional inet netinet/tcp_usrreq.c optional inet netinet/udp_usrreq.c optional inet netinet/ip_fil.c optional ipfilter inet netinet/fil.c optional ipfilter inet netinet/ip_nat.c optional ipfilter inet netinet/ip_frag.c optional ipfilter inet netinet/ip_state.c optional ipfilter inet netinet/ip_proxy.c optional ipfilter inet netinet/mln_ipl.c optional ipfilter inet netipx/ipx.c optional ipx netipx/ipx_cksum.c optional ipx netipx/ipx_input.c optional ipx netipx/ipx_ip.c optional ipx netipx/ipx_outputfl.c optional ipx netipx/ipx_pcb.c optional ipx netipx/ipx_proto.c optional ipx netipx/ipx_tun.c optional ipx netipx/ipx_usrreq.c optional ipx netipx/spx_debug.c optional ipx netipx/spx_usrreq.c optional ipx #netiso/clnp_debug.c optional iso #netiso/clnp_er.c optional iso #netiso/clnp_frag.c optional iso #netiso/clnp_input.c optional iso #netiso/clnp_options.c optional iso #netiso/clnp_output.c optional iso #netiso/clnp_raw.c optional iso #netiso/clnp_subr.c optional iso #netiso/clnp_timer.c optional iso #netiso/cltp_usrreq.c optional iso 
#netiso/esis.c optional iso #netiso/idrp_usrreq.c optional iso #netiso/if_eon.c optional eon #netiso/iso.c optional iso #netiso/iso_chksum.c optional iso #netiso/iso_pcb.c optional iso #netiso/iso_proto.c optional iso #netiso/iso_snpac.c optional iso #netiso/tp_astring.c optional iso #netiso/tp_astring.c optional tpip #netiso/tp_cons.c optional iso #netiso/tp_driver.c optional iso #netiso/tp_driver.c optional tpip #netiso/tp_emit.c optional iso #netiso/tp_emit.c optional tpip #netiso/tp_inet.c optional iso #netiso/tp_inet.c optional tpip #netiso/tp_input.c optional iso #netiso/tp_input.c optional tpip #netiso/tp_iso.c optional iso #netiso/tp_meas.c optional iso #netiso/tp_meas.c optional tpip #netiso/tp_output.c optional iso #netiso/tp_output.c optional tpip #netiso/tp_pcb.c optional iso #netiso/tp_pcb.c optional tpip #netiso/tp_subr.c optional iso #netiso/tp_subr.c optional tpip #netiso/tp_subr2.c optional iso #netiso/tp_subr2.c optional tpip #netiso/tp_timer.c optional iso #netiso/tp_timer.c optional tpip #netiso/tp_trace.c optional iso #netiso/tp_trace.c optional tpip #netiso/tp_usrreq.c optional iso #netiso/tp_usrreq.c optional tpip #netiso/tuba_subr.c optional iso tuba #netiso/tuba_table.c optional iso tuba #netiso/tuba_usrreq.c optional iso tuba netkey/key.c optional key netkey/key_debug.c optional key_debug netnatm/natm.c optional natm netnatm/natm_pcb.c optional natm netnatm/natm_proto.c optional natm #netns/idp_usrreq.c optional ns #netns/ns.c optional ns #netns/ns_error.c optional ns #netns/ns_input.c optional ns #netns/ns_ip.c optional ns #netns/ns_output.c optional ns #netns/ns_pcb.c optional ns #netns/ns_proto.c optional ns #netns/spp_debug.c optional ns #netns/spp_usrreq.c optional ns nfs/nfs_bio.c optional nfs nfs/nfs_node.c optional nfs nfs/nfs_nqlease.c optional nfs nfs/nfs_serv.c optional nfs nfs/nfs_socket.c optional nfs nfs/nfs_srvcache.c optional nfs nfs/nfs_subs.c optional nfs nfs/nfs_syscalls.c optional nfs nfs/nfs_vfsops.c optional nfs nfs/nfs_vnops.c optional nfs nfs/bootp_subr.c optional bootp nfs/krpc_subr.c optional bootp pccard/pccard.c optional card pccard/pccard_beep.c optional card pccard/pcic.c optional pcic device-driver pci/pcic_p.c optional pcic device-driver pci/aic7870.c optional ahc device-driver \ dependency "aic7xxx_reg.h $S/pci/aic7870.c" pci/brooktree848.c optional bktr device-driver pci/bt9xx.c optional bt device-driver pci/dpt_pci.c optional dpt device-driver pci/cy_pci.c optional cy device-driver pci/if_de.c optional de device-driver pci/if_ed_p.c optional ed device-driver pci/if_en_pci.c optional en device-driver pci/if_fxp.c optional fxp device-driver pci/if_lnc_p.c optional lnc device-driver pci/if_fpa.c optional fpa device-driver pci/if_sr_p.c optional sr device-driver pci/if_tx.c optional tx device-driver pci/if_vx_pci.c optional vx device-driver pci/meteor.c optional meteor device-driver pci/ncr.c optional ncr device-driver pci/pci.c optional pci device-driver pci/pci_compat.c optional pci pci/pcisupport.c optional pci pci/tek390.c optional amd device-driver pci/wdc_p.c optional wdc device-driver posix4/posix4_mib.c optional posix4 posix4/ksched.c optional posix4 scsi/cd.c optional cd scsi/ch.c optional ch scsi/od.c optional od scsi/pt.c optional pt scsi/scsi_base.c optional scbus scsi/scsi_driver.c optional scbus scsi/scsi_ioctl.c optional scbus scsi/scsi_sense.c optional scbus scsi/scsiconf.c optional scbus scsi/sctarg.c optional sctarg scsi/sd.c optional sd scsi/ssc.c optional ssc scsi/st.c optional st scsi/su.c optional su scsi/uk.c 
optional scbus scsi/worm.c optional worm ufs/ffs/ffs_alloc.c optional ffs ufs/ffs/ffs_alloc.c optional mfs ufs/ffs/ffs_balloc.c optional ffs ufs/ffs/ffs_balloc.c optional mfs ufs/ffs/ffs_inode.c optional ffs ufs/ffs/ffs_inode.c optional mfs +ufs/ffs/ffs_softdep_stub.c optional ffs +ufs/ffs/ffs_softdep.c optional softupdates ufs/ffs/ffs_subr.c optional ffs ufs/ffs/ffs_subr.c optional mfs ufs/ffs/ffs_tables.c optional ffs ufs/ffs/ffs_tables.c optional mfs ufs/ffs/ffs_vfsops.c optional ffs ufs/ffs/ffs_vfsops.c optional mfs ufs/ffs/ffs_vnops.c optional ffs ufs/ffs/ffs_vnops.c optional mfs ufs/ifs/ifs_isyscalls.c optional vcfs ufs/ifs/ifs_subr.c optional vcfs ufs/mfs/mfs_vfsops.c optional mfs ufs/mfs/mfs_vnops.c optional mfs ufs/ufs/ufs_bmap.c standard ufs/ufs/ufs_disksubr.c standard ufs/ufs/ufs_ihash.c standard ufs/ufs/ufs_inode.c standard ufs/ufs/ufs_lookup.c standard ufs/ufs/ufs_quota.c standard ufs/ufs/ufs_vfsops.c standard ufs/ufs/ufs_vnops.c standard vm/default_pager.c standard vm/device_pager.c standard vm/swap_pager.c standard vm/vm_fault.c standard vm/vm_glue.c standard vm/vm_init.c standard vm/vm_kern.c standard vm/vm_map.c standard vm/vm_meter.c standard vm/vm_mmap.c standard vm/vm_object.c standard vm/vm_page.c standard vm/vm_pageout.c standard vm/vm_pager.c standard vm/vm_swap.c standard vm/vm_unix.c standard vm/vnode_pager.c standard vm/vm_zone.c standard Index: head/sys/conf/options =================================================================== --- head/sys/conf/options (revision 34265) +++ head/sys/conf/options (revision 34266) @@ -1,189 +1,195 @@ -# $Id: options,v 1.63 1998/02/27 10:02:37 itojun Exp $ +# $Id: options,v 1.64 1998/03/04 10:24:08 dufault Exp $ # Format: # Option name filename # Miscellaneous options. BOUNCE_BUFFERS opt_bounce.h COMPAT_43 opt_compat.h COMPAT_SUNOS opt_compat.h COMPILING_LINT opt_lint.h DDB DDB_UNATTENDED opt_ddb.h GDB_REMOTE_CHAT opt_ddb.h DEVFS DEVFS_ROOT opt_devfs.h FAILSAFE HW_WDOG KTRACE MD5 MFS_AUTOLOAD opt_mfs.h MFS_ROOT opt_mfs.h NO_LKM NSWAPDEV opt_swap.h PPS_SYNC opt_ntp.h QUOTA SPX_HACK SUIDDIR opt_suiddir.h SYSVMSG opt_sysvipc.h SYSVSEM opt_sysvipc.h SYSVSHM opt_sysvipc.h UCONSOLE # POSIX 4. POSIX4 opt_posix4.h # Do we want the config file compiled into the kernel? INCLUDE_CONFIG_FILE opt_config.h # Options for static file systems. These should only be used at config # time, since the corresponding lkms cannot work if there are any static # dependencies. Unusability is enforced by hiding the defines for the # options in a never-included header. EXT2FS opt_dontuse.h FDESC opt_dontuse.h KERNFS opt_dontuse.h MFS opt_dontuse.h MSDOSFS opt_dontuse.h NULLFS opt_dontuse.h PORTAL opt_dontuse.h PROCFS opt_dontuse.h UMAPFS opt_dontuse.h # These static filesystems have one slightly bogus static dependency in # sys/i386/i386/autoconf.c. If any of these filesystems are # statically compiled into the kernel, code for mounting them as root # filesystems will be enabled - but look below. Boot-code is purposely # unavailable for the LKM-based versions. CD9660 FFS NFS + +# If you are following the conditions in the copyright, +# you can enable soft-updates which will speed up a lot of things +# and make the system safer from crashes at the same time. +# Otherwise a STUB module will be compiled in. +SOFTUPDATES opt_ffs.h # The above static dependencies are planned to be removed, with a # _ROOT option to control if it is usable as root. This list # allows these options to be present in config files already (though # they won't make any difference yet).
CD9660_ROOT opt_cd9660.h FFS_ROOT opt_ffs.h NFS_ROOT opt_nfs.h # Multi-session CD-Rs might require a huge amount of time in order to # "settle". If we are about to mount them as the root f/s, we gotta # wait a little. CD9660_ROOTDELAY opt_cd9660.h # The union static file system has bogus static dependencies, so it isn't # hidden yet. UNION # Options used only in param.c. EXTRAVNODES opt_defunct.h MSGMNB opt_param.h MSGMNI opt_param.h MSGSEG opt_param.h MSGSSZ opt_param.h MSGTQL opt_param.h NBUF opt_param.h NMBCLUSTERS opt_param.h SEMMAP opt_param.h SEMMNI opt_param.h SEMMNS opt_param.h SEMMNU opt_param.h SEMMSL opt_param.h SEMOPM opt_param.h SEMUME opt_param.h SHMALL opt_param.h SHMMAX opt_param.h SHMMAXPGS opt_param.h SHMMIN opt_param.h SHMMNI opt_param.h SHMSEG opt_param.h # Generic SCSI options. SCSIDEBUG opt_scsi.h SCSI_DELAY opt_scsi.h SCSI_REPORT_GEOMETRY opt_scsi.h SCSI_2_DEF opt_scsi.h # Options used only in scsi/od.c. OD_AUTO_TURNOFF opt_od.h OD_BOGUS_NOT_READY opt_od.h # Options used only in pci/ncr.c SCSI_NCR_DEBUG opt_ncr.h SCSI_NCR_DFLT_TAGS opt_ncr.h SCSI_NCR_MAX_SYNC opt_ncr.h SCSI_NCR_MAX_WIDE opt_ncr.h SCSI_NCR_MYADDR opt_ncr.h # Resource limits. CHILD_MAX opt_defunct.h DFLDSIZ opt_rlimit.h MAXDSIZ opt_rlimit.h OPEN_MAX opt_defunct.h # Net stuff. ARP_PROXYALL opt_defunct.h BOOTP opt_bootp.h BOOTP_COMPAT opt_bootp.h BOOTP_NFSROOT opt_bootp.h BOOTP_NFSV3 opt_bootp.h GATEWAY opt_defunct.h MROUTING opt_mrouting.h INET opt_inet.h IPDIVERT IPFIREWALL opt_ipfw.h IPFIREWALL_VERBOSE opt_ipfw.h IPFIREWALL_VERBOSE_LIMIT opt_ipfw.h IPFIREWALL_DEFAULT_TO_ACCEPT opt_ipfw.h IPX opt_ipx.h IPXIP opt_ipx.h IPTUNNEL opt_ipx.h NETATALK opt_atalk.h PPP_BSDCOMP opt_ppp.h PPP_DEFLATE opt_ppp.h PPP_FILTER opt_ppp.h TCP_COMPAT_42 opt_compat.h TCPDEBUG # XXX Conflict: # of devices vs network protocol (Native ATM). # This makes "atm.h" unusable. NATM opt_natm.h # DPT driver debug flags DPT_VERIFY_HINTR opt_dpt.h DPT_USE_SINTR opt_dpt.h DPT_RESTRICTED_FREELIST opt_dpt.h DPT_MEASURE_PERFORMANCE opt_dpt.h DPT_FREELIST_IS_STACK opt_dpt.h DPT_HANDLE_TIMEOUTS opt_dpt.h DPT_TIMEOUT_FACTOR opt_dpt.h DPT_INTR_DELAY opt_dpt.h DPT_LOST_IRQ opt_dpt.h # Misc debug flags. Most of these should probably be replaced with # 'DEBUG', and then let people recompile just the interesting modules # with 'make CC="cc -DDEBUG"'. CLUSTERDEBUG opt_debug_cluster.h DEBUG_1284 opt_debug_1284.h LOCKF_DEBUG opt_debug_lockf.h LOUTB opt_debug_outb.h NPX_DEBUG opt_debug_npx.h NETATALKDEBUG opt_atalk.h NULLFS_DIAGNOSTIC opt_debug_nullfs.h SI_DEBUG opt_debug_si.h # These cause changes all over the kernel DEBUG opt_global.h DIAGNOSTIC opt_global.h SIMPLELOCK_DEBUG opt_global.h # These are VM related options VM_KMEM_SIZE opt_vm.h VM_KMEM_SIZE_SCALE opt_vm.h VM_KMEM_SIZE_MAX opt_vm.h # sys/netkey KEY KEY_DEBUG opt_key.h Index: head/sys/dev/de/if_de.c =================================================================== --- head/sys/dev/de/if_de.c (revision 34265) +++ head/sys/dev/de/if_de.c (revision 34266) @@ -1,5442 +1,5444 @@ +#undef __FreeBSD__ +#define __FreeBSD__ 3 /* $NetBSD: if_de.c,v 1.56 1997/10/20 14:32:46 matt Exp $ */ -/* $Id: if_de.c,v 1.79 1998/02/06 12:14:08 eivind Exp $ */ +/* $Id: if_de.c,v 1.80 1998/02/20 13:11:50 bde Exp $ */ /*- * Copyright (c) 1994-1997 Matt Thomas (matt@3am-software.com) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Id: if_de.c,v 1.94 1997/07/03 16:55:07 thomas Exp * */ /* * DEC 21040 PCI Ethernet Controller * * Written by Matt Thomas * BPF support code stolen directly from if_ec.c * * This driver supports the DEC DE435 or any other PCI * board which supports the 21040, 21041, or 21140 (mostly). */ #define TULIP_HDR_DATA #include "opt_inet.h" #include "opt_ipx.h" #include #include #include #include #include #include #include #if defined(__FreeBSD__) #include #elif defined(__bsdi__) || defined(__NetBSD__) #include #endif #if defined(__NetBSD__) #include "rnd.h" #if NRND > 0 #include #endif #endif #include #if defined(SIOCSIFMEDIA) && !defined(TULIP_NOIFMEDIA) #include #endif #include #ifdef TULIP_USE_SOFTINTR #include #endif #if defined(__bsdi__) && _BSDI_VERSION >= 199701 #include #include #endif #include "bpfilter.h" #if NBPFILTER > 0 #include #endif #ifdef INET #include #include #endif #ifdef IPX #include #include #endif #ifdef NS #include #include #endif #include #if defined(__FreeBSD__) #include #include #if NPCI > 0 #include #include #define DEVAR_INCLUDE "pci/if_devar.h" #endif #endif /* __FreeBSD__ */ #if defined(__bsdi__) #include #include #include #include #include #include #include #if _BSDI_VERSION < 199510 #include #else #define NEISA 0 #endif #if NEISA > 0 && _BSDI_VERSION >= 199401 #include #define TULIP_EISA #endif #define DEVAR_INCLUDE "i386/pci/if_devar.h" #endif /* __bsdi__ */ #if defined(__NetBSD__) #include #if defined(INET) #include #endif #include #if defined(__alpha__) #include #endif #include #include #include #define DEVAR_INCLUDE "dev/pci/if_devar.h" #endif /* __NetBSD__ */ /* * Intel CPUs should use I/O mapped access. */ #if defined(__i386__) || defined(TULIP_EISA) #define TULIP_IOMAPPED #endif #if 0 /* * This turns on all sorts of debugging stuff and makes the * driver much larger. */ #define TULIP_DEBUG #endif #if 0 #define TULIP_PERFSTATS #endif #if 0 #define TULIP_USE_SOFTINTR #endif #define TULIP_HZ 10 #include DEVAR_INCLUDE /* * This module supports * the DEC 21040 PCI Ethernet Controller. * the DEC 21041 PCI Ethernet Controller. * the DEC 21140 PCI Fast Ethernet Controller.
*/ static void tulip_mii_autonegotiate(tulip_softc_t * const sc, const unsigned phyaddr); static tulip_intrfunc_t tulip_intr_shared(void *arg); static tulip_intrfunc_t tulip_intr_normal(void *arg); static void tulip_init(tulip_softc_t * const sc); static void tulip_reset(tulip_softc_t * const sc); static ifnet_ret_t tulip_ifstart_one(struct ifnet *ifp); static ifnet_ret_t tulip_ifstart(struct ifnet *ifp); static struct mbuf *tulip_txput(tulip_softc_t * const sc, struct mbuf *m); static void tulip_txput_setup(tulip_softc_t * const sc); static void tulip_rx_intr(tulip_softc_t * const sc); static void tulip_addr_filter(tulip_softc_t * const sc); static unsigned tulip_mii_readreg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno); static void tulip_mii_writereg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data); static int tulip_mii_map_abilities(tulip_softc_t * const sc, unsigned abilities); static tulip_media_t tulip_mii_phy_readspecific(tulip_softc_t * const sc); static int tulip_srom_decode(tulip_softc_t * const sc); #if defined(IFM_ETHER) static int tulip_ifmedia_change(struct ifnet * const ifp); static void tulip_ifmedia_status(struct ifnet * const ifp, struct ifmediareq *req); #endif /* static void tulip_21140_map_media(tulip_softc_t *sc); */ static void tulip_timeout_callback( void *arg) { tulip_softc_t * const sc = arg; tulip_spl_t s = TULIP_RAISESPL(); TULIP_PERFSTART(timeout) sc->tulip_flags &= ~TULIP_TIMEOUTPENDING; sc->tulip_probe_timeout -= 1000 / TULIP_HZ; (sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_TIMER); TULIP_PERFEND(timeout); TULIP_RESTORESPL(s); } static void tulip_timeout( tulip_softc_t * const sc) { if (sc->tulip_flags & TULIP_TIMEOUTPENDING) return; sc->tulip_flags |= TULIP_TIMEOUTPENDING; timeout(tulip_timeout_callback, sc, (hz + TULIP_HZ / 2) / TULIP_HZ); } #if defined(TULIP_NEED_FASTTIMEOUT) static void tulip_fasttimeout_callback( void *arg) { tulip_softc_t * const sc = arg; tulip_spl_t s = TULIP_RAISESPL(); sc->tulip_flags &= ~TULIP_FASTTIMEOUTPENDING; (sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_FASTTIMER); TULIP_RESTORESPL(s); } static void tulip_fasttimeout( tulip_softc_t * const sc) { if (sc->tulip_flags & TULIP_FASTTIMEOUTPENDING) return; sc->tulip_flags |= TULIP_FASTTIMEOUTPENDING; timeout(tulip_fasttimeout_callback, sc, 1); } #endif static int tulip_txprobe( tulip_softc_t * const sc) { struct mbuf *m; /* * Before we are sure this is the right media we need * to send a small packet to make sure there's carrier. * Strangely, BNC and AUI will "see" receive data if * either is connected so the transmit is the only way * to verify the connectivity. */ MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return 0; /* * Construct a LLC TEST message which will point to ourselves. */ bcopy(sc->tulip_enaddr, mtod(m, struct ether_header *)->ether_dhost, 6); bcopy(sc->tulip_enaddr, mtod(m, struct ether_header *)->ether_shost, 6); mtod(m, struct ether_header *)->ether_type = htons(3); mtod(m, unsigned char *)[14] = 0; mtod(m, unsigned char *)[15] = 0; mtod(m, unsigned char *)[16] = 0xE3; /* LLC Class1 TEST (no poll) */ m->m_len = m->m_pkthdr.len = sizeof(struct ether_header) + 3; /* * send it! 
*/ sc->tulip_cmdmode |= TULIP_CMD_TXRUN; sc->tulip_intrmask |= TULIP_STS_TXINTR; sc->tulip_flags |= TULIP_TXPROBE_ACTIVE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); if ((m = tulip_txput(sc, m)) != NULL) m_freem(m); sc->tulip_probe.probe_txprobes++; return 1; } #ifdef BIG_PACKET #define TULIP_SIAGEN_WATCHDOG (sc->tulip_if.if_mtu > ETHERMTU ? TULIP_WATCHDOG_RXDISABLE|TULIP_WATCHDOG_TXDISABLE : 0) #else #define TULIP_SIAGEN_WATCHDOG 0 #endif static void tulip_media_set( tulip_softc_t * const sc, tulip_media_t media) { const tulip_media_info_t *mi = sc->tulip_mediums[media]; if (mi == NULL) return; /* * If we are switching media, make sure we don't think there's * any stale RX activity */ sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); TULIP_CSR_WRITE(sc, csr_sia_tx_rx, mi->mi_sia_tx_rx); if (sc->tulip_features & TULIP_HAVE_SIAGP) { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_control|mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); DELAY(50); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_data|mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); } else { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); } TULIP_CSR_WRITE(sc, csr_sia_connectivity, mi->mi_sia_connectivity); } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { #define TULIP_GPR_CMDBITS (TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_SCRAMBLER|TULIP_CMD_TXTHRSHLDCTL) /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. */ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, (u_int8_t) mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. 
*/ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpcontrol); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_MII && sc->tulip_probe_state != TULIP_PROBE_INACTIVE) { int idx; if (sc->tulip_features & TULIP_HAVE_SIAGP) { const u_int8_t *dp; dp = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx = 0; idx < mi->mi_reset_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx = 0; idx < mi->mi_gpr_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } } else { for (idx = 0; idx < mi->mi_reset_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx = 0; idx < mi->mi_gpr_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx]); } } if (sc->tulip_flags & TULIP_TRYNWAY) { tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); } else if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { u_int32_t data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_CONTROL); data &= ~(PHYCTL_SELECT_100MB|PHYCTL_FULL_DUPLEX|PHYCTL_AUTONEG_ENABLE); sc->tulip_flags &= ~TULIP_DIDNWAY; if (TULIP_IS_MEDIA_FD(media)) data |= PHYCTL_FULL_DUPLEX; if (TULIP_IS_MEDIA_100MB(media)) data |= PHYCTL_SELECT_100MB; tulip_mii_writereg(sc, sc->tulip_phyaddr, PHYREG_CONTROL, data); } } } static void tulip_linkup( tulip_softc_t * const sc, tulip_media_t media) { if ((sc->tulip_flags & TULIP_LINKUP) == 0) sc->tulip_flags |= TULIP_PRINTLINKUP; sc->tulip_flags |= TULIP_LINKUP; sc->tulip_if.if_flags &= ~IFF_OACTIVE; #if 0 /* XXX how does this work with ifmedia? */ if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { if (sc->tulip_if.if_flags & IFF_FULLDUPLEX) { if (TULIP_CAN_MEDIA_FD(media) && sc->tulip_mediums[TULIP_FD_MEDIA_OF(media)] != NULL) media = TULIP_FD_MEDIA_OF(media); } else { if (TULIP_IS_MEDIA_FD(media) && sc->tulip_mediums[TULIP_HD_MEDIA_OF(media)] != NULL) media = TULIP_HD_MEDIA_OF(media); } } #endif if (sc->tulip_media != media) { #ifdef TULIP_DEBUG sc->tulip_dbg.dbg_last_media = sc->tulip_media; #endif sc->tulip_media = media; sc->tulip_flags |= TULIP_PRINTMEDIA; if (TULIP_IS_MEDIA_FD(sc->tulip_media)) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; } else if (sc->tulip_chipid != TULIP_21041 || (sc->tulip_flags & TULIP_DIDNWAY) == 0) { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } } /* * We could set probe_timeout to 0 but setting to 3000 puts this * in one central place and all that matters is that tulip_linkup is * followed by a tulip_timeout. Therefore setting it should not * result in aberrant behaviour. */ sc->tulip_probe_timeout = 3000; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_TRYNWAY); if (sc->tulip_flags & TULIP_INRESET) { tulip_media_set(sc, sc->tulip_media); } else if (sc->tulip_probe_media != sc->tulip_media) { /* * No reason to change media if we have the right media.
*/ tulip_reset(sc); tulip_init(sc); } } static void tulip_media_print( tulip_softc_t * const sc) { if ((sc->tulip_flags & TULIP_LINKUP) == 0) return; if (sc->tulip_flags & TULIP_PRINTMEDIA) { printf(TULIP_PRINTF_FMT ": enabling %s port\n", TULIP_PRINTF_ARGS, tulip_mediums[sc->tulip_media]); sc->tulip_flags &= ~(TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); } else if (sc->tulip_flags & TULIP_PRINTLINKUP) { printf(TULIP_PRINTF_FMT ": link up\n", TULIP_PRINTF_ARGS); sc->tulip_flags &= ~TULIP_PRINTLINKUP; } } #if defined(TULIP_DO_GPR_SENSE) static tulip_media_t tulip_21140_gpr_media_sense( tulip_softc_t * const sc) { tulip_media_t maybe_media = TULIP_MEDIA_UNKNOWN; tulip_media_t last_media = TULIP_MEDIA_UNKNOWN; tulip_media_t media; /* * If one of the media blocks contained a default media flag, * use that. */ for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { const tulip_media_info_t *mi; /* * Media is not supported (or is full-duplex). */ if ((mi = sc->tulip_mediums[media]) == NULL || TULIP_IS_MEDIA_FD(media)) continue; if (mi->mi_type != TULIP_MEDIAINFO_GPR) continue; /* * Remember the media if this is the "default" media. */ if (mi->mi_default && maybe_media == TULIP_MEDIA_UNKNOWN) maybe_media = media; /* * No activity mask? Can't see if it is active if there's no mask. */ if (mi->mi_actmask == 0) continue; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) != mi->mi_actdata) continue; #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": gpr_media_sense: %s: 0x%02x & 0x%02x == 0x%02x\n", TULIP_PRINTF_ARGS, tulip_mediums[media], TULIP_CSR_READ(sc, csr_gp) & 0xFF, mi->mi_actmask, mi->mi_actdata); #endif /* * It does! If this is the first media we detected, then * remember this media. If it isn't the first, then there were * multiple matches, which we equate to no match (since we don't * know which to select, if any). */ if (last_media == TULIP_MEDIA_UNKNOWN) { last_media = media; } else if (last_media != media) { last_media = TULIP_MEDIA_UNKNOWN; } } return (last_media != TULIP_MEDIA_UNKNOWN) ? last_media : maybe_media; } #endif /* TULIP_DO_GPR_SENSE */ static tulip_link_status_t tulip_media_link_monitor( tulip_softc_t * const sc) { const tulip_media_info_t * const mi = sc->tulip_mediums[sc->tulip_media]; tulip_link_status_t linkup = TULIP_LINK_DOWN; if (mi == NULL) { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_link_monitor: %s: botch at line %d\n", tulip_mediums[sc->tulip_media],__LINE__); #endif return TULIP_LINK_UNKNOWN; } /* * Have we seen some packets? If so, the link must be good. */ if ((sc->tulip_flags & (TULIP_RXACT|TULIP_LINKUP)) == (TULIP_RXACT|TULIP_LINKUP)) { sc->tulip_flags &= ~TULIP_RXACT; sc->tulip_probe_timeout = 3000; return TULIP_LINK_UP; } sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_MII) { u_int32_t status; /* * Read the PHY status register. */ status = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if (status & PHYSTS_AUTONEG_DONE) { /* * If the PHY has completed autonegotiation, see if the * remote system's abilities have changed. If so, upgrade or * downgrade as appropriate.
*/ u_int32_t abilities = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_AUTONEG_ABILITIES); abilities = (abilities << 6) & status; if (abilities != sc->tulip_abilities) { #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation changed: 0x%04x -> 0x%04x\n", TULIP_PRINTF_ARGS, sc->tulip_phyaddr, sc->tulip_abilities, abilities); #endif if (tulip_mii_map_abilities(sc, abilities)) { tulip_linkup(sc, sc->tulip_probe_media); return TULIP_LINK_UP; } /* * if we had selected media because of autonegotiation, * we need to probe for the new media. */ sc->tulip_probe_state = TULIP_PROBE_INACTIVE; if (sc->tulip_flags & TULIP_DIDNWAY) return TULIP_LINK_DOWN; } } /* * The link is now up. If it was down, say it's back up. */ if ((status & (PHYSTS_LINK_UP|PHYSTS_REMOTE_FAULT)) == PHYSTS_LINK_UP) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { /* * No activity sensor? Assume all's well. */ if (mi->mi_actmask == 0) return TULIP_LINK_UNKNOWN; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) == mi->mi_actdata) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { /* * Assume non-TP is ok for now. */ if (!TULIP_IS_MEDIA_TP(sc->tulip_media)) return TULIP_LINK_UNKNOWN; if ((TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) == 0) linkup = TULIP_LINK_UP; #if defined(TULIP_DEBUG) if (sc->tulip_probe_timeout <= 0) printf(TULIP_PRINTF_FMT ": sia status = 0x%08x\n", TULIP_PRINTF_ARGS, TULIP_CSR_READ(sc, csr_sia_status)); #endif } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { return TULIP_LINK_UNKNOWN; } /* * We will wait for 3 seconds until the link goes into suspect mode. */ if (sc->tulip_flags & TULIP_LINKUP) { if (linkup == TULIP_LINK_UP) sc->tulip_probe_timeout = 3000; if (sc->tulip_probe_timeout > 0) return TULIP_LINK_UP; sc->tulip_flags &= ~TULIP_LINKUP; printf(TULIP_PRINTF_FMT ": link down: cable problem?\n", TULIP_PRINTF_ARGS); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_downed++; #endif return TULIP_LINK_DOWN; } static void tulip_media_poll( tulip_softc_t * const sc, tulip_mediapoll_event_t event) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE && event == TULIP_MEDIAPOLL_TIMER) { switch (tulip_media_link_monitor(sc)) { case TULIP_LINK_DOWN: { /* * Link Monitor failed. Probe for new media. */ event = TULIP_MEDIAPOLL_LINKFAIL; break; } case TULIP_LINK_UP: { /* * Check again soon. */ tulip_timeout(sc); return; } case TULIP_LINK_UNKNOWN: { /* * We can't tell so don't bother. */ return; } } } if (event == TULIP_MEDIAPOLL_LINKFAIL) { if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) { if (TULIP_DO_AUTOSENSE(sc)) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_failures++; #endif sc->tulip_media = TULIP_MEDIA_UNKNOWN; tulip_reset(sc); /* restart probe */ } return; } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_pollintrs++; #endif } if (event == TULIP_MEDIAPOLL_START) { sc->tulip_if.if_flags |= IFF_OACTIVE; if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE) return; sc->tulip_probe_mediamask = 0; sc->tulip_probe_passes = 0; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_media_probes++; #endif /* * If the SROM contained an explicit media to use, use it. */ sc->tulip_cmdmode &= ~(TULIP_CMD_RXRUN|TULIP_CMD_FULLDUPLEX); sc->tulip_flags |= TULIP_TRYNWAY|TULIP_PROBE1STPASS; sc->tulip_flags &= ~(TULIP_DIDNWAY|TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); /* * connidx is defaulted to a media_unknown type.
*/ sc->tulip_probe_media = tulip_srom_conninfo[sc->tulip_connidx].sc_media; if (sc->tulip_probe_media != TULIP_MEDIA_UNKNOWN) { tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_features & TULIP_HAVE_GPR) { sc->tulip_probe_state = TULIP_PROBE_GPRTEST; sc->tulip_probe_timeout = 2000; } else { sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; } } /* * Ignore txprobe failures or spurious callbacks. */ if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED && sc->tulip_probe_state != TULIP_PROBE_MEDIATEST) { sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * If we really transmitted a packet, then that's the media we'll use. */ if (event == TULIP_MEDIAPOLL_TXPROBE_OK || event == TULIP_MEDIAPOLL_LINKPASS) { if (event == TULIP_MEDIAPOLL_LINKPASS) sc->tulip_probe_media = TULIP_MEDIA_10BASET; #if defined(TULIP_DEBUG) else sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++; #endif tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_probe_state == TULIP_PROBE_GPRTEST) { #if defined(TULIP_DO_GPR_SENSE) /* * Check for media via the general purpose register. * * Try to sense the media via the GPR. If the same value * occurs 3 times in a row then just use that. */ if (sc->tulip_probe_timeout > 0) { tulip_media_t new_probe_media = tulip_21140_gpr_media_sense(sc); #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": media_poll: gpr sensing = %s\n", TULIP_PRINTF_ARGS, tulip_mediums[new_probe_media]); #endif if (new_probe_media != TULIP_MEDIA_UNKNOWN) { if (new_probe_media == sc->tulip_probe_media) { if (--sc->tulip_probe_count == 0) tulip_linkup(sc, sc->tulip_probe_media); } else { sc->tulip_probe_count = 10; } } sc->tulip_probe_media = new_probe_media; tulip_timeout(sc); return; } #endif /* TULIP_DO_GPR_SENSE */ /* * Brute force. We cycle through each of the media types * and try to transmit a packet. */ sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; tulip_timeout(sc); return; } if (sc->tulip_probe_state != TULIP_PROBE_MEDIATEST && (sc->tulip_features & TULIP_HAVE_MII)) { tulip_media_t old_media = sc->tulip_probe_media; tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); switch (sc->tulip_probe_state) { case TULIP_PROBE_FAILED: case TULIP_PROBE_MEDIATEST: { /* * Try the next media. */ sc->tulip_probe_mediamask |= sc->tulip_mediums[sc->tulip_probe_media]->mi_mediamask; sc->tulip_probe_timeout = 0; #ifdef notyet if (sc->tulip_probe_state == TULIP_PROBE_FAILED) break; if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) break; sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 300; #endif break; } case TULIP_PROBE_PHYAUTONEG: { return; } case TULIP_PROBE_INACTIVE: { /* * Only probe if we autonegotiated a media that hasn't failed. */ sc->tulip_probe_timeout = 0; if (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) { sc->tulip_probe_media = old_media; break; } tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } default: { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } } if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txprobes_failed[sc->tulip_probe_media]++; #endif sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * switch to another media if we tried this one enough. 
*/ if (/* event == TULIP_MEDIAPOLL_TXPROBE_FAILED || */ sc->tulip_probe_timeout <= 0) { #if defined(TULIP_DEBUG) if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { printf(TULIP_PRINTF_FMT ": poll media unknown!\n", TULIP_PRINTF_ARGS); sc->tulip_probe_media = TULIP_MEDIA_MAX; } #endif /* * Find the next media type to check for. Full Duplex * types are not allowed. */ do { sc->tulip_probe_media -= 1; if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { if (++sc->tulip_probe_passes == 3) { printf(TULIP_PRINTF_FMT ": autosense failed: cable problem?\n", TULIP_PRINTF_ARGS); if ((sc->tulip_if.if_flags & IFF_UP) == 0) { sc->tulip_if.if_flags &= ~IFF_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } sc->tulip_flags ^= TULIP_TRYNWAY; /* XXX */ sc->tulip_probe_mediamask = 0; sc->tulip_probe_media = TULIP_MEDIA_MAX - 1; } } while (sc->tulip_mediums[sc->tulip_probe_media] == NULL || (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) || TULIP_IS_MEDIA_FD(sc->tulip_probe_media)); #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": %s: probing %s\n", TULIP_PRINTF_ARGS, event == TULIP_MEDIAPOLL_TXPROBE_FAILED ? "txprobe failed" : "timeout", tulip_mediums[sc->tulip_probe_media]); #endif sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 1000; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe.probe_txprobes = 0; tulip_reset(sc); tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; } tulip_timeout(sc); /* * If this is hanging off a phy, we know we are doing NWAY and we have * forced the phy to a specific speed. Wait for link up before * sending a packet. */ switch (sc->tulip_mediums[sc->tulip_probe_media]->mi_type) { case TULIP_MEDIAINFO_MII: { if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) return; break; } case TULIP_MEDIAINFO_SIA: { if (TULIP_IS_MEDIA_TP(sc->tulip_probe_media)) { if (TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) return; tulip_linkup(sc, sc->tulip_probe_media); #ifdef notyet if (sc->tulip_features & TULIP_HAVE_MII) tulip_timeout(sc); #endif return; } break; } case TULIP_MEDIAINFO_RESET: case TULIP_MEDIAINFO_SYM: case TULIP_MEDIAINFO_NONE: case TULIP_MEDIAINFO_GPR: { break; } } /* * Try to send a packet.
*/ tulip_txprobe(sc); } static void tulip_media_select( tulip_softc_t * const sc) { if (sc->tulip_features & TULIP_HAVE_GPR) { TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpdata); } /* * If this board has no media, just return */ if (sc->tulip_features & TULIP_HAVE_NOMEDIA) return; if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); (*sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_START); } else { tulip_media_set(sc, sc->tulip_media); } } static void tulip_21040_mediainfo_init( tulip_softc_t * const sc, tulip_media_t media) { sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_THRSHLD160 |TULIP_CMD_BACKOFFCTR; sc->tulip_if.if_baudrate = 10000000; if (media == TULIP_MEDIA_10BASET || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[0], 21040, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[1], 21040, 10BASET_FD); } if (media == TULIP_MEDIA_AUIBNC || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[2], 21040, AUIBNC); } if (media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[3], 21040, EXTSIA); } } static void tulip_21040_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_UNKNOWN); return; } static void tulip_21040_10baset_only_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_10BASET); tulip_media_set(sc, TULIP_MEDIA_10BASET); sc->tulip_media = TULIP_MEDIA_10BASET; } static void tulip_21040_10baset_only_media_select( tulip_softc_t * const sc) { sc->tulip_flags |= TULIP_LINKUP; if (sc->tulip_media == TULIP_MEDIA_10BASET_FD) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; sc->tulip_flags &= ~TULIP_SQETEST; } else { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_flags |= TULIP_SQETEST; } tulip_media_set(sc, sc->tulip_media); } static void tulip_21040_auibnc_only_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_AUIBNC); sc->tulip_flags |= TULIP_SQETEST|TULIP_LINKUP; tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_media = TULIP_MEDIA_AUIBNC; } static void tulip_21040_auibnc_only_media_select( tulip_softc_t * const sc) { tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } static const tulip_boardsw_t tulip_21040_boardsw = { TULIP_21040_GENERIC, tulip_21040_media_probe, tulip_media_select, tulip_media_poll, }; static const tulip_boardsw_t tulip_21040_10baset_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_10baset_only_media_probe, tulip_21040_10baset_only_media_select, NULL, }; static const tulip_boardsw_t tulip_21040_auibnc_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_auibnc_only_media_probe, tulip_21040_auibnc_only_media_select, NULL, }; static void tulip_21041_mediainfo_init( tulip_softc_t * const sc) { tulip_media_info_t * const mi = sc->tulip_mediainfo; #ifdef notyet if (sc->tulip_revinfo >= 0x20) { TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, BNC); return; } #endif TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[2], 21041, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[3], 21041, BNC); } static void tulip_21041_media_probe( tulip_softc_t * const sc) { 
    sc->tulip_if.if_baudrate = 10000000;
    sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_ENHCAPTEFFCT
	|TULIP_CMD_THRSHLD160|TULIP_CMD_BACKOFFCTR;
    sc->tulip_intrmask |= TULIP_STS_LINKPASS;
    tulip_21041_mediainfo_init(sc);
}

static void
tulip_21041_media_poll(
    tulip_softc_t * const sc,
    const tulip_mediapoll_event_t event)
{
    u_int32_t sia_status;

#if defined(TULIP_DEBUG)
    sc->tulip_dbg.dbg_events[event]++;
#endif

    if (event == TULIP_MEDIAPOLL_LINKFAIL) {
	if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE
		|| !TULIP_DO_AUTOSENSE(sc))
	    return;
	sc->tulip_media = TULIP_MEDIA_UNKNOWN;
	tulip_reset(sc);	/* start probe */
	return;
    }

    /*
     * If we've been asked to start a poll or a link change interrupt
     * occurred, restart the probe (and reset the tulip to a known state).
     */
    if (event == TULIP_MEDIAPOLL_START) {
	sc->tulip_if.if_flags |= IFF_OACTIVE;
	sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_RXRUN);
#ifdef notyet
	if (sc->tulip_revinfo >= 0x20) {
	    sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX;
	    sc->tulip_flags |= TULIP_DIDNWAY;
	}
#endif
	TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode);
	sc->tulip_probe_state = TULIP_PROBE_MEDIATEST;
	sc->tulip_probe_media = TULIP_MEDIA_10BASET;
	sc->tulip_probe_timeout = TULIP_21041_PROBE_10BASET_TIMEOUT;
	tulip_media_set(sc, TULIP_MEDIA_10BASET);
	tulip_timeout(sc);
	return;
    }

    if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE)
	return;

    if (event == TULIP_MEDIAPOLL_TXPROBE_OK) {
#if defined(TULIP_DEBUG)
	sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++;
#endif
	tulip_linkup(sc, sc->tulip_probe_media);
	return;
    }

    sia_status = TULIP_CSR_READ(sc, csr_sia_status);
    TULIP_CSR_WRITE(sc, csr_sia_status, sia_status);
    if ((sia_status & TULIP_SIASTS_LINKFAIL) == 0) {
	if (sc->tulip_revinfo >= 0x20) {
	    if (sia_status & (PHYSTS_10BASET_FD << (16 - 6)))
		sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD;
	}
	/*
	 * If the link has passed LinkPass, 10baseT is the
	 * proper media to use.
	 */
	tulip_linkup(sc, sc->tulip_probe_media);
	return;
    }

    /*
     * Wait for up to 2.4 seconds for the link to reach pass state.
     * Only then start scanning the other media for activity.
     * Choose media with receive activity over those without.
     */
    if (sc->tulip_probe_media == TULIP_MEDIA_10BASET) {
	if (event != TULIP_MEDIAPOLL_TIMER)
	    return;
	if (sc->tulip_probe_timeout > 0
		&& (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) == 0) {
	    tulip_timeout(sc);
	    return;
	}
	sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT;
	sc->tulip_flags |= TULIP_WANTRXACT;
	if (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) {
	    sc->tulip_probe_media = TULIP_MEDIA_BNC;
	} else {
	    sc->tulip_probe_media = TULIP_MEDIA_AUI;
	}
	tulip_media_set(sc, sc->tulip_probe_media);
	tulip_timeout(sc);
	return;
    }

    /*
     * If we failed, clear the txprobe active flag.
     */
    if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED)
	sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE;

    if (event == TULIP_MEDIAPOLL_TIMER) {
	/*
	 * If we've received something, then that's our link!
	 */
	if (sc->tulip_flags & TULIP_RXACT) {
	    tulip_linkup(sc, sc->tulip_probe_media);
	    return;
	}
	/*
	 * If no txprobe is active and we either don't require receive
	 * activity or have seen some, fire off a txprobe.
	 */
	if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0
		&& ((sc->tulip_flags & TULIP_WANTRXACT) == 0
		    || (sia_status & TULIP_SIASTS_RXACTIVITY))) {
	    sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT;
	    tulip_txprobe(sc);
	    tulip_timeout(sc);
	    return;
	}
	/*
	 * Take 2 passes through before deciding to not
	 * wait for receive activity.  Then take another
	 * two passes before spitting out a warning.
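	 * (Each timeout below flips the probe between AUI and BNC; the
	 * first timeout also clears TULIP_WANTRXACT, so the second pair
	 * of passes no longer insists on receive activity before probing.)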
*/ if (sc->tulip_probe_timeout <= 0) { if (sc->tulip_flags & TULIP_WANTRXACT) { sc->tulip_flags &= ~TULIP_WANTRXACT; sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; } else { printf(TULIP_PRINTF_FMT ": autosense failed: cable problem?\n", TULIP_PRINTF_ARGS); if ((sc->tulip_if.if_flags & IFF_UP) == 0) { sc->tulip_if.if_flags &= ~IFF_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } } } /* * Since this media failed to probe, try the other one. */ sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; if (sc->tulip_probe_media == TULIP_MEDIA_AUI) { sc->tulip_probe_media = TULIP_MEDIA_BNC; } else { sc->tulip_probe_media = TULIP_MEDIA_AUI; } tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; tulip_timeout(sc); } static const tulip_boardsw_t tulip_21041_boardsw = { TULIP_21041_GENERIC, tulip_21041_media_probe, tulip_media_select, tulip_21041_media_poll }; static const tulip_phy_attr_t tulip_mii_phy_attrlist[] = { { 0x20005c00, 0, /* 08-00-17 */ { { 0x19, 0x0040, 0x0040 }, /* 10TX */ { 0x19, 0x0040, 0x0000 }, /* 100TX */ }, #if defined(TULIP_DEBUG) "NS DP83840", #endif }, { 0x0281F400, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Seeq 80C240" #endif }, #if 0 { 0x0015F420, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Broadcom BCM5000" #endif }, #endif { 0x0281F400, 0, /* 00-A0-BE */ { { 0x11, 0x8000, 0x0000 }, /* 10T */ { 0x11, 0x8000, 0x8000 }, /* 100TX */ { }, /* 100T4 */ { 0x11, 0x4000, 0x4000 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "ICS 1890" #endif }, { 0 } }; static tulip_media_t tulip_mii_phy_readspecific( tulip_softc_t * const sc) { const tulip_phy_attr_t *attr; u_int16_t data; u_int32_t id; unsigned idx = 0; static const tulip_media_t table[] = { TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET, TULIP_MEDIA_100BASETX, TULIP_MEDIA_100BASET4, TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET_FD, TULIP_MEDIA_100BASETX_FD, TULIP_MEDIA_UNKNOWN }; /* * Don't read phy specific registers if link is not up. 
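 *
 * The table above is indexed 10T = 1, 100TX = 2, 100T4 = 3, with 4
 * added when the PHY reports full duplex; e.g. a PHY in 100TX full
 * duplex yields table[2 + 4] == TULIP_MEDIA_100BASETX_FD.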
*/ data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if ((data & (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) != (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) return TULIP_MEDIA_UNKNOWN; id = (tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDHIGH); for (attr = tulip_mii_phy_attrlist;; attr++) { if (attr->attr_id == 0) return TULIP_MEDIA_UNKNOWN; if ((id & ~0x0F) == attr->attr_id) break; } if (attr->attr_modes[PHY_MODE_100TX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100TX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 2; } if (idx == 0 && attr->attr_modes[PHY_MODE_100T4].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100T4]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 3; } if (idx == 0 && attr->attr_modes[PHY_MODE_10T].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_10T]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 1; } if (idx != 0 && attr->attr_modes[PHY_MODE_FULLDUPLEX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_FULLDUPLEX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); idx += ((data & pm->pm_mask) == pm->pm_value ? 4 : 0); } return table[idx]; } static unsigned tulip_mii_get_phyaddr( tulip_softc_t * const sc, unsigned offset) { unsigned phyaddr; for (phyaddr = 1; phyaddr < 32; phyaddr++) { unsigned status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) continue; if (offset == 0) return phyaddr; offset--; } if (offset == 0) { unsigned status = tulip_mii_readreg(sc, 0, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) return TULIP_MII_NOPHY; return 0; } return TULIP_MII_NOPHY; } static int tulip_mii_map_abilities( tulip_softc_t * const sc, unsigned abilities) { sc->tulip_abilities = abilities; if (abilities & PHYSTS_100BASETX_FD) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX_FD; } else if (abilities & PHYSTS_100BASET4) { sc->tulip_probe_media = TULIP_MEDIA_100BASET4; } else if (abilities & PHYSTS_100BASETX) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX; } else if (abilities & PHYSTS_10BASET_FD) { sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD; } else if (abilities & PHYSTS_10BASET) { sc->tulip_probe_media = TULIP_MEDIA_10BASET; } else { sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return 0; } sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return 1; } static void tulip_mii_autonegotiate( tulip_softc_t * const sc, const unsigned phyaddr) { switch (sc->tulip_probe_state) { case TULIP_PROBE_MEDIATEST: case TULIP_PROBE_INACTIVE: { sc->tulip_flags |= TULIP_DIDNWAY; tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, PHYCTL_RESET); sc->tulip_probe_timeout = 3000; sc->tulip_intrmask |= TULIP_STS_ABNRMLINTR|TULIP_STS_NORMALINTR; sc->tulip_probe_state = TULIP_PROBE_PHYRESET; /* FALL THROUGH */ } case TULIP_PROBE_PHYRESET: { u_int32_t status; u_int32_t data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); if (data & PHYCTL_RESET) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } printf(TULIP_PRINTF_FMT "(phy%d): error: reset of PHY never completed!\n", TULIP_PRINTF_ARGS, phyaddr); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; sc->tulip_probe_state = TULIP_PROBE_FAILED; 
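	    /*
	     * The PHY never came out of reset; give up and mark the
	     * interface down.
	     */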
sc->tulip_if.if_flags &= ~(IFF_UP|IFF_RUNNING); return; } status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if ((status & PHYSTS_CAN_AUTONEG) == 0) { #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation disabled\n", TULIP_PRINTF_ARGS, phyaddr); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } if (tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT) != ((status >> 6) | 0x01)) tulip_mii_writereg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT, (status >> 6) | 0x01); tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, data|PHYCTL_AUTONEG_RESTART|PHYCTL_AUTONEG_ENABLE); data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); #if defined(TULIP_DEBUG) if ((data & PHYCTL_AUTONEG_ENABLE) == 0) loudprintf(TULIP_PRINTF_FMT "(phy%d): oops: enable autonegotiation failed: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); else loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation restarted: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); sc->tulip_dbg.dbg_nway_starts++; #endif sc->tulip_probe_state = TULIP_PROBE_PHYAUTONEG; sc->tulip_probe_timeout = 3000; /* FALL THROUGH */ } case TULIP_PROBE_PHYAUTONEG: { u_int32_t status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); u_int32_t data; if ((status & PHYSTS_AUTONEG_DONE) == 0) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation timeout: sts=0x%04x, ctl=0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, status, tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL)); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } data = tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ABILITIES); #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation complete: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); #endif data = (data << 6) & status; if (!tulip_mii_map_abilities(sc, data)) sc->tulip_flags &= ~TULIP_DIDNWAY; return; } default: { #if defined(DIAGNOSTIC) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation failure: state = %d\n", TULIP_PRINTF_ARGS, phyaddr, sc->tulip_probe_state); sc->tulip_dbg.dbg_nway_failures++; #endif } static void tulip_2114x_media_preset( tulip_softc_t * const sc) { const tulip_media_info_t *mi = NULL; tulip_media_t media = sc->tulip_media; if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) media = sc->tulip_media; else media = sc->tulip_probe_media; sc->tulip_cmdmode &= ~TULIP_CMD_PORTSELECT; sc->tulip_flags &= ~TULIP_SQETEST; if (media != TULIP_MEDIA_UNKNOWN && media != TULIP_MEDIA_MAX) { #if defined(TULIP_DEBUG) if (media < TULIP_MEDIA_MAX && sc->tulip_mediums[media] != NULL) { #endif mi = sc->tulip_mediums[media]; if (mi->mi_type == TULIP_MEDIAINFO_MII) { sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR || mi->mi_type == TULIP_MEDIAINFO_SYM) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); } #if defined(TULIP_DEBUG) } else { printf(TULIP_PRINTF_FMT ": preset: bad media %d!\n", TULIP_PRINTF_ARGS, media); } #endif } switch (media) { case TULIP_MEDIA_BNC: case TULIP_MEDIA_AUI: case TULIP_MEDIA_10BASET: { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_cmdmode |= TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 10000000; sc->tulip_flags |= 
TULIP_SQETEST; break; } case TULIP_MEDIA_10BASET_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 10000000; break; } case TULIP_MEDIA_100BASEFX: case TULIP_MEDIA_100BASET4: case TULIP_MEDIA_100BASETX: { sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL); sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; sc->tulip_if.if_baudrate = 100000000; break; } case TULIP_MEDIA_100BASEFX_FD: case TULIP_MEDIA_100BASETX_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_PORTSELECT; sc->tulip_cmdmode &= ~TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 100000000; break; } default: { break; } } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } /* ******************************************************************** * Start of 21140/21140A support which does not use the MII interface */ static void tulip_null_media_poll( tulip_softc_t * const sc, tulip_mediapoll_event_t event) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif #if defined(DIAGNOSTIC) printf(TULIP_PRINTF_FMT ": botch(media_poll) at line %d\n", TULIP_PRINTF_ARGS, __LINE__); #endif } __inline__ static void tulip_21140_mediainit( tulip_softc_t * const sc, tulip_media_info_t * const mip, tulip_media_t const media, unsigned gpdata, unsigned cmdmode) { sc->tulip_mediums[media] = mip; mip->mi_type = TULIP_MEDIAINFO_GPR; mip->mi_cmdmode = cmdmode; mip->mi_gpdata = gpdata; } static void tulip_21140_evalboard_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); if ((TULIP_CSR_READ(sc, csr_gp) & TULIP_GP_EB_OK100) != 0) { sc->tulip_media = TULIP_MEDIA_10BASET; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_eb_boardsw = { TULIP_21140_DEC_EB, tulip_21140_evalboard_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_accton_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; unsigned gpdata; sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); gpdata = TULIP_CSR_READ(sc, csr_gp); if ((gpdata & TULIP_GP_EN1207_UTP_INIT) == 0) { sc->tulip_media = TULIP_MEDIA_10BASET; } else { if 
((gpdata & TULIP_GP_EN1207_BNC_INIT) == 0) { sc->tulip_media = TULIP_MEDIA_BNC; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_BNC, TULIP_GP_EN1207_BNC_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_accton_boardsw = { TULIP_21140_EN1207, tulip_21140_accton_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_smc9332_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int idx, cnt = 0; TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT|TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); sc->tulip_gpinit = TULIP_GP_SMC_9332_PINS; sc->tulip_gpdata = TULIP_GP_SMC_9332_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_INIT); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) == (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) { if (++cnt > 100) break; } else if ((csr & TULIP_GP_SMC_9332_OK10) == 0) { break; } else { cnt = 0; } DELAY(1000); } sc->tulip_media = cnt > 100 ? 
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_smc9332_boardsw = { TULIP_21140_SMC_9332, tulip_21140_smc9332_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_cogent_em100_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; u_int32_t cmdmode = TULIP_CSR_READ(sc, csr_command); sc->tulip_gpinit = TULIP_GP_EM100_PINS; sc->tulip_gpdata = TULIP_GP_EM100_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_INIT); cmdmode = TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_MUSTBEONE; cmdmode &= ~(TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_SCRAMBLER); if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) { TULIP_CSR_WRITE(sc, csr_command, cmdmode); sc->tulip_media = TULIP_MEDIA_100BASEFX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_FULLDUPLEX); } else { TULIP_CSR_WRITE(sc, csr_command, cmdmode|TULIP_CMD_SCRAMBLER); sc->tulip_media = TULIP_MEDIA_100BASETX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } } static const tulip_boardsw_t tulip_21140_cogent_em100_boardsw = { TULIP_21140_COGENT_EM100, tulip_21140_cogent_em100_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset }; static void tulip_21140_znyx_zx34x_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int cnt10 = 0, cnt100 = 0, idx; sc->tulip_gpinit = TULIP_GP_ZX34X_PINS; sc->tulip_gpdata = TULIP_GP_ZX34X_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) == (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) { if (++cnt100 > 100) break; } else if ((csr & TULIP_GP_ZX34X_LNKFAIL) == 0) { if (++cnt10 > 100) break; } else { cnt10 = 0; cnt100 = 0; } DELAY(1000); } sc->tulip_media = cnt100 > 100 ? 
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET;
    tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET,
			  TULIP_GP_ZX34X_INIT,
			  TULIP_CMD_TXTHRSHLDCTL);
    tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD,
			  TULIP_GP_ZX34X_INIT,
			  TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX);
    tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX,
			  TULIP_GP_ZX34X_INIT,
			  TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION
			  |TULIP_CMD_SCRAMBLER);
    tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD,
			  TULIP_GP_ZX34X_INIT,
			  TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION
			  |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX);
}

static const tulip_boardsw_t tulip_21140_znyx_zx34x_boardsw = {
    TULIP_21140_ZNYX_ZX34X,
    tulip_21140_znyx_zx34x_media_probe,
    tulip_media_select,
    tulip_null_media_poll,
    tulip_2114x_media_preset,
};

static void
tulip_2114x_media_probe(
    tulip_softc_t * const sc)
{
    sc->tulip_cmdmode |= TULIP_CMD_MUSTBEONE
	|TULIP_CMD_BACKOFFCTR|TULIP_CMD_THRSHLD72;
}

static const tulip_boardsw_t tulip_2114x_isv_boardsw = {
    TULIP_21140_ISV,
    tulip_2114x_media_probe,
    tulip_media_select,
    tulip_media_poll,
    tulip_2114x_media_preset,
};

/*
 * ******** END of chip-specific handlers. ***********
 */

/*
 * Code to read the SROM and MII bit streams (I2C)
 */
static void
tulip_delay_300ns(
    tulip_softc_t * const sc)
{
    int idx;
    for (idx = (300 / 33) + 1; idx > 0; idx--)
	(void) TULIP_CSR_READ(sc, csr_busmode);
}

#define EMIT    do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); tulip_delay_300ns(sc); } while (0)

static void
tulip_srom_idle(
    tulip_softc_t * const sc)
{
    unsigned bit, csr;

    csr  = SROMSEL ; EMIT;
    csr  = SROMSEL | SROMRD; EMIT;
    csr ^= SROMCS; EMIT;
    csr ^= SROMCLKON; EMIT;

    /*
     * Write 25 cycles of 0 which will force the SROM to be idle.
     */
    for (bit = 3 + SROM_BITWIDTH + 16; bit > 0; bit--) {
	csr ^= SROMCLKOFF; EMIT;	/* clock low; data not valid */
	csr ^= SROMCLKON; EMIT;		/* clock high; data valid */
    }
    csr ^= SROMCLKOFF; EMIT;
    csr ^= SROMCS; EMIT;
    csr  = 0; EMIT;
}

static void
tulip_srom_read(
    tulip_softc_t * const sc)
{
    unsigned idx;
    const unsigned bitwidth = SROM_BITWIDTH;
    const unsigned cmdmask = (SROMCMD_RD << bitwidth);
    const unsigned msb = 1 << (bitwidth + 3 - 1);
    unsigned lastidx = (1 << bitwidth) - 1;

    tulip_srom_idle(sc);

    for (idx = 0; idx <= lastidx; idx++) {
	unsigned lastbit, data, bits, bit, csr;
	csr  = SROMSEL ; EMIT;
	csr  = SROMSEL | SROMRD; EMIT;
	csr ^= SROMCSON; EMIT;
	csr ^= SROMCLKON; EMIT;

	lastbit = 0;
	for (bits = idx|cmdmask, bit = bitwidth + 3; bit > 0; bit--, bits <<= 1) {
	    const unsigned thisbit = bits & msb;
	    csr ^= SROMCLKOFF; EMIT;	/* clock low; data not valid */
	    if (thisbit != lastbit) {
		csr ^= SROMDOUT; EMIT;	/* clock low; invert data */
	    } else {
		EMIT;
	    }
	    csr ^= SROMCLKON; EMIT;	/* clock high; data valid */
	    lastbit = thisbit;
	}
	csr ^= SROMCLKOFF; EMIT;

	for (data = 0, bits = 0; bits < 16; bits++) {
	    data <<= 1;
	    csr ^= SROMCLKON; EMIT;	/* clock high; data valid */
	    data |= TULIP_CSR_READ(sc, csr_srom_mii) & SROMDIN ? 1 : 0;
	    csr ^= SROMCLKOFF; EMIT;	/* clock low; data not valid */
	}
	sc->tulip_rombuf[idx*2] = data & 0xFF;
	sc->tulip_rombuf[idx*2+1] = data >> 8;
	csr  = SROMSEL | SROMRD; EMIT;
	csr  = 0; EMIT;
    }
    tulip_srom_idle(sc);
}

#define MII_EMIT    do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); tulip_delay_300ns(sc); } while (0)

static void
tulip_mii_writebits(
    tulip_softc_t * const sc,
    unsigned data,
    unsigned bits)
{
    unsigned msb = 1 << (bits - 1);
    unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK);
    unsigned lastbit = (csr & MII_DOUT) ?
msb : 0; csr |= MII_WR; MII_EMIT; /* clock low; assert write */ for (; bits > 0; bits--, data <<= 1) { const unsigned thisbit = data & msb; if (thisbit != lastbit) { csr ^= MII_DOUT; MII_EMIT; /* clock low; invert data */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ lastbit = thisbit; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } } static void tulip_mii_turnaround( tulip_softc_t * const sc, unsigned cmd) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); if (cmd == MII_WRCMD) { csr |= MII_DOUT; MII_EMIT; /* clock low; change data */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ csr ^= MII_DOUT; MII_EMIT; /* clock low; change data */ } else { csr |= MII_RD; MII_EMIT; /* clock low; switch to read */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } static unsigned tulip_mii_readbits( tulip_softc_t * const sc) { unsigned data; unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); int idx; for (idx = 0, data = 0; idx < 16; idx++) { data <<= 1; /* this is NOOP on the first pass through */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ if (TULIP_CSR_READ(sc, csr_srom_mii) & MII_DIN) data |= 1; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } csr ^= MII_RD; MII_EMIT; /* clock low; turn off read */ return data; } static unsigned tulip_mii_readreg( tulip_softc_t * const sc, unsigned devaddr, unsigned regno) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); unsigned data; csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_RDCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_RDCMD); data = tulip_mii_readbits(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][0] = data; sc->tulip_dbg.dbg_phyregs[regno][1]++; #endif return data; } static void tulip_mii_writereg( tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_WRCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_WRCMD); tulip_mii_writebits(sc, data, 16); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][2] = data; sc->tulip_dbg.dbg_phyregs[regno][3]++; #endif } #define tulip_mchash(mca) (tulip_crc32(mca, 6) & 0x1FF) #define tulip_srom_crcok(databuf) ( \ ((tulip_crc32(databuf, 126) & 0xFFFFU) ^ 0xFFFFU) == \ ((databuf)[126] | ((databuf)[127] << 8))) static unsigned tulip_crc32( const unsigned char *databuf, size_t datalen) { u_int idx, bit, data, crc = 0xFFFFFFFFUL; for (idx = 0; idx < datalen; idx++) for (data = *databuf++, bit = 0; bit < 8; bit++, data >>= 1) crc = (crc >> 1) ^ (((crc ^ data) & 1) ? 
TULIP_CRC32_POLY : 0); return crc; } static void tulip_identify_dec_nic( tulip_softc_t * const sc) { strcpy(sc->tulip_boardid, "DEC "); #define D0 4 if (sc->tulip_chipid <= TULIP_DE425) return; if (bcmp(sc->tulip_rombuf + 29, "DE500", 5) == 0 || bcmp(sc->tulip_rombuf + 29, "DE450", 5) == 0) { bcopy(sc->tulip_rombuf + 29, &sc->tulip_boardid[D0], 8); sc->tulip_boardid[D0+8] = ' '; } #undef D0 } static void tulip_identify_znyx_nic( tulip_softc_t * const sc) { unsigned id = 0; strcpy(sc->tulip_boardid, "ZNYX ZX3XX "); if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) { unsigned znyx_ptr; sc->tulip_boardid[8] = '4'; znyx_ptr = sc->tulip_rombuf[124] + 256 * sc->tulip_rombuf[125]; if (znyx_ptr < 26 || znyx_ptr > 116) { sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; return; } /* ZX344 = 0010 .. 0013FF */ if (sc->tulip_rombuf[znyx_ptr] == 0x4A && sc->tulip_rombuf[znyx_ptr + 1] == 0x52 && sc->tulip_rombuf[znyx_ptr + 2] == 0x01) { id = sc->tulip_rombuf[znyx_ptr + 5] + 256 * sc->tulip_rombuf[znyx_ptr + 4]; if ((id >> 8) == (TULIP_ZNYX_ID_ZX342 >> 8)) { sc->tulip_boardid[9] = '2'; if (id == TULIP_ZNYX_ID_ZX342B) { sc->tulip_boardid[10] = 'B'; sc->tulip_boardid[11] = ' '; } sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX344) { sc->tulip_boardid[10] = '4'; sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX345) { sc->tulip_boardid[9] = (sc->tulip_rombuf[19] > 1) ? '8' : '5'; } else if (id == TULIP_ZNYX_ID_ZX346) { sc->tulip_boardid[9] = '6'; } else if (id == TULIP_ZNYX_ID_ZX351) { sc->tulip_boardid[8] = '5'; sc->tulip_boardid[9] = '1'; } } if (id == 0) { /* * Assume it's a ZX342... */ sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } return; } sc->tulip_boardid[8] = '1'; if (sc->tulip_chipid == TULIP_21041) { sc->tulip_boardid[10] = '1'; return; } if (sc->tulip_rombuf[32] == 0x4A && sc->tulip_rombuf[33] == 0x52) { id = sc->tulip_rombuf[37] + 256 * sc->tulip_rombuf[36]; if (id == TULIP_ZNYX_ID_ZX312T) { sc->tulip_boardid[9] = '2'; sc->tulip_boardid[10] = 'T'; sc->tulip_boardid[11] = ' '; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; } else if (id == TULIP_ZNYX_ID_ZX314_INTA) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX314) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315_INTA) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_BASEROM; } else { id = 0; } } if (id == 0) { if ((sc->tulip_enaddr[3] & ~3) == 0xF0 && (sc->tulip_enaddr[5] & 2) == 0) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xF4 && (sc->tulip_enaddr[5] & 1) == 0) { sc->tulip_boardid[9] = '5'; sc->tulip_boardsw = &tulip_21040_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xEC) { sc->tulip_boardid[9] = '2'; sc->tulip_boardsw = &tulip_21040_boardsw; } } } static void tulip_identify_smc_nic( tulip_softc_t * const sc) { u_int32_t id1, id2, ei; int auibnc = 0, utp = 0; char *cp; strcpy(sc->tulip_boardid, "SMC "); if (sc->tulip_chipid 
== TULIP_21041)
	return;
    if (sc->tulip_chipid != TULIP_21040) {
	if (sc->tulip_boardsw != &tulip_2114x_isv_boardsw) {
	    strcpy(&sc->tulip_boardid[4], "9332DST ");
	    sc->tulip_boardsw = &tulip_21140_smc9332_boardsw;
	} else if (sc->tulip_features & (TULIP_HAVE_BASEROM|TULIP_HAVE_SLAVEDROM)) {
	    strcpy(&sc->tulip_boardid[4], "9334BDT ");
	} else {
	    strcpy(&sc->tulip_boardid[4], "9332BDT ");
	}
	return;
    }
    id1 = sc->tulip_rombuf[0x60] | (sc->tulip_rombuf[0x61] << 8);
    id2 = sc->tulip_rombuf[0x62] | (sc->tulip_rombuf[0x63] << 8);
    ei  = sc->tulip_rombuf[0x66] | (sc->tulip_rombuf[0x67] << 8);

    strcpy(&sc->tulip_boardid[4], "8432");
    cp = &sc->tulip_boardid[8];
    if ((id1 & 1) == 0)
	*cp++ = 'B', auibnc = 1;
    if ((id1 & 0xFF) > 0x32)
	*cp++ = 'T', utp = 1;
    if ((id1 & 0x4000) == 0)
	*cp++ = 'A', auibnc = 1;
    if (id2 == 0x15) {
	sc->tulip_boardid[7] = '4';
	*cp++ = '-';
	*cp++ = 'C';
	*cp++ = 'H';
	*cp++ = (ei ? '2' : '1');
    }
    *cp++ = ' ';
    *cp = '\0';
    if (utp && !auibnc)
	sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw;
    else if (!utp && auibnc)
	sc->tulip_boardsw = &tulip_21040_auibnc_only_boardsw;
}

static void
tulip_identify_cogent_nic(
    tulip_softc_t * const sc)
{
    strcpy(sc->tulip_boardid, "Cogent ");
    if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) {
	if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100TX_ID) {
	    strcat(sc->tulip_boardid, "EM100TX ");
	    sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw;
	} else if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) {
	    strcat(sc->tulip_boardid, "EM100FX ");
	    sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw;
	}
	/*
	 * Magic number (0x24001109U) is the SubVendor (0x2400) and
	 * SubDevId (0x1109) for the ANA6944TX (EM440TX).
	 */
	if (*(u_int32_t *) sc->tulip_rombuf == 0x24001109U
		&& (sc->tulip_features & TULIP_HAVE_BASEROM)) {
	    /*
	     * Cogent (Adaptec) is still mapping all INTs to INTA of
	     * first 21140.  Dumb!  Dumb!
	     */
	    strcat(sc->tulip_boardid, "EM440TX ");
	    sc->tulip_features |= TULIP_HAVE_SHAREDINTR;
	}
    } else if (sc->tulip_chipid == TULIP_21040) {
	sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM;
    }
}

static void
tulip_identify_accton_nic(
    tulip_softc_t * const sc)
{
    strcpy(sc->tulip_boardid, "ACCTON ");
    switch (sc->tulip_chipid) {
	case TULIP_21140A:
	    strcat(sc->tulip_boardid, "EN1207 ");
	    sc->tulip_boardsw = &tulip_21140_accton_boardsw;
	    break;
	case TULIP_21140:
	    strcat(sc->tulip_boardid, "EN1207TX ");
	    sc->tulip_boardsw = &tulip_21140_eb_boardsw;
	    break;
	case TULIP_21040:
	    strcat(sc->tulip_boardid, "EN1203 ");
	    sc->tulip_boardsw = &tulip_21040_boardsw;
	    break;
	case TULIP_21041:
	    strcat(sc->tulip_boardid, "EN1203 ");
	    sc->tulip_boardsw = &tulip_21041_boardsw;
	    break;
	default:
	    sc->tulip_boardsw = &tulip_2114x_isv_boardsw;
	    break;
    }
}

static void
tulip_identify_asante_nic(
    tulip_softc_t * const sc)
{
    strcpy(sc->tulip_boardid, "Asante ");
    if ((sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A)
	    && sc->tulip_boardsw != &tulip_2114x_isv_boardsw) {
	tulip_media_info_t *mi = sc->tulip_mediainfo;
	int idx;
	/*
	 * The Asante Fast Ethernet doesn't always ship with a valid
	 * new format SROM.  So if it isn't in the new format, we cheat
	 * and set it up as if it were.
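	 * That is, we hand-build the TULIP_MEDIAINFO_MII record that
	 * decoding an MII block of a new format SROM would have produced:
	 * pulse the PHY reset via the general purpose pins, search for
	 * the PHY address, fill in fixed 10/100 capability masks, and
	 * fall back on the ISV board support.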
*/ sc->tulip_gpinit = TULIP_GP_ASANTE_PINS; sc->tulip_gpdata = 0; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PHYRESET); DELAY(100); TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = 0; mi->mi_gpr_offset = 0; mi->mi_reset_length = 0; mi->mi_reset_offset = 0;; mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx = 20; idx > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, 0); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy 0\n", TULIP_PRINTF_ARGS); return; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_advertisement = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_full_duplex = PHYSTS_10BASET_FD|PHYSTS_100BASETX_FD; mi->mi_tx_threshold = PHYSTS_10BASET|PHYSTS_10BASET_FD; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); sc->tulip_boardsw = &tulip_2114x_isv_boardsw; } } static int tulip_srom_decode( tulip_softc_t * const sc) { unsigned idx1, idx2, idx3; const tulip_srom_header_t *shp = (tulip_srom_header_t *) &sc->tulip_rombuf[0]; const tulip_srom_adapter_info_t *saip = (tulip_srom_adapter_info_t *) (shp + 1); tulip_srom_media_t srom_media; tulip_media_info_t *mi = sc->tulip_mediainfo; const u_int8_t *dp; u_int32_t leaf_offset, blocks, data; for (idx1 = 0; idx1 < shp->sh_adapter_count; idx1++, saip++) { if (shp->sh_adapter_count == 1) break; if (saip->sai_device == sc->tulip_pci_devno) break; } /* * Didn't find the right media block for this card. */ if (idx1 == shp->sh_adapter_count) return 0; /* * Save the hardware address. */ bcopy((caddr_t) shp->sh_ieee802_address, (caddr_t) sc->tulip_enaddr, 6); /* * If this is a multiple port card, add the adapter index to the last * byte of the hardware address. (if it isn't multiport, adding 0 * won't hurt. 
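 * For example, on a hypothetical four-port board whose SROM address
 * ends in 0x10, the ports come up ending in 0x10, 0x11, 0x12 and
 * 0x13.)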
*/ sc->tulip_enaddr[5] += idx1; leaf_offset = saip->sai_leaf_offset_lowbyte + saip->sai_leaf_offset_highbyte * 256; dp = sc->tulip_rombuf + leaf_offset; sc->tulip_conntype = (tulip_srom_connection_t) (dp[0] + dp[1] * 256); dp += 2; for (idx2 = 0;; idx2++) { if (tulip_srom_conninfo[idx2].sc_type == sc->tulip_conntype || tulip_srom_conninfo[idx2].sc_type == TULIP_SROM_CONNTYPE_NOT_USED) break; } sc->tulip_connidx = idx2; if (sc->tulip_chipid == TULIP_21041) { blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { tulip_media_t media; data = *dp++; srom_media = (tulip_srom_media_t) (data & 0x3F); for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media != TULIP_MEDIA_UNKNOWN) { if (data & TULIP_SROM_21041_EXTENDED) { mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; mi->mi_sia_connectivity = dp[0] + dp[1] * 256; mi->mi_sia_tx_rx = dp[2] + dp[3] * 256; mi->mi_sia_general = dp[4] + dp[5] * 256; mi++; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; break; } default: { break; } } } } if (data & TULIP_SROM_21041_EXTENDED) dp += 6; } #ifdef notdef if (blocks == 0) { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; } #endif } else { unsigned length, type; tulip_media_t gp_media = TULIP_MEDIA_UNKNOWN; if (sc->tulip_features & TULIP_HAVE_GPR) sc->tulip_gpinit = *dp++; blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { const u_int8_t *ep; if ((*dp & 0x80) == 0) { length = 4; type = 0; } else { length = (*dp++ & 0x7f) - 1; type = *dp++ & 0x3f; } ep = dp + length; switch (type & 0x3f) { case 0: { /* 21140[A] GPR block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_GPR; sc->tulip_mediums[media] = mi; mi->mi_gpdata = dp[1]; if (media > gp_media && !TULIP_IS_MEDIA_FD(media)) { sc->tulip_gpdata = mi->mi_gpdata; gp_media = media; } data = dp[2] + dp[3] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { #if 0 mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; #endif mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } mi++; break; } case 1: { /* 21140[A] MII block */ const unsigned phyno = *dp++; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += mi->mi_reset_length; /* * Before we probe for a PHY, use the GPR information * to select it. If we don't, it may be inaccessible. 
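			 * The reset sequence and then the GP sequence
			 * from the SROM leaf are replayed below one byte
			 * at a time into csr_gp, with a short delay
			 * between writes.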
*/ TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpinit|TULIP_GP_PINSET); for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx3]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx3]); } /* * At least write something! */ if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy %d\n", TULIP_PRINTF_ARGS, phyno); break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 2: { /* 2114[23] SIA block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; if (type & 0x40) { mi->mi_sia_connectivity = dp[0] + dp[1] * 256; mi->mi_sia_tx_rx = dp[2] + dp[3] * 256; mi->mi_sia_general = dp[4] + dp[5] * 256; dp += 6; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, BNC); break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, AUI); break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET); break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET_FD); break; } default: { goto bad_media; } } } mi->mi_sia_gp_control = (dp[0] + dp[1] * 256) << 16; mi->mi_sia_gp_data = (dp[2] + dp[3] * 256) << 16; mi++; bad_media: break; } case 3: { /* 2114[23] MII PHY block */ const unsigned phyno = *dp++; const u_int8_t *dp0; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; dp0 = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp0 = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_sia_general, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if 
(mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy %d\n", TULIP_PRINTF_ARGS, phyno); break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; mi->mi_mii_interrupt = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 4: { /* 21143 SYM block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SYM; sc->tulip_mediums[media] = mi; mi->mi_gpcontrol = (dp[1] + dp[2] * 256) << 16; mi->mi_gpdata = (dp[3] + dp[4] * 256) << 16; data = dp[5] + dp[6] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } mi++; break; } #if 0 case 5: { /* 21143 Reset block */ mi->mi_type = TULIP_MEDIAINFO_RESET; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; mi++; break; } #endif default: { } } dp = ep; } } return mi - sc->tulip_mediainfo; } static const struct { void (*vendor_identify_nic)(tulip_softc_t * const sc); unsigned char vendor_oui[3]; } tulip_vendors[] = { { tulip_identify_dec_nic, { 0x08, 0x00, 0x2B } }, { tulip_identify_dec_nic, { 0x00, 0x00, 0xF8 } }, { tulip_identify_smc_nic, { 0x00, 0x00, 0xC0 } }, { tulip_identify_smc_nic, { 0x00, 0xE0, 0x29 } }, { tulip_identify_znyx_nic, { 0x00, 0xC0, 0x95 } }, { tulip_identify_cogent_nic, { 0x00, 0x00, 0x92 } }, { tulip_identify_asante_nic, { 0x00, 0x00, 0x94 } }, { tulip_identify_accton_nic, { 0x00, 0x00, 0xE8 } }, { NULL } }; /* * This deals with the vagaries of the address roms and the * brain-deadness that various vendors commit in using them. 
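 *
 * Returns 0 on success and a negative value on failure: -1 for a ROM
 * checksum mismatch, -2 if the address isn't mirrored where the DEC
 * format expects it, -3 if the DEC test pattern is missing, -4 for an
 * unusable vendor-specific format, -5 if a slaved ROM fails to decode,
 * and -6 if no board support could be selected.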
 */
static int
tulip_read_macaddr(
    tulip_softc_t * const sc)
{
    unsigned cksum, rom_cksum, idx;
    u_int32_t csr;
    unsigned char tmpbuf[8];
    static const u_char testpat[] = { 0xFF, 0, 0x55, 0xAA, 0xFF, 0, 0x55, 0xAA };

    sc->tulip_connidx = TULIP_SROM_LASTCONNIDX;

    if (sc->tulip_chipid == TULIP_21040) {
	TULIP_CSR_WRITE(sc, csr_enetrom, 1);
	for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) {
	    int cnt = 0;
	    while (((csr = TULIP_CSR_READ(sc, csr_enetrom)) & 0x80000000L) && cnt < 10000)
		cnt++;
	    sc->tulip_rombuf[idx] = csr & 0xFF;
	}
	sc->tulip_boardsw = &tulip_21040_boardsw;
#if defined(TULIP_EISA)
    } else if (sc->tulip_chipid == TULIP_DE425) {
	int cnt;
	for (idx = 0, cnt = 0; idx < sizeof(testpat) && cnt < 32; cnt++) {
	    tmpbuf[idx] = TULIP_CSR_READBYTE(sc, csr_enetrom);
	    if (tmpbuf[idx] == testpat[idx])
		++idx;
	    else
		idx = 0;
	}
	for (idx = 0; idx < 32; idx++)
	    sc->tulip_rombuf[idx] = TULIP_CSR_READBYTE(sc, csr_enetrom);
	sc->tulip_boardsw = &tulip_21040_boardsw;
#endif /* TULIP_EISA */
    } else {
	if (sc->tulip_chipid == TULIP_21041) {
	    /*
	     * Thankfully all 21041's act the same.
	     */
	    sc->tulip_boardsw = &tulip_21041_boardsw;
	} else {
	    /*
	     * Assume all 21140 boards are compatible with the
	     * DEC 10/100 evaluation board.  Not really valid but
	     * it's the best we can do until everyone switches to
	     * the new SROM format.
	     */
	    sc->tulip_boardsw = &tulip_21140_eb_boardsw;
	}
	tulip_srom_read(sc);
	if (tulip_srom_crcok(sc->tulip_rombuf)) {
	    /*
	     * SROM CRC is valid, therefore it must be in the
	     * new format.
	     */
	    sc->tulip_features |= TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM;
	} else if (sc->tulip_rombuf[126] == 0xff && sc->tulip_rombuf[127] == 0xFF) {
	    /*
	     * No checksum is present.  See if the SROM id checks out;
	     * the first 18 bytes should be 0 followed by a 1 followed
	     * by the number of adapters (which we don't deal with yet).
	     */
	    for (idx = 0; idx < 18; idx++) {
		if (sc->tulip_rombuf[idx] != 0)
		    break;
	    }
	    if (idx == 18 && sc->tulip_rombuf[18] == 1 && sc->tulip_rombuf[19] != 0)
		sc->tulip_features |= TULIP_HAVE_ISVSROM;
	} else if (sc->tulip_chipid >= TULIP_21142) {
	    sc->tulip_features |= TULIP_HAVE_ISVSROM;
	    sc->tulip_boardsw = &tulip_2114x_isv_boardsw;
	}
	if ((sc->tulip_features & TULIP_HAVE_ISVSROM) && tulip_srom_decode(sc)) {
	    if (sc->tulip_chipid != TULIP_21041)
		sc->tulip_boardsw = &tulip_2114x_isv_boardsw;
	    /*
	     * If the SROM specifies more than one adapter, tag this as a
	     * BASE rom.
	     */
	    if (sc->tulip_rombuf[19] > 1)
		sc->tulip_features |= TULIP_HAVE_BASEROM;
	    if (sc->tulip_boardsw == NULL)
		return -6;
	    goto check_oui;
	}
    }

    if (bcmp(&sc->tulip_rombuf[0], &sc->tulip_rombuf[16], 8) != 0) {
	/*
	 * Some folks don't use the standard ethernet rom format
	 * but instead just put the address in the first 6 bytes
	 * of the rom and let the rest be all 0xffs.  (Can we say
	 * ZNYX???) (well sometimes they put in a checksum so we'll
	 * start at 8).
	 */
	for (idx = 8; idx < 32; idx++) {
	    if (sc->tulip_rombuf[idx] != 0xFF)
		return -4;
	}
	/*
	 * Make sure the address is not multicast or locally assigned
	 * and that the OUI is not 00-00-00.
	 */
	if ((sc->tulip_rombuf[0] & 3) != 0)
	    return -4;
	if (sc->tulip_rombuf[0] == 0 && sc->tulip_rombuf[1] == 0
		&& sc->tulip_rombuf[2] == 0)
	    return -4;
	bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6);
	sc->tulip_features |= TULIP_HAVE_OKROM;
	goto check_oui;
    } else {
	/*
	 * A number of makers of multiport boards (ZNYX and Cogent)
	 * only put on one address ROM on their 21040 boards.
	 * So if the ROM is all zeros (or all 0xFFs), look at the
	 * previously configured boards (as long as they are on the same
	 * PCI bus and the bus number is non-zero) until we find the
	 * master board with an address ROM.  We then use its address ROM
	 * as the base for this board.  (We add our relative board number
	 * to the last byte of its address.)
	 */
	for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) {
	    if (sc->tulip_rombuf[idx] != 0 && sc->tulip_rombuf[idx] != 0xFF)
		break;
	}
	if (idx == sizeof(sc->tulip_rombuf)) {
	    int root_unit;
	    tulip_softc_t *root_sc = NULL;
	    for (root_unit = sc->tulip_unit - 1; root_unit >= 0; root_unit--) {
		root_sc = TULIP_UNIT_TO_SOFTC(root_unit);
		if (root_sc == NULL
			|| (root_sc->tulip_features & (TULIP_HAVE_OKROM|TULIP_HAVE_SLAVEDROM)) == TULIP_HAVE_OKROM)
		    break;
		root_sc = NULL;
	    }
	    if (root_sc != NULL
		    && (root_sc->tulip_features & TULIP_HAVE_BASEROM)
		    && root_sc->tulip_chipid == sc->tulip_chipid
		    && root_sc->tulip_pci_busno == sc->tulip_pci_busno) {
		sc->tulip_features |= TULIP_HAVE_SLAVEDROM;
		sc->tulip_boardsw = root_sc->tulip_boardsw;
		strcpy(sc->tulip_boardid, root_sc->tulip_boardid);
		if (sc->tulip_boardsw->bd_type == TULIP_21140_ISV) {
		    bcopy(root_sc->tulip_rombuf, sc->tulip_rombuf,
			  sizeof(sc->tulip_rombuf));
		    if (!tulip_srom_decode(sc))
			return -5;
		} else {
		    bcopy(root_sc->tulip_enaddr, sc->tulip_enaddr, 6);
		    sc->tulip_enaddr[5] += sc->tulip_unit - root_sc->tulip_unit;
		}
		/*
		 * Now for a truly disgusting kludge: all 4 21040s on
		 * the ZX314 share the same INTA line so the mapping
		 * setup by the BIOS on the PCI bridge is worthless.
		 * Rather than reprogramming the value in the config
		 * register, we will handle this internally.
		 */
		if (root_sc->tulip_features & TULIP_HAVE_SHAREDINTR) {
		    sc->tulip_slaves = root_sc->tulip_slaves;
		    root_sc->tulip_slaves = sc;
		    sc->tulip_features |= TULIP_HAVE_SLAVEDINTR;
		}
		return 0;
	    }
	}
    }

    /*
     * This is the standard DEC address ROM test.
     */
    if (bcmp(&sc->tulip_rombuf[24], testpat, 8) != 0)
	return -3;
    tmpbuf[0] = sc->tulip_rombuf[15]; tmpbuf[1] = sc->tulip_rombuf[14];
    tmpbuf[2] = sc->tulip_rombuf[13]; tmpbuf[3] = sc->tulip_rombuf[12];
    tmpbuf[4] = sc->tulip_rombuf[11]; tmpbuf[5] = sc->tulip_rombuf[10];
    tmpbuf[6] = sc->tulip_rombuf[9];  tmpbuf[7] = sc->tulip_rombuf[8];
    if (bcmp(&sc->tulip_rombuf[0], tmpbuf, 8) != 0)
	return -2;

    bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6);

    cksum = *(u_int16_t *) &sc->tulip_enaddr[0];
    cksum *= 2;
    if (cksum > 65535) cksum -= 65535;
    cksum += *(u_int16_t *) &sc->tulip_enaddr[2];
    if (cksum > 65535) cksum -= 65535;
    cksum *= 2;
    if (cksum > 65535) cksum -= 65535;
    cksum += *(u_int16_t *) &sc->tulip_enaddr[4];
    if (cksum >= 65535) cksum -= 65535;

    rom_cksum = *(u_int16_t *) &sc->tulip_rombuf[6];
    if (cksum != rom_cksum)
	return -1;

  check_oui:
    /*
     * Check for various boards based on OUI.  Did I say braindead?
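     * The tulip_vendors table above maps the OUI of the address we just
     * recovered to a vendor-specific identify routine (DEC, SMC, ZNYX,
     * Cogent, Asante, Accton).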
*/ for (idx = 0; tulip_vendors[idx].vendor_identify_nic != NULL; idx++) { if (bcmp((caddr_t) sc->tulip_enaddr, (caddr_t) tulip_vendors[idx].vendor_oui, 3) == 0) { (*tulip_vendors[idx].vendor_identify_nic)(sc); break; } } sc->tulip_features |= TULIP_HAVE_OKROM; return 0; } #if defined(IFM_ETHER) static void tulip_ifmedia_add( tulip_softc_t * const sc) { tulip_media_t media; int medias = 0; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL) { ifmedia_add(&sc->tulip_ifmedia, tulip_media_to_ifmedia[media], 0, 0); medias++; } } if (medias == 0) { sc->tulip_features |= TULIP_HAVE_NOMEDIA; ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE); } else if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO); } else { ifmedia_set(&sc->tulip_ifmedia, tulip_media_to_ifmedia[sc->tulip_media]); sc->tulip_flags |= TULIP_PRINTMEDIA; tulip_linkup(sc, sc->tulip_media); } } static int tulip_ifmedia_change( struct ifnet * const ifp) { tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_media = TULIP_MEDIA_UNKNOWN; if (IFM_SUBTYPE(sc->tulip_ifmedia.ifm_media) != IFM_AUTO) { tulip_media_t media; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL && sc->tulip_ifmedia.ifm_media == tulip_media_to_ifmedia[media]) { sc->tulip_flags |= TULIP_PRINTMEDIA; sc->tulip_flags &= ~TULIP_DIDNWAY; tulip_linkup(sc, media); return 0; } } } sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_WANTRXACT); tulip_reset(sc); tulip_init(sc); return 0; } /* * Media status callback */ static void tulip_ifmedia_status( struct ifnet * const ifp, struct ifmediareq *req) { tulip_softc_t *sc = TULIP_IFP_TO_SOFTC(ifp); #if defined(__bsdi__) if (sc->tulip_mii.mii_instance != 0) { mii_pollstat(&sc->tulip_mii); req->ifm_active = sc->tulip_mii.mii_media_active; req->ifm_status = sc->tulip_mii.mii_media_status; return; } #endif if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) return; req->ifm_status = IFM_AVALID; if (sc->tulip_flags & TULIP_LINKUP) req->ifm_status |= IFM_ACTIVE; req->ifm_active = tulip_media_to_ifmedia[sc->tulip_media]; } #endif static void tulip_addr_filter( tulip_softc_t * const sc) { #if defined(__FreeBSD__) && __FreeBSD__ >= 3 struct ifmultiaddr *ifma; u_char *addrp; #else struct ether_multistep step; struct ether_multi *enm; #endif int multicnt; sc->tulip_flags &= ~(TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY|TULIP_ALLMULTI); sc->tulip_flags |= TULIP_WANTSETUP|TULIP_WANTTXSTART; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; #if defined(IFF_ALLMULTI) sc->tulip_if.if_flags &= ~IFF_ALLMULTI; #endif #if defined(__FreeBSD__) && __FreeBSD__ >= 3 multicnt = 0; for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family == AF_LINK) multicnt++; } #else multicnt = sc->tulip_multicnt; #endif sc->tulip_if.if_start = tulip_ifstart; /* so the setup packet gets queued */ if (multicnt > 14) { u_int32_t *sp = sc->tulip_setupdata; unsigned hash; /* * Some early passes of the 21140 have broken implementations of * hash-perfect mode. 
When we get too many multicasts for perfect * filtering with these chips, we need to switch into hash-only * mode (this is better than all-multicast on networks with lots * of multicast traffic). */ if (sc->tulip_features & TULIP_HAVE_BROKEN_HASH) sc->tulip_flags |= TULIP_WANTHASHONLY; else sc->tulip_flags |= TULIP_WANTHASHPERFECT; /* * If we have more than 14 multicasts, we have to * go into hash-perfect mode (512-bit multicast * hash and one perfect hardware address). */ bzero(sc->tulip_setupdata, sizeof(sc->tulip_setupdata)); #if defined(__FreeBSD__) && __FreeBSD__ >= 3 for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; hash = tulip_mchash(LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); sp[hash >> 4] |= 1 << (hash & 0xF); } #else ETHER_FIRST_MULTI(step, TULIP_ETHERCOM(sc), enm); while (enm != NULL) { if (bcmp(enm->enm_addrlo, enm->enm_addrhi, 6) == 0) { hash = tulip_mchash(enm->enm_addrlo); sp[hash >> 4] |= 1 << (hash & 0xF); } else { sc->tulip_flags |= TULIP_ALLMULTI; sc->tulip_flags &= ~(TULIP_WANTHASHONLY|TULIP_WANTHASHPERFECT); break; } ETHER_NEXT_MULTI(step, enm); } #endif /* * No reason to use a hash if we are going to be * receiving every multicast. */ if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { hash = tulip_mchash(etherbroadcastaddr); sp[hash >> 4] |= 1 << (hash & 0xF); if (sc->tulip_flags & TULIP_WANTHASHONLY) { hash = tulip_mchash(sc->tulip_enaddr); sp[hash >> 4] |= 1 << (hash & 0xF); } else { sp[39] = ((u_int16_t *) sc->tulip_enaddr)[0]; sp[40] = ((u_int16_t *) sc->tulip_enaddr)[1]; sp[41] = ((u_int16_t *) sc->tulip_enaddr)[2]; } } } if ((sc->tulip_flags & (TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY)) == 0) { u_int32_t *sp = sc->tulip_setupdata; int idx = 0; if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { /* * Otherwise we can get perfect filtering for 16 addresses. */ #if defined(__FreeBSD__) && __FreeBSD__ >= 3 for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; addrp = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); *sp++ = ((u_int16_t *) addrp)[0]; *sp++ = ((u_int16_t *) addrp)[1]; *sp++ = ((u_int16_t *) addrp)[2]; idx++; } #else ETHER_FIRST_MULTI(step, TULIP_ETHERCOM(sc), enm); for (; enm != NULL; idx++) { if (bcmp(enm->enm_addrlo, enm->enm_addrhi, 6) == 0) { *sp++ = ((u_int16_t *) enm->enm_addrlo)[0]; *sp++ = ((u_int16_t *) enm->enm_addrlo)[1]; *sp++ = ((u_int16_t *) enm->enm_addrlo)[2]; } else { sc->tulip_flags |= TULIP_ALLMULTI; break; } ETHER_NEXT_MULTI(step, enm); } #endif /* * Add the broadcast address. */ idx++; *sp++ = 0xFFFF; *sp++ = 0xFFFF; *sp++ = 0xFFFF; } /* * Pad the rest with our hardware address. */ for (; idx < 16; idx++) { *sp++ = ((u_int16_t *) sc->tulip_enaddr)[0]; *sp++ = ((u_int16_t *) sc->tulip_enaddr)[1]; *sp++ = ((u_int16_t *) sc->tulip_enaddr)[2]; } } #if defined(IFF_ALLMULTI) if (sc->tulip_flags & TULIP_ALLMULTI) sc->tulip_if.if_flags |= IFF_ALLMULTI; #endif } static void tulip_reset( tulip_softc_t * const sc) { tulip_ringinfo_t *ri; tulip_desc_t *di; u_int32_t inreset = (sc->tulip_flags & TULIP_INRESET); /* * Brilliant. Simply brilliant. When switching modes/speeds * on a 2114*, you need to set the appropriate MII/PCS/SCL/PS * bits in CSR6 and then do a software reset to get the 21140 * to properly reset its internal pathways to the right places. * Grrrr.
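 *
 * N.B. The ordering this imposes, sketched for clarity (the code just
 * below does exactly this; illustrative only):
 *
 *	(*sc->tulip_boardsw->bd_media_preset)(sc);	-- set MII/PCS/SCL/PS
 *	TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET);
 *	DELAY(10);					-- let the reset settle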
*/ if (sc->tulip_boardsw->bd_media_preset != NULL) (*sc->tulip_boardsw->bd_media_preset)(sc); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyway) */ if (!inreset) { sc->tulip_flags |= TULIP_INRESET; sc->tulip_flags &= ~(TULIP_NEEDRESET|TULIP_RXBUFSLOW); sc->tulip_if.if_flags &= ~IFF_OACTIVE; } TULIP_CSR_WRITE(sc, csr_txlist, TULIP_KVATOPHYS(sc, &sc->tulip_txinfo.ri_first[0])); TULIP_CSR_WRITE(sc, csr_rxlist, TULIP_KVATOPHYS(sc, &sc->tulip_rxinfo.ri_first[0])); TULIP_CSR_WRITE(sc, csr_busmode, (1 << (TULIP_BURSTSIZE(sc->tulip_unit) + 8)) |TULIP_BUSMODE_CACHE_ALIGN8 |TULIP_BUSMODE_READMULTIPLE |(BYTE_ORDER != LITTLE_ENDIAN ? TULIP_BUSMODE_BIGENDIAN : 0)); sc->tulip_txtimer = 0; sc->tulip_txq.ifq_maxlen = TULIP_TXDESCS; /* * Free all the mbufs that were on the transmit ring. */ for (;;) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_txq, m); if (m == NULL) break; m_freem(m); } ri = &sc->tulip_txinfo; ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; for (di = ri->ri_first; di < ri->ri_last; di++) di->d_status = 0; /* * We need to collect all the mbufs that were on the * receive ring before we reinit it either to put * them back on or to know if we have to allocate * more. */ ri = &sc->tulip_rxinfo; ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; for (di = ri->ri_first; di < ri->ri_last; di++) { di->d_status = 0; di->d_length1 = 0; di->d_addr1 = 0; di->d_length2 = 0; di->d_addr2 = 0; } for (;;) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_rxq, m); if (m == NULL) break; m_freem(m); } /* * If tulip_reset is being called recursively, exit quickly knowing * that when the outer tulip_reset returns all the right stuff will * have happened.
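 *
 * N.B. The guard is the usual reentrancy idiom, sketched here
 * (illustrative only):
 *
 *	if (inreset)		-- nested tulip_reset?
 *	    return;		-- let the outermost call finish the job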
*/ if (inreset) return; sc->tulip_intrmask |= TULIP_STS_NORMALINTR|TULIP_STS_RXINTR|TULIP_STS_TXINTR |TULIP_STS_ABNRMLINTR|TULIP_STS_SYSERROR|TULIP_STS_TXSTOPPED |TULIP_STS_TXUNDERFLOW|TULIP_STS_TXBABBLE|TULIP_STS_LINKFAIL |TULIP_STS_RXSTOPPED; if ((sc->tulip_flags & TULIP_DEVICEPROBE) == 0) (*sc->tulip_boardsw->bd_media_select)(sc); #if defined(TULIP_DEBUG) if ((sc->tulip_flags & TULIP_NEEDRESET) == TULIP_NEEDRESET) printf(TULIP_PRINTF_FMT ": tulip_reset: additional reset needed?!?\n", TULIP_PRINTF_ARGS); #endif tulip_media_print(sc); if (sc->tulip_features & TULIP_HAVE_DUALSENSE) TULIP_CSR_WRITE(sc, csr_sia_status, TULIP_CSR_READ(sc, csr_sia_status)); sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_WANTSETUP|TULIP_INRESET |TULIP_RXACT); tulip_addr_filter(sc); } static void tulip_init( tulip_softc_t * const sc) { if (sc->tulip_if.if_flags & IFF_UP) { if ((sc->tulip_if.if_flags & IFF_RUNNING) == 0) { /* initialize the media */ tulip_reset(sc); } sc->tulip_if.if_flags |= IFF_RUNNING; if (sc->tulip_if.if_flags & IFF_PROMISC) { sc->tulip_flags |= TULIP_PROMISC; sc->tulip_cmdmode |= TULIP_CMD_PROMISCUOUS; sc->tulip_intrmask |= TULIP_STS_TXINTR; } else { sc->tulip_flags &= ~TULIP_PROMISC; sc->tulip_cmdmode &= ~TULIP_CMD_PROMISCUOUS; if (sc->tulip_flags & TULIP_ALLMULTI) { sc->tulip_cmdmode |= TULIP_CMD_ALLMULTI; } else { sc->tulip_cmdmode &= ~TULIP_CMD_ALLMULTI; } } sc->tulip_cmdmode |= TULIP_CMD_TXRUN; if ((sc->tulip_flags & (TULIP_TXPROBE_ACTIVE|TULIP_WANTSETUP)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; } else { sc->tulip_if.if_flags |= IFF_OACTIVE; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; } TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == TULIP_WANTSETUP) tulip_txput_setup(sc); } else { sc->tulip_if.if_flags &= ~IFF_RUNNING; tulip_reset(sc); } } static void tulip_rx_intr( tulip_softc_t * const sc) { TULIP_PERFSTART(rxintr) tulip_ringinfo_t * const ri = &sc->tulip_rxinfo; struct ifnet * const ifp = &sc->tulip_if; int fillok = 1; #if defined(TULIP_DEBUG) int cnt = 0; #endif for (;;) { TULIP_PERFSTART(rxget) struct ether_header eh; tulip_desc_t *eop = ri->ri_nextin; int total_len = 0, last_offset = 0; struct mbuf *ms = NULL, *me = NULL; int accept = 0; if (fillok && sc->tulip_rxq.ifq_len < TULIP_RXQ_TARGET) goto queue_mbuf; #if defined(TULIP_DEBUG) if (cnt == ri->ri_max) break; #endif /* * If the TULIP has no descriptors, there can't be any receive * descriptors to process. */ if (eop == ri->ri_nextout) break; /* * 90% of the packets will fit in one descriptor. So we optimize * for that case. */ if ((((volatile tulip_desc_t *) eop)->d_status & (TULIP_DSTS_OWNER|TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) == (TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) { IF_DEQUEUE(&sc->tulip_rxq, ms); me = ms; } else { /* * If still owned by the TULIP, don't touch it. */ if (((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_OWNER) break; /* * It is possible (though improbable unless the BIG_PACKET support * is enabled or MCLBYTES < 1518) for a received packet to cross * more than one receive descriptor. 
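 *
 * N.B. How the status bits delimit a packet, sketched without the
 * ownership tests the real loop below also makes (illustrative only):
 *
 *	both RxFIRSTDESC and RxLASTDESC set:  one-descriptor packet
 *	otherwise: walk forward until RxLASTDESC is found, e.g.
 *
 *	while ((eop->d_status & TULIP_DSTS_RxLASTDESC) == 0)
 *	    eop = (eop + 1 == ri->ri_last) ? ri->ri_first : eop + 1;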
*/ while ((((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_RxLASTDESC) == 0) { if (++eop == ri->ri_last) eop = ri->ri_first; if (eop == ri->ri_nextout || ((((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_OWNER))) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxget); TULIP_PERFEND(rxintr); return; } total_len++; } /* * Dequeue the first buffer for the start of the packet. Hopefully * this will be the only one we need to dequeue. However, if the * packet consumed multiple descriptors, then we need to dequeue * those buffers and chain to the starting mbuf. All buffers but * the last buffer have the same length so we can set that now. * (we add to last_offset instead of multiplying since we normally * won't go into the loop and thereby saving ourselves from * doing a multiplication by 0 in the normal case). */ IF_DEQUEUE(&sc->tulip_rxq, ms); for (me = ms; total_len > 0; total_len--) { me->m_len = TULIP_RX_BUFLEN; last_offset += TULIP_RX_BUFLEN; IF_DEQUEUE(&sc->tulip_rxq, me->m_next); me = me->m_next; } } /* * Now get the size of the received packet (minus the CRC). */ total_len = ((eop->d_status >> 16) & 0x7FFF) - 4; if ((sc->tulip_flags & TULIP_RXIGNORE) == 0 && ((eop->d_status & TULIP_DSTS_ERRSUM) == 0 #ifdef BIG_PACKET || (total_len <= sc->tulip_if.if_mtu + sizeof(struct ether_header) && (eop->d_status & (TULIP_DSTS_RxBADLENGTH|TULIP_DSTS_RxRUNT| TULIP_DSTS_RxCOLLSEEN|TULIP_DSTS_RxBADCRC| TULIP_DSTS_RxOVERFLOW)) == 0) #endif )) { me->m_len = total_len - last_offset; eh = *mtod(ms, struct ether_header *); #if NBPFILTER > 0 if (sc->tulip_bpf != NULL) if (me == ms) TULIP_BPF_TAP(sc, mtod(ms, caddr_t), total_len); else TULIP_BPF_MTAP(sc, ms); #endif sc->tulip_flags |= TULIP_RXACT; if ((sc->tulip_flags & (TULIP_PROMISC|TULIP_HASHONLY)) && (eh.ether_dhost[0] & 1) == 0 && !TULIP_ADDREQUAL(eh.ether_dhost, sc->tulip_enaddr)) goto next; accept = 1; total_len -= sizeof(struct ether_header); } else { ifp->if_ierrors++; if (eop->d_status & (TULIP_DSTS_RxBADLENGTH|TULIP_DSTS_RxOVERFLOW|TULIP_DSTS_RxWATCHDOG)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; } else { const char *error = NULL; if (eop->d_status & TULIP_DSTS_RxTOOLONG) { sc->tulip_dot3stats.dot3StatsFrameTooLongs++; error = "frame too long"; } if (eop->d_status & TULIP_DSTS_RxBADCRC) { if (eop->d_status & TULIP_DSTS_RxDRBBLBIT) { sc->tulip_dot3stats.dot3StatsAlignmentErrors++; error = "alignment error"; } else { sc->tulip_dot3stats.dot3StatsFCSErrors++; error = "bad crc"; } } if (error != NULL && (sc->tulip_flags & TULIP_NOMESSAGES) == 0) { printf(TULIP_PRINTF_FMT ": receive: " TULIP_EADDR_FMT ": %s\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(mtod(ms, u_char *) + 6), error); sc->tulip_flags |= TULIP_NOMESSAGES; } } } next: #if defined(TULIP_DEBUG) cnt++; #endif ifp->if_ipackets++; if (++eop == ri->ri_last) eop = ri->ri_first; ri->ri_nextin = eop; queue_mbuf: /* * Either we are priming the TULIP with mbufs (ms == NULL) * or we are about to accept an mbuf for the upper layers * so we need to allocate an mbuf to replace it. If we * can't replace it, send it up anyway. This may cause * us to drop packets in the future but that's better than * being caught in livelock. * * Note that if this packet crossed multiple descriptors * we don't even try to reallocate all the mbufs here. * Instead we rely on the test at the beginning of * the loop to refill for the extra consumed mbufs.
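 *
 * N.B. The replace-or-keep-going policy below, in outline (illustrative
 * only):
 *
 *	MGETHDR(m0, M_DONTWAIT, MT_DATA);	-- try for a fresh mbuf
 *	if (m0 != NULL)
 *	    MCLGET(m0, M_DONTWAIT);		-- back it with a cluster
 *	-- on failure the packet still goes up; the ring simply runs one
 *	-- buffer short and TULIP_RXBUFSLOW asks for a refill later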
*/ if (accept || ms == NULL) { struct mbuf *m0; MGETHDR(m0, M_DONTWAIT, MT_DATA); if (m0 != NULL) { #if defined(TULIP_COPY_RXDATA) if (!accept || total_len >= MHLEN) { #endif MCLGET(m0, M_DONTWAIT); if ((m0->m_flags & M_EXT) == 0) { m_freem(m0); m0 = NULL; } #if defined(TULIP_COPY_RXDATA) } #endif } if (accept #if defined(TULIP_COPY_RXDATA) && m0 != NULL #endif ) { #if defined(__bsdi__) eh.ether_type = ntohs(eh.ether_type); #endif #if !defined(TULIP_COPY_RXDATA) ms->m_data += sizeof(struct ether_header); ms->m_len -= sizeof(struct ether_header); ms->m_pkthdr.len = total_len; ms->m_pkthdr.rcvif = ifp; ether_input(ifp, &eh, ms); #else #ifdef BIG_PACKET #error BIG_PACKET is incompatible with TULIP_COPY_RXDATA #endif if (ms == me) bcopy(mtod(ms, caddr_t) + sizeof(struct ether_header), mtod(m0, caddr_t), total_len); else m_copydata(ms, 0, total_len, mtod(m0, caddr_t)); m0->m_len = m0->m_pkthdr.len = total_len; m0->m_pkthdr.rcvif = ifp; ether_input(ifp, &eh, m0); m0 = ms; #endif } ms = m0; } if (ms == NULL) { /* * Couldn't allocate a new buffer. Don't bother * trying to replenish the receive queue. */ fillok = 0; sc->tulip_flags |= TULIP_RXBUFSLOW; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxlowbufs++; #endif TULIP_PERFEND(rxget); continue; } /* * Now give the buffer(s) to the TULIP and save them in our * receive queue. */ do { ri->ri_nextout->d_length1 = TULIP_RX_BUFLEN; ri->ri_nextout->d_addr1 = TULIP_KVATOPHYS(sc, mtod(ms, caddr_t)); ri->ri_nextout->d_status = TULIP_DSTS_OWNER; if (++ri->ri_nextout == ri->ri_last) ri->ri_nextout = ri->ri_first; me = ms->m_next; ms->m_next = NULL; IF_ENQUEUE(&sc->tulip_rxq, ms); } while ((ms = me) != NULL); if (sc->tulip_rxq.ifq_len >= TULIP_RXQ_TARGET) sc->tulip_flags &= ~TULIP_RXBUFSLOW; TULIP_PERFEND(rxget); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxintr); } static int tulip_tx_intr( tulip_softc_t * const sc) { TULIP_PERFSTART(txintr) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; struct mbuf *m; int xmits = 0; int descs = 0; while (ri->ri_free < ri->ri_max) { u_int32_t d_flag; if (((volatile tulip_desc_t *) ri->ri_nextin)->d_status & TULIP_DSTS_OWNER) break; d_flag = ri->ri_nextin->d_flag; if (d_flag & TULIP_DFLAG_TxLASTSEG) { if (d_flag & TULIP_DFLAG_TxSETUPPKT) { /* * We've just finished processing a setup packet. * Mark that we finished it. If there's not * another pending, start up the TULIP receiver. * Make sure we ack the RXSTOPPED so we won't get * an abnormal interrupt indication.
*/ sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_HASHONLY); if (ri->ri_nextin->d_flag & TULIP_DFLAG_TxINVRSFILT) sc->tulip_flags |= TULIP_HASHONLY; if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } else { const u_int32_t d_status = ri->ri_nextin->d_status; IF_DEQUEUE(&sc->tulip_txq, m); if (m != NULL) { #if NBPFILTER > 0 if (sc->tulip_bpf != NULL) TULIP_BPF_MTAP(sc, m); #endif m_freem(m); #if defined(TULIP_DEBUG) } else { printf(TULIP_PRINTF_FMT ": tx_intr: failed to dequeue mbuf?!?\n", TULIP_PRINTF_ARGS); #endif } if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { tulip_mediapoll_event_t event = TULIP_MEDIAPOLL_TXPROBE_OK; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxEXCCOLL)) { #if defined(TULIP_DEBUG) if (d_status & TULIP_DSTS_TxNOCARR) sc->tulip_dbg.dbg_txprobe_nocarr++; if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dbg.dbg_txprobe_exccoll++; #endif event = TULIP_MEDIAPOLL_TXPROBE_FAILED; } (*sc->tulip_boardsw->bd_media_poll)(sc, event); /* * Escape from the loop before media poll has reset the TULIP! */ break; } else { xmits++; if (d_status & TULIP_DSTS_ERRSUM) { sc->tulip_if.if_oerrors++; if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dot3stats.dot3StatsExcessiveCollisions++; if (d_status & TULIP_DSTS_TxLATECOLL) sc->tulip_dot3stats.dot3StatsLateCollisions++; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxCARRLOSS)) sc->tulip_dot3stats.dot3StatsCarrierSenseErrors++; if (d_status & (TULIP_DSTS_TxUNDERFLOW|TULIP_DSTS_TxBABBLE)) sc->tulip_dot3stats.dot3StatsInternalMacTransmitErrors++; if (d_status & TULIP_DSTS_TxUNDERFLOW) sc->tulip_dot3stats.dot3StatsInternalTransmitUnderflows++; if (d_status & TULIP_DSTS_TxBABBLE) sc->tulip_dot3stats.dot3StatsInternalTransmitBabbles++; } else { u_int32_t collisions = (d_status & TULIP_DSTS_TxCOLLMASK) >> TULIP_DSTS_V_TxCOLLCNT; sc->tulip_if.if_collisions += collisions; if (collisions == 1) sc->tulip_dot3stats.dot3StatsSingleCollisionFrames++; else if (collisions > 1) sc->tulip_dot3stats.dot3StatsMultipleCollisionFrames++; else if (d_status & TULIP_DSTS_TxDEFERRED) sc->tulip_dot3stats.dot3StatsDeferredTransmissions++; /* * SQE is only valid for 10baseT/BNC/AUI when not * running in full-duplex. In order to speed up the * test, the corresponding bit in tulip_flags needs to * be set as well to get us to count SQE Test Errors. */ if (d_status & TULIP_DSTS_TxNOHRTBT & sc->tulip_flags) sc->tulip_dot3stats.dot3StatsSQETestErrors++; } } } if (++ri->ri_nextin == ri->ri_last) ri->ri_nextin = ri->ri_first; ri->ri_free++; descs++; if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) sc->tulip_if.if_flags &= ~IFF_OACTIVE; } /* * If there is nothing left to transmit, disable the timer; * else, if we made progress, reset the timer back to 2 ticks.
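 *
 * N.B. In outline (illustrative only):
 *
 *	if (ring fully reclaimed, or probing)	txtimer = 0;
 *	else if (xmits > 0)			txtimer = TULIP_TXTIMER;
 *
 * tulip_ifwatchdog() counts tulip_txtimer down and forces a reset when
 * it expires.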
*/ if (ri->ri_free == ri->ri_max || (sc->tulip_flags & TULIP_TXPROBE_ACTIVE)) sc->tulip_txtimer = 0; else if (xmits > 0) sc->tulip_txtimer = TULIP_TXTIMER; sc->tulip_if.if_opackets += xmits; TULIP_PERFEND(txintr); return descs; } static void tulip_print_abnormal_interrupt( tulip_softc_t * const sc, u_int32_t csr) { const char * const *msgp = tulip_status_bits; const char *sep; u_int32_t mask; const char thrsh[] = "72|128\0\0\096|256\0\0\0128|512\0\0160|1024\0"; csr &= (1 << (sizeof(tulip_status_bits)/sizeof(tulip_status_bits[0]))) - 1; printf(TULIP_PRINTF_FMT ": abnormal interrupt:", TULIP_PRINTF_ARGS); for (sep = " ", mask = 1; mask <= csr; mask <<= 1, msgp++) { if ((csr & mask) && *msgp != NULL) { printf("%s%s", sep, *msgp); if (mask == TULIP_STS_TXUNDERFLOW && (sc->tulip_flags & TULIP_NEWTXTHRESH)) { sc->tulip_flags &= ~TULIP_NEWTXTHRESH; if (sc->tulip_cmdmode & TULIP_CMD_STOREFWD) { printf(" (switching to store-and-forward mode)"); } else { printf(" (raising TX threshold to %s)", &thrsh[9 * ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) >> 14)]); } } sep = ", "; } } printf("\n"); } static void tulip_intr_handler( tulip_softc_t * const sc, int *progress_p) { TULIP_PERFSTART(intr) u_int32_t csr; #if defined(__NetBSD__) && !defined(TULIP_USE_SOFTINTR) int only_once; only_once = 1; #endif while ((csr = TULIP_CSR_READ(sc, csr_status)) & sc->tulip_intrmask) { #if defined(__NetBSD__) && !defined(TULIP_USE_SOFTINTR) if (only_once == 1) { #if NRND > 0 rnd_add_uint32(&sc->tulip_rndsource, csr); #endif only_once = 0; } #endif *progress_p = 1; TULIP_CSR_WRITE(sc, csr_status, csr); if (csr & TULIP_STS_SYSERROR) { sc->tulip_last_system_error = (csr & TULIP_STS_ERRORMASK) >> TULIP_STS_ERR_SHIFT; if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_flags |= TULIP_SYSTEMERROR; } else { printf(TULIP_PRINTF_FMT ": system error: %s\n", TULIP_PRINTF_ARGS, tulip_system_errors[sc->tulip_last_system_error]); } sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_system_errors++; break; } if (csr & (TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL)) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_intrs++; #endif if (sc->tulip_boardsw->bd_media_poll != NULL) { (*sc->tulip_boardsw->bd_media_poll)(sc, csr & TULIP_STS_LINKFAIL ? TULIP_MEDIAPOLL_LINKFAIL : TULIP_MEDIAPOLL_LINKPASS); csr &= ~TULIP_STS_ABNRMLINTR; } tulip_media_print(sc); } if (csr & (TULIP_STS_RXINTR|TULIP_STS_RXNOBUF)) { u_int32_t misses = TULIP_CSR_READ(sc, csr_missed_frames); if (csr & TULIP_STS_RXNOBUF) sc->tulip_dot3stats.dot3StatsMissedFrames += misses & 0xFFFF; /* * Pass 2.[012] of the 21140A-A[CDE] may hang and/or corrupt data * on receive overflows. */ if ((misses & 0x0FFE0000) && (sc->tulip_features & TULIP_HAVE_RXBADOVRFLW)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; /* * Stop the receiver process and spin until it's stopped. * Tell rx_intr to drop the packets it dequeues. */ TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode & ~TULIP_CMD_RXRUN); while ((TULIP_CSR_READ(sc, csr_status) & TULIP_STS_RXSTOPPED) == 0) ; TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); sc->tulip_flags |= TULIP_RXIGNORE; } tulip_rx_intr(sc); if (sc->tulip_flags & TULIP_RXIGNORE) { /* * Restart the receiver. 
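 *
 * N.B. The whole overflow workaround above, in outline (illustrative
 * only):
 *
 *	1. clear TULIP_CMD_RXRUN, spin until TULIP_STS_RXSTOPPED is set;
 *	2. ack RXSTOPPED, set TULIP_RXIGNORE, let tulip_rx_intr() drain;
 *	3. clear TULIP_RXIGNORE and rewrite csr_command to restart.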
*/ sc->tulip_flags &= ~TULIP_RXIGNORE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } if (csr & TULIP_STS_ABNRMLINTR) { u_int32_t tmp = csr & sc->tulip_intrmask & ~(TULIP_STS_NORMALINTR|TULIP_STS_ABNRMLINTR); if (csr & TULIP_STS_TXUNDERFLOW) { if ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) != TULIP_CMD_THRSHLD160) { sc->tulip_cmdmode += TULIP_CMD_THRSHLD96; sc->tulip_flags |= TULIP_NEWTXTHRESH; } else if (sc->tulip_features & TULIP_HAVE_STOREFWD) { sc->tulip_cmdmode |= TULIP_CMD_STOREFWD; sc->tulip_flags |= TULIP_NEWTXTHRESH; } } if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_statusbits |= tmp; } else { tulip_print_abnormal_interrupt(sc, tmp); sc->tulip_flags |= TULIP_NOMESSAGES; } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_TXPROBE_ACTIVE|TULIP_DOINGSETUP|TULIP_PROMISC)) { tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) tulip_ifstart(&sc->tulip_if); } } if (sc->tulip_flags & TULIP_NEEDRESET) { tulip_reset(sc); tulip_init(sc); } TULIP_PERFEND(intr); } #if defined(TULIP_USE_SOFTINTR) /* * This is an experimental idea to alleviate problems due to interrupt * livelock. What is interrupt livelock? It's when you spend all your * time servicing device interrupts and never drop below device ipl * to do "useful" work. * * So what we do here is see if the device needs service and if so, * disable interrupts (dismiss the interrupt), place it in a list of devices * needing service, and issue a network software interrupt. * * When our network software interrupt routine gets called, we simply * walk down the list of devices that we have created and deal with them * at splnet/splsoftnet. * */ static void tulip_hardintr_handler( tulip_softc_t * const sc, int *progress_p) { if ((TULIP_CSR_READ(sc, csr_status) & (TULIP_STS_NORMALINTR|TULIP_STS_ABNRMLINTR)) == 0) return; *progress_p = 1; /* * disable interrupts */ TULIP_CSR_WRITE(sc, csr_intr, 0); /* * mark it as needing a software interrupt */ tulip_softintr_mask |= (1U << sc->tulip_unit); #if defined(__NetBSD__) && NRND > 0 /* * This isn't all that random (the value we feed in) but it is * probably better than a constant. It isn't used in entropy * calculation anyway, just to add something to the pool. */ rnd_add_uint32(&sc->tulip_rndsource, sc->tulip_flags); #endif } static void tulip_softintr( void) { u_int32_t softintr_mask, mask; int progress = 0; int unit; tulip_spl_t s; /* * Copy the mask to a local copy and reset the global one to 0. */ s = TULIP_RAISESPL(); softintr_mask = tulip_softintr_mask; tulip_softintr_mask = 0; TULIP_RESTORESPL(s); /* * Optimize for the single unit case. */ if (tulip_softintr_max_unit == 0) { if (softintr_mask & 1) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(0); /* * Handle the "interrupt" and then reenable interrupts */ softintr_mask = 0; tulip_intr_handler(sc, &progress); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } return; } /* * Handle all "queued" interrupts in a round robin fashion. * This is done so as not to favor a particular interface. */ unit = tulip_softintr_last_unit; mask = (1U << unit); while (softintr_mask != 0) { if (tulip_softintr_max_unit == unit) { unit = 0; mask = 1; } else { unit += 1; mask <<= 1; } if (softintr_mask & mask) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(unit); /* * Handle the "interrupt" and then reenable interrupts */ softintr_mask ^= mask; tulip_intr_handler(sc, &progress); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } /* * Save where we ended up.
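 *
 * N.B. One round-robin step, sketched with a hypothetical service()
 * standing in for the handle-and-unmask pair above (illustrative only):
 *
 *	unit = (unit == tulip_softintr_max_unit) ? 0 : unit + 1;
 *	mask = 1U << unit;
 *	if (softintr_mask & mask) {
 *	    softintr_mask ^= mask;	-- claim this unit
 *	    service(unit);		-- tulip_intr_handler + re-enable
 *	}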
*/ tulip_softintr_last_unit = unit; } #endif /* TULIP_USE_SOFTINTR */ static tulip_intrfunc_t tulip_intr_shared( void *arg) { tulip_softc_t * sc = arg; int progress = 0; for (; sc != NULL; sc = sc->tulip_slaves) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif #if defined(TULIP_USE_SOFTINTR) tulip_hardintr_handler(sc, &progress); #else tulip_intr_handler(sc, &progress); #endif } #if defined(TULIP_USE_SOFTINTR) if (progress) schednetisr(NETISR_DE); #endif #if !defined(TULIP_VOID_INTRFUNC) return progress; #endif } static tulip_intrfunc_t tulip_intr_normal( void *arg) { tulip_softc_t * sc = (tulip_softc_t *) arg; int progress = 0; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif #if defined(TULIP_USE_SOFTINTR) tulip_hardintr_handler(sc, &progress); if (progress) schednetisr(NETISR_DE); #else tulip_intr_handler(sc, &progress); #endif #if !defined(TULIP_VOID_INTRFUNC) return progress; #endif } static struct mbuf * tulip_mbuf_compress( struct mbuf *m) { struct mbuf *m0; #if MCLBYTES >= ETHERMTU + 18 && !defined(BIG_PACKET) MGETHDR(m0, M_DONTWAIT, MT_DATA); if (m0 != NULL) { if (m->m_pkthdr.len > MHLEN) { MCLGET(m0, M_DONTWAIT); if ((m0->m_flags & M_EXT) == 0) { m_freem(m); m_freem(m0); return NULL; } } m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t)); m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len; } #else int mlen = MHLEN; int len = m->m_pkthdr.len; struct mbuf **mp = &m0; while (len > 0) { if (mlen == MHLEN) { MGETHDR(*mp, M_DONTWAIT, MT_DATA); } else { MGET(*mp, M_DONTWAIT, MT_DATA); } if (*mp == NULL) { m_freem(m0); m0 = NULL; break; } if (len > MLEN) { MCLGET(*mp, M_DONTWAIT); if (((*mp)->m_flags & M_EXT) == 0) { m_freem(m0); m0 = NULL; break; } (*mp)->m_len = len <= MCLBYTES ? len : MCLBYTES; } else { (*mp)->m_len = len <= mlen ? len : mlen; } m_copydata(m, m->m_pkthdr.len - len, (*mp)->m_len, mtod((*mp), caddr_t)); len -= (*mp)->m_len; mp = &(*mp)->m_next; mlen = MLEN; } #endif m_freem(m); return m0; } static struct mbuf * tulip_txput( tulip_softc_t * const sc, struct mbuf *m) { TULIP_PERFSTART(txput) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_desc_t *eop, *nextout; int segcnt, free; u_int32_t d_status; struct mbuf *m0; #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { printf(TULIP_PRINTF_FMT ": txput%s: tx not running\n", TULIP_PRINTF_ARGS, (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) ? "(probe)" : ""); sc->tulip_flags |= TULIP_WANTTXSTART; goto finish; } #endif /* * Now we try to fill in our transmit descriptors. This is * a bit reminiscent of going on the Ark two by two * since each descriptor for the TULIP can describe * two buffers. So we advance through the packet, filling * each of the two entries at a time, to fill each * descriptor. Clear the first and last segment bits * in each descriptor (actually just clear everything * but the end-of-ring or chain bits) to make sure * we don't get messed up by previously sent packets. * * We may fail to put the entire packet on the ring if * there are either not enough ring entries free or if the * packet has more than MAX_TXSEG segments. In the former * case we will just wait for the ring to empty. In the * latter case we have to recopy.
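 *
 * N.B. The two-buffers-per-descriptor packing used below, in outline
 * ("physaddr" is a hypothetical stand-in; illustrative only):
 *
 *	if (segcnt & 1) {	-- odd segment: claim a fresh descriptor
 *	    eop->d_addr1 = physaddr;  eop->d_length1 = slen;
 *	} else {		-- even segment: second half of the same one
 *	    eop->d_addr2 = physaddr;  eop->d_length2 = slen;
 *	}
 */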
again: d_status = 0; eop = nextout = ri->ri_nextout; m0 = m; segcnt = 0; free = ri->ri_free; do { int len = m0->m_len; caddr_t addr = mtod(m0, caddr_t); unsigned clsize = CLBYTES - (((u_long) addr) & (CLBYTES-1)); while (len > 0) { unsigned slen = min(len, clsize); #ifdef BIG_PACKET int partial = 0; if (slen >= 2048) slen = 2040, partial = 1; #endif segcnt++; if (segcnt > TULIP_MAX_TXSEG) { /* * The packet exceeds the number of transmit buffer * entries that we can use for one packet, so we have * to recopy it into one mbuf and then try again. */ m = tulip_mbuf_compress(m); if (m == NULL) goto finish; goto again; } if (segcnt & 1) { if (--free == 0) { /* * See if there's any unclaimed space in the * transmit ring. */ if ((free += tulip_tx_intr(sc)) == 0) { /* * There's no more room but since nothing * has been committed at this point, just * show output is active, put back the * mbuf and return. */ sc->tulip_flags |= TULIP_WANTTXSTART; goto finish; } } eop = nextout; if (++nextout == ri->ri_last) nextout = ri->ri_first; eop->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; eop->d_status = d_status; eop->d_addr1 = TULIP_KVATOPHYS(sc, addr); eop->d_length1 = slen; } else { /* * Fill in the second half of the descriptor. */ eop->d_addr2 = TULIP_KVATOPHYS(sc, addr); eop->d_length2 = slen; } d_status = TULIP_DSTS_OWNER; len -= slen; addr += slen; #ifdef BIG_PACKET if (partial) continue; #endif clsize = CLBYTES; } } while ((m0 = m0->m_next) != NULL); /* * The descriptors have been filled in. Now get ready * to transmit. */ IF_ENQUEUE(&sc->tulip_txq, m); m = NULL; /* * Make sure the next descriptor after this packet is owned * by us since it may have been set up above if we ran out * of room in the ring. */ nextout->d_status = 0; /* * If we only used the first segment of the last descriptor, * make sure the second segment will not be used. */ if (segcnt & 1) { eop->d_addr2 = 0; eop->d_length2 = 0; } /* * Mark the last and first segments, indicate we want a transmit * complete interrupt, and tell it to transmit! */ eop->d_flag |= TULIP_DFLAG_TxLASTSEG|TULIP_DFLAG_TxWANTINTR; /* * Note that ri->ri_nextout is still the start of the packet * and until we set the OWNER bit, we can still back out of * everything we have done. */ ri->ri_nextout->d_flag |= TULIP_DFLAG_TxFIRSTSEG; ri->ri_nextout->d_status = TULIP_DSTS_OWNER; TULIP_CSR_WRITE(sc, csr_txpoll, 1); /* * This advances the ring for us. */ ri->ri_nextout = nextout; ri->ri_free = free; TULIP_PERFEND(txput); if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { sc->tulip_if.if_flags |= IFF_OACTIVE; TULIP_PERFEND(txput); return NULL; } /* * switch back to the single queueing ifstart. */ sc->tulip_flags &= ~TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart_one; if (sc->tulip_txtimer == 0) sc->tulip_txtimer = TULIP_TXTIMER; /* * If we want a txstart, there must not be enough space in the * transmit ring. So we want to enable transmit done interrupts * so we can immediately reclaim some space. When the transmit * interrupt is posted, the interrupt handler will call tx_intr * to reclaim space and then txstart (since WANTTXSTART is set). * txstart will move the packet into the transmit ring and clear * WANTTXSTART thereby causing TXINTR to be cleared.
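 *
 * N.B. The handshake, in outline (illustrative only):
 *
 *	sc->tulip_intrmask |= TULIP_STS_TXINTR;		-- ask for tx-done
 *	TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask);
 *	-- tx-done -> tulip_tx_intr() reclaims -> tulip_ifstart() requeues
 *	-- and clears TULIP_WANTTXSTART, so TXINTR gets masked again
 */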
finish: if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_DOINGSETUP)) { sc->tulip_if.if_flags |= IFF_OACTIVE; sc->tulip_if.if_start = tulip_ifstart; if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } else if ((sc->tulip_flags & TULIP_PROMISC) == 0) { if (sc->tulip_intrmask & TULIP_STS_TXINTR) { sc->tulip_intrmask &= ~TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } TULIP_PERFEND(txput); return m; } static void tulip_txput_setup( tulip_softc_t * const sc) { tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_desc_t *nextout; /* * We will transmit, at most, one setup packet per call to ifstart. */ #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { printf(TULIP_PRINTF_FMT ": txput_setup: tx not running\n", TULIP_PRINTF_ARGS); sc->tulip_flags |= TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart; return; } #endif /* * Try to reclaim some free descriptors. */ if (ri->ri_free < 2) tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_DOINGSETUP) || ri->ri_free == 1) { sc->tulip_flags |= TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart; return; } bcopy(sc->tulip_setupdata, sc->tulip_setupbuf, sizeof(sc->tulip_setupbuf)); /* * Clear WANTSETUP and set DOINGSETUP. Since we know that WANTSETUP is * set and DOINGSETUP is clear, doing an XOR of the two will DTRT. */ sc->tulip_flags ^= TULIP_WANTSETUP|TULIP_DOINGSETUP; ri->ri_free--; nextout = ri->ri_nextout; nextout->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; nextout->d_flag |= TULIP_DFLAG_TxFIRSTSEG|TULIP_DFLAG_TxLASTSEG |TULIP_DFLAG_TxSETUPPKT|TULIP_DFLAG_TxWANTINTR; if (sc->tulip_flags & TULIP_WANTHASHPERFECT) nextout->d_flag |= TULIP_DFLAG_TxHASHFILT; else if (sc->tulip_flags & TULIP_WANTHASHONLY) nextout->d_flag |= TULIP_DFLAG_TxHASHFILT|TULIP_DFLAG_TxINVRSFILT; nextout->d_length1 = sizeof(sc->tulip_setupbuf); nextout->d_addr1 = TULIP_KVATOPHYS(sc, sc->tulip_setupbuf); nextout->d_length2 = 0; nextout->d_addr2 = 0; /* * Advance the ring for the next transmit packet. */ if (++ri->ri_nextout == ri->ri_last) ri->ri_nextout = ri->ri_first; /* * Make sure the next descriptor is owned by us since it * may have been set up above if we ran out of room in the * ring. */ ri->ri_nextout->d_status = 0; nextout->d_status = TULIP_DSTS_OWNER; TULIP_CSR_WRITE(sc, csr_txpoll, 1); if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } /* * This routine is entered at splnet() (splsoftnet() on NetBSD) * and thereby poses no problems whether TULIP_USE_SOFTINTR is * defined or not. */
*/ static int tulip_ifioctl( struct ifnet * ifp, ioctl_cmd_t cmd, caddr_t data) { TULIP_PERFSTART(ifioctl) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); struct ifaddr *ifa = (struct ifaddr *)data; struct ifreq *ifr = (struct ifreq *) data; tulip_spl_t s; int error = 0; #if defined(TULIP_USE_SOFTINTR) s = TULIP_RAISESOFTSPL(); #else s = TULIP_RAISESPL(); #endif switch (cmd) { case SIOCSIFADDR: { ifp->if_flags |= IFF_UP; switch(ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: { tulip_init(sc); TULIP_ARP_IFINIT(sc, ifa); break; } #endif /* INET */ #ifdef IPX case AF_IPX: { struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr); if (ipx_nullhost(*ina)) { ina->x_host = *(union ipx_host *)(sc->tulip_enaddr); } else { ifp->if_flags &= ~IFF_RUNNING; bcopy((caddr_t)ina->x_host.c_host, (caddr_t)sc->tulip_enaddr, sizeof(sc->tulip_enaddr)); } tulip_init(sc); break; } #endif /* IPX */ #ifdef NS /* * This magic copied from if_is.c; I don't use XNS, * so I have no way of telling if this actually * works or not. */ case AF_NS: { struct ns_addr *ina = &(IA_SNS(ifa)->sns_addr); if (ns_nullhost(*ina)) { ina->x_host = *(union ns_host *)(sc->tulip_enaddr); } else { ifp->if_flags &= ~IFF_RUNNING; bcopy((caddr_t)ina->x_host.c_host, (caddr_t)sc->tulip_enaddr, sizeof(sc->tulip_enaddr)); } tulip_init(sc); break; } #endif /* NS */ default: { tulip_init(sc); break; } } break; } case SIOCGIFADDR: { bcopy((caddr_t) sc->tulip_enaddr, (caddr_t) ((struct sockaddr *)&ifr->ifr_data)->sa_data, 6); break; } case SIOCSIFFLAGS: { #if !defined(IFM_ETHER) int flags = 0; if (ifp->if_flags & IFF_LINK0) flags |= 1; if (ifp->if_flags & IFF_LINK1) flags |= 2; if (ifp->if_flags & IFF_LINK2) flags |= 4; if (flags == 7) { ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1|IFF_LINK2); sc->tulip_media = TULIP_MEDIA_UNKNOWN; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_WANTRXACT|TULIP_LINKUP|TULIP_NOAUTOSENSE); tulip_reset(sc); } else if (flags) { tulip_media_t media; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL && --flags == 0) { sc->tulip_flags |= TULIP_NOAUTOSENSE; if (sc->tulip_media != media || (sc->tulip_flags & TULIP_DIDNWAY)) { sc->tulip_flags &= ~TULIP_DIDNWAY; tulip_linkup(sc, media); } break; } } if (flags) printf(TULIP_PRINTF_FMT ": ignored invalid media request\n", TULIP_PRINTF_ARGS); } #endif tulip_init(sc); break; } #if defined(SIOCSIFMEDIA) case SIOCSIFMEDIA: case SIOCGIFMEDIA: { error = ifmedia_ioctl(ifp, ifr, &sc->tulip_ifmedia, cmd); break; } #endif case SIOCADDMULTI: case SIOCDELMULTI: { /* * Update multicast listeners */ #if defined(__FreeBSD__) && __FreeBSD__ >= 3 tulip_addr_filter(sc); /* reset multicast filtering */ tulip_init(sc); error = 0; #else if (cmd == SIOCADDMULTI) error = ether_addmulti(ifr, TULIP_ETHERCOM(sc)); else error = ether_delmulti(ifr, TULIP_ETHERCOM(sc)); if (error == ENETRESET) { tulip_addr_filter(sc); /* reset multicast filtering */ tulip_init(sc); error = 0; } #endif break; } #if defined(SIOCSIFMTU) #if !defined(ifr_mtu) #define ifr_mtu ifr_metric #endif case SIOCSIFMTU: /* * Set the interface MTU. 
*/ if (ifr->ifr_mtu > ETHERMTU #ifdef BIG_PACKET && sc->tulip_chipid != TULIP_21140 && sc->tulip_chipid != TULIP_21140A && sc->tulip_chipid != TULIP_21041 #endif ) { error = EINVAL; break; } ifp->if_mtu = ifr->ifr_mtu; #ifdef BIG_PACKET tulip_reset(sc); tulip_init(sc); #endif break; #endif /* SIOCSIFMTU */ #ifdef SIOCGADDRROM case SIOCGADDRROM: { error = copyout(sc->tulip_rombuf, ifr->ifr_data, sizeof(sc->tulip_rombuf)); break; } #endif #ifdef SIOCGCHIPID case SIOCGCHIPID: { ifr->ifr_metric = (int) sc->tulip_chipid; break; } #endif default: { error = EINVAL; break; } } TULIP_RESTORESPL(s); TULIP_PERFEND(ifioctl); return error; } /* * These routines get called at device spl (from ether_output). This might * pose a problem for TULIP_USE_SOFTINTR if ether_output is called at * device spl from another driver. */ static ifnet_ret_t tulip_ifstart( struct ifnet * const ifp) { TULIP_PERFSTART(ifstart) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); if (sc->tulip_if.if_flags & IFF_RUNNING) { if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == TULIP_WANTSETUP) tulip_txput_setup(sc); while (sc->tulip_if.if_snd.ifq_head != NULL) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_if.if_snd, m); if ((m = tulip_txput(sc, m)) != NULL) { IF_PREPEND(&sc->tulip_if.if_snd, m); break; } } } TULIP_PERFEND(ifstart); } static ifnet_ret_t tulip_ifstart_one( struct ifnet * const ifp) { TULIP_PERFSTART(ifstart_one) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); if ((sc->tulip_if.if_flags & IFF_RUNNING) && sc->tulip_if.if_snd.ifq_head != NULL) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_if.if_snd, m); if ((m = tulip_txput(sc, m)) != NULL) IF_PREPEND(&sc->tulip_if.if_snd, m); } TULIP_PERFEND(ifstart_one); } /* * Even though this routine runs at device spl, it does not break * our use of splnet (splsoftnet under NetBSD) for the majority * of this driver (if TULIP_USE_SOFTINTR is defined) since * tulip_ifwatchdog is called from if_watchdog which is called from * splsoftclock which is below spl[soft]net. */ static void tulip_ifwatchdog( struct ifnet *ifp) { TULIP_PERFSTART(ifwatchdog) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); #if defined(TULIP_DEBUG) u_int32_t rxintrs = sc->tulip_dbg.dbg_rxintrs - sc->tulip_dbg.dbg_last_rxintrs; if (rxintrs > sc->tulip_dbg.dbg_high_rxintrs_hz) sc->tulip_dbg.dbg_high_rxintrs_hz = rxintrs; sc->tulip_dbg.dbg_last_rxintrs = sc->tulip_dbg.dbg_rxintrs; #endif /* TULIP_DEBUG */ sc->tulip_if.if_timer = 1; /* * These should be rare so do a bulk test up front so we can just skip * them if needed. */
*/ if (sc->tulip_flags & (TULIP_SYSTEMERROR|TULIP_RXBUFSLOW|TULIP_NOMESSAGES)) { /* * If the number of receive buffer is low, try to refill */ if (sc->tulip_flags & TULIP_RXBUFSLOW) tulip_rx_intr(sc); if (sc->tulip_flags & TULIP_SYSTEMERROR) { printf(TULIP_PRINTF_FMT ": %d system errors: last was %s\n", TULIP_PRINTF_ARGS, sc->tulip_system_errors, tulip_system_errors[sc->tulip_last_system_error]); } if (sc->tulip_statusbits) { tulip_print_abnormal_interrupt(sc, sc->tulip_statusbits); sc->tulip_statusbits = 0; } sc->tulip_flags &= ~(TULIP_NOMESSAGES|TULIP_SYSTEMERROR); } if (sc->tulip_txtimer) tulip_tx_intr(sc); if (sc->tulip_txtimer && --sc->tulip_txtimer == 0) { printf(TULIP_PRINTF_FMT ": transmission timeout\n", TULIP_PRINTF_ARGS); if (TULIP_DO_AUTOSENSE(sc)) { sc->tulip_media = TULIP_MEDIA_UNKNOWN; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_WANTRXACT|TULIP_LINKUP); } tulip_reset(sc); tulip_init(sc); } TULIP_PERFEND(ifwatchdog); TULIP_PERFMERGE(sc, perf_intr_cycles); TULIP_PERFMERGE(sc, perf_ifstart_cycles); TULIP_PERFMERGE(sc, perf_ifioctl_cycles); TULIP_PERFMERGE(sc, perf_ifwatchdog_cycles); TULIP_PERFMERGE(sc, perf_timeout_cycles); TULIP_PERFMERGE(sc, perf_ifstart_one_cycles); TULIP_PERFMERGE(sc, perf_txput_cycles); TULIP_PERFMERGE(sc, perf_txintr_cycles); TULIP_PERFMERGE(sc, perf_rxintr_cycles); TULIP_PERFMERGE(sc, perf_rxget_cycles); TULIP_PERFMERGE(sc, perf_intr); TULIP_PERFMERGE(sc, perf_ifstart); TULIP_PERFMERGE(sc, perf_ifioctl); TULIP_PERFMERGE(sc, perf_ifwatchdog); TULIP_PERFMERGE(sc, perf_timeout); TULIP_PERFMERGE(sc, perf_ifstart_one); TULIP_PERFMERGE(sc, perf_txput); TULIP_PERFMERGE(sc, perf_txintr); TULIP_PERFMERGE(sc, perf_rxintr); TULIP_PERFMERGE(sc, perf_rxget); } #if defined(__bsdi__) || (defined(__FreeBSD__) && BSD < 199506) static ifnet_ret_t tulip_ifwatchdog_wrapper( int unit) { tulip_ifwatchdog(&TULIP_UNIT_TO_SOFTC(unit)->tulip_if); } #define tulip_ifwatchdog tulip_ifwatchdog_wrapper #endif /* * All printf's are real as of now! */ #ifdef printf #undef printf #endif #if !defined(IFF_NOTRAILERS) #define IFF_NOTRAILERS 0 #endif static void tulip_attach( tulip_softc_t * const sc) { struct ifnet * const ifp = &sc->tulip_if; ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST; ifp->if_ioctl = tulip_ifioctl; ifp->if_start = tulip_ifstart; ifp->if_watchdog = tulip_ifwatchdog; ifp->if_timer = 1; #if !defined(__bsdi__) || _BSDI_VERSION < 199401 ifp->if_output = ether_output; #endif #if defined(__bsdi__) && _BSDI_VERSION < 199401 ifp->if_mtu = ETHERMTU; #endif #if defined(__bsdi__) && _BSDI_VERSION >= 199510 aprint_naive(": DEC Ethernet"); aprint_normal(": %s%s", sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid]); aprint_verbose(" pass %d.%d", (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F); printf("\n"); sc->tulip_pf = aprint_normal; aprint_normal(TULIP_PRINTF_FMT ": address " TULIP_EADDR_FMT "\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(sc->tulip_enaddr)); #else printf( #if defined(__bsdi__) "\n" #endif TULIP_PRINTF_FMT ": %s%s pass %d.%d%s\n", TULIP_PRINTF_ARGS, sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid], (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F, (sc->tulip_features & (TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM)) == TULIP_HAVE_ISVSROM ? 
" (invalid EESPROM checksum)" : ""); printf(TULIP_PRINTF_FMT ": address " TULIP_EADDR_FMT "\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(sc->tulip_enaddr)); #endif #if defined(__alpha__) /* * In case the SRM console told us about a bogus media, * we need to check to be safe. */ if (sc->tulip_mediums[sc->tulip_media] == NULL) sc->tulip_media = TULIP_MEDIA_UNKNOWN; #endif (*sc->tulip_boardsw->bd_media_probe)(sc); #if defined(IFM_ETHER) ifmedia_init(&sc->tulip_ifmedia, 0, tulip_ifmedia_change, tulip_ifmedia_status); #else { tulip_media_t media; int cnt; printf(TULIP_PRINTF_FMT ": media:", TULIP_PRINTF_ARGS); for (media = TULIP_MEDIA_UNKNOWN, cnt = 1; cnt < 7 && media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL) { printf(" %d=\"%s\"", cnt, tulip_mediums[media]); cnt++; } } if (cnt == 1) { sc->tulip_features |= TULIP_HAVE_NOMEDIA; printf(" none\n"); } else { printf("\n"); } } #endif sc->tulip_flags &= ~TULIP_DEVICEPROBE; #if defined(IFM_ETHER) tulip_ifmedia_add(sc); #endif tulip_reset(sc); #if defined(__bsdi__) && _BSDI_VERSION >= 199510 sc->tulip_pf = printf; TULIP_ETHER_IFATTACH(sc); #else if_attach(ifp); #if defined(__NetBSD__) || (defined(__FreeBSD__) && BSD >= 199506) TULIP_ETHER_IFATTACH(sc); #endif #endif /* __bsdi__ */ #if NBPFILTER > 0 TULIP_BPF_ATTACH(sc); #endif #if defined(__NetBSD__) && NRND > 0 rnd_attach_source(&sc->tulip_rndsource, sc->tulip_dev.dv_xname, RND_TYPE_NET); #endif } static void tulip_initcsrs( tulip_softc_t * const sc, tulip_csrptr_t csr_base, size_t csr_size) { sc->tulip_csrs.csr_busmode = csr_base + 0 * csr_size; sc->tulip_csrs.csr_txpoll = csr_base + 1 * csr_size; sc->tulip_csrs.csr_rxpoll = csr_base + 2 * csr_size; sc->tulip_csrs.csr_rxlist = csr_base + 3 * csr_size; sc->tulip_csrs.csr_txlist = csr_base + 4 * csr_size; sc->tulip_csrs.csr_status = csr_base + 5 * csr_size; sc->tulip_csrs.csr_command = csr_base + 6 * csr_size; sc->tulip_csrs.csr_intr = csr_base + 7 * csr_size; sc->tulip_csrs.csr_missed_frames = csr_base + 8 * csr_size; sc->tulip_csrs.csr_9 = csr_base + 9 * csr_size; sc->tulip_csrs.csr_10 = csr_base + 10 * csr_size; sc->tulip_csrs.csr_11 = csr_base + 11 * csr_size; sc->tulip_csrs.csr_12 = csr_base + 12 * csr_size; sc->tulip_csrs.csr_13 = csr_base + 13 * csr_size; sc->tulip_csrs.csr_14 = csr_base + 14 * csr_size; sc->tulip_csrs.csr_15 = csr_base + 15 * csr_size; #if defined(TULIP_EISA) sc->tulip_csrs.csr_enetrom = csr_base + DE425_ENETROM_OFFSET; #endif } static void tulip_initring( tulip_softc_t * const sc, tulip_ringinfo_t * const ri, tulip_desc_t *descs, int ndescs) { ri->ri_max = ndescs; ri->ri_first = descs; ri->ri_last = ri->ri_first + ri->ri_max; bzero((caddr_t) ri->ri_first, sizeof(ri->ri_first[0]) * ri->ri_max); ri->ri_last[-1].d_flag = TULIP_DFLAG_ENDRING; } /* * This is the PCI configuration support. Since the 21040 is available * on both EISA and PCI boards, one must be careful in how defines the * 21040 in the config file. 
#define PCI_CFID 0x00 /* Configuration ID */ #define PCI_CFCS 0x04 /* Configuration Command/Status */ #define PCI_CFRV 0x08 /* Configuration Revision */ #define PCI_CFLT 0x0c /* Configuration Latency Timer */ #define PCI_CBIO 0x10 /* Configuration Base IO Address */ #define PCI_CBMA 0x14 /* Configuration Base Memory Address */ #define PCI_CFIT 0x3c /* Configuration Interrupt */ #define PCI_CFDA 0x40 /* Configuration Driver Area */ #if defined(TULIP_EISA) static const int tulip_eisa_irqs[4] = { IRQ5, IRQ9, IRQ10, IRQ11 }; #endif #if defined(__FreeBSD__) #define TULIP_PCI_ATTACH_ARGS pcici_t config_id, int unit #define TULIP_SHUTDOWN_ARGS int howto, void * arg #if defined(TULIP_DEVCONF) static void tulip_shutdown(TULIP_SHUTDOWN_ARGS); static int tulip_pci_shutdown( struct kern_devconf * const kdc, int force) { if (kdc->kdc_unit < TULIP_MAX_DEVICES) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(kdc->kdc_unit); if (sc != NULL) tulip_shutdown(0, sc); } (void) dev_detach(kdc); return 0; } #endif static char* tulip_pci_probe( pcici_t config_id, pcidi_t device_id) { if (PCI_VENDORID(device_id) != DEC_VENDORID) return NULL; if (PCI_CHIPID(device_id) == CHIPID_21040) return "Digital 21040 Ethernet"; if (PCI_CHIPID(device_id) == CHIPID_21041) return "Digital 21041 Ethernet"; if (PCI_CHIPID(device_id) == CHIPID_21140) { u_int32_t revinfo = pci_conf_read(config_id, PCI_CFRV) & 0xFF; if (revinfo >= 0x20) return "Digital 21140A Fast Ethernet"; else return "Digital 21140 Fast Ethernet"; } if (PCI_CHIPID(device_id) == CHIPID_21142) { u_int32_t revinfo = pci_conf_read(config_id, PCI_CFRV) & 0xFF; if (revinfo >= 0x20) return "Digital 21143 Fast Ethernet"; else return "Digital 21142 Fast Ethernet"; } return NULL; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); static u_long tulip_pci_count; static struct pci_device dedevice = { "de", tulip_pci_probe, tulip_pci_attach, &tulip_pci_count, #if defined(TULIP_DEVCONF) tulip_pci_shutdown, #endif }; DATA_SET (pcidevice_set, dedevice); #endif /* __FreeBSD__ */ #if defined(__bsdi__) #define TULIP_PCI_ATTACH_ARGS struct device * const parent, struct device * const self, void * const aux #define TULIP_SHUTDOWN_ARGS void *arg static int tulip_pci_match( pci_devaddr_t *pa) { int irq; unsigned id; id = pci_inl(pa, PCI_VENDOR_ID); if (PCI_VENDORID(id) != DEC_VENDORID) return 0; id = PCI_CHIPID(id); if (id != CHIPID_21040 && id != CHIPID_21041 && id != CHIPID_21140 && id != CHIPID_21142) return 0; irq = pci_inl(pa, PCI_I_LINE) & 0xFF; if (irq == 0 || irq >= 16) { printf("de?: invalid IRQ %d; skipping\n", irq); return 0; } return 1; } static int tulip_probe( struct device *parent, struct cfdata *cf, void *aux) { struct isa_attach_args * const ia = (struct isa_attach_args *) aux; unsigned irq, slot; pci_devaddr_t *pa; #if _BSDI_VERSION >= 199401 switch (ia->ia_bustype) { case BUS_PCI: #endif pa = pci_scan(tulip_pci_match); if (pa == NULL) return 0; irq = (1 << (pci_inl(pa, PCI_I_LINE) & 0xFF)); /* Get the base address; assume the BIOS set it up correctly */ #if defined(TULIP_IOMAPPED) ia->ia_maddr = NULL; ia->ia_msize = 0; ia->ia_iobase = pci_inl(pa, PCI_CBIO) & ~7; pci_outl(pa, PCI_CBIO, 0xFFFFFFFF); ia->ia_iosize = ((~pci_inl(pa, PCI_CBIO)) | 7) + 1; pci_outl(pa, PCI_CBIO, (int) ia->ia_iobase); /* Disable memory space access */ pci_outl(pa, PCI_COMMAND, pci_inl(pa, PCI_COMMAND) & ~2); #else ia->ia_maddr = (caddr_t) (pci_inl(pa, PCI_CBMA) & ~7); pci_outl(pa, PCI_CBMA, 0xFFFFFFFF); ia->ia_msize = ((~pci_inl(pa, PCI_CBMA)) | 7) + 1; pci_outl(pa, PCI_CBMA, (int)
ia->ia_maddr); ia->ia_iobase = 0; ia->ia_iosize = 0; /* Disable I/O space access */ pci_outl(pa, PCI_COMMAND, pci_inl(pa, PCI_COMMAND) & ~1); #endif /* TULIP_IOMAPPED */ ia->ia_aux = (void *) pa; #if _BSDI_VERSION >= 199401 break; #if defined(TULIP_EISA) case BUS_EISA: { unsigned tmp; if ((slot = eisa_match(cf, ia)) == 0) return 0; ia->ia_iobase = slot << 12; ia->ia_iosize = EISA_NPORT; eisa_slotalloc(slot); tmp = inb(ia->ia_iobase + DE425_CFG0); irq = tulip_eisa_irqs[(tmp >> 1) & 0x03]; /* * Until BSD/OS likes level interrupts, force * the DE425 into edge-triggered mode. */ if ((tmp & 1) == 0) outb(ia->ia_iobase + DE425_CFG0, tmp | 1); /* * CBIO needs to map to the EISA slot * enable I/O access and Master */ outl(ia->ia_iobase + DE425_CBIO, ia->ia_iobase); outl(ia->ia_iobase + DE425_CFCS, 5 | inl(ia->ia_iobase + DE425_CFCS)); ia->ia_aux = NULL; break; } #endif /* TULIP_EISA */ default: return 0; } #endif /* PCI bus masters don't use host DMA channels */ ia->ia_drq = DRQNONE; if (ia->ia_irq != IRQUNK && irq != ia->ia_irq) { printf("de%d: error: desired IRQ of %d does not match device's " "actual IRQ of %d,\n", cf->cf_unit, ffs(ia->ia_irq) - 1, ffs(irq) - 1); return 0; } if (ia->ia_irq == IRQUNK) ia->ia_irq = irq; #ifdef IRQSHARE ia->ia_irq |= IRQSHARE; #endif return 1; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); #if defined(TULIP_EISA) static char *tulip_eisa_ids[] = { "DEC4250", NULL }; #endif struct cfdriver decd = { 0, "de", tulip_probe, tulip_pci_attach, #if _BSDI_VERSION >= 199401 DV_IFNET, #endif sizeof(tulip_softc_t), #if defined(TULIP_EISA) tulip_eisa_ids #endif }; #endif /* __bsdi__ */ #if defined(__NetBSD__) #define TULIP_PCI_ATTACH_ARGS struct device * const parent, struct device * const self, void * const aux #define TULIP_SHUTDOWN_ARGS void *arg static int tulip_pci_probe( struct device *parent, #ifdef __BROKEN_INDIRECT_CONFIG void *match, #else struct cfdata *match, #endif void *aux) { struct pci_attach_args *pa = (struct pci_attach_args *) aux; if (PCI_VENDORID(pa->pa_id) != DEC_VENDORID) return 0; if (PCI_CHIPID(pa->pa_id) == CHIPID_21040 || PCI_CHIPID(pa->pa_id) == CHIPID_21041 || PCI_CHIPID(pa->pa_id) == CHIPID_21140 || PCI_CHIPID(pa->pa_id) == CHIPID_21142) return 1; return 0; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); struct cfattach de_ca = { sizeof(tulip_softc_t), tulip_pci_probe, tulip_pci_attach }; struct cfdriver de_cd = { 0, "de", DV_IFNET }; #endif /* __NetBSD__ */ static void tulip_shutdown( TULIP_SHUTDOWN_ARGS) { tulip_softc_t * const sc = arg; TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ } static void tulip_pci_attach( TULIP_PCI_ATTACH_ARGS) { #if defined(__FreeBSD__) tulip_softc_t *sc; #define PCI_CONF_WRITE(r, v) pci_conf_write(config_id, (r), (v)) #define PCI_CONF_READ(r) pci_conf_read(config_id, (r)) #if __FreeBSD__ >= 3 #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = (config_id->bus), /* XXX */ \ (sc)->tulip_pci_devno = (config_id->slot))) /* XXX */ #else #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = ((config_id.cfg1 >> 16) & 0xFF), /* XXX */ \ (sc)->tulip_pci_devno = ((config_id.cfg1 >> 11) & 0x1F))) /* XXX */ #endif #endif #if defined(__bsdi__) tulip_softc_t * const sc = (tulip_softc_t *) self; struct isa_attach_args * const ia = (struct isa_attach_args *) aux; pci_devaddr_t *pa = (pci_devaddr_t *) ia->ia_aux; const int unit = sc->tulip_dev.dv_unit; 
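/*
 * N.B. The bsdi probe above sizes each base address register the
 * standard PCI way -- write all ones, read back, invert.  A sketch with
 * a hypothetical "size" variable (illustrative only):
 *
 *	pci_outl(pa, PCI_CBMA, 0xFFFFFFFF);		-- probe the BAR
 *	size = ((~pci_inl(pa, PCI_CBMA)) | 7) + 1;	-- decode its span
 *	pci_outl(pa, PCI_CBMA, (int) ia->ia_maddr);	-- restore the base
 */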
#define PCI_CONF_WRITE(r, v) pci_outl(pa, (r), (v)) #define PCI_CONF_READ(r) pci_inl(pa, (r)) #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = pa->d_bus, \ (sc)->tulip_pci_devno = pa->d_agent)) #endif #if defined(__NetBSD__) tulip_softc_t * const sc = (tulip_softc_t *) self; struct pci_attach_args * const pa = (struct pci_attach_args *) aux; const int unit = sc->tulip_dev.dv_unit; #define PCI_CONF_WRITE(r, v) pci_conf_write(pa->pa_pc, pa->pa_tag, (r), (v)) #define PCI_CONF_READ(r) pci_conf_read(pa->pa_pc, pa->pa_tag, (r)) #define PCI_GETBUSDEVINFO(sc) do { \ (sc)->tulip_pci_busno = parent; \ (sc)->tulip_pci_devno = pa->pa_device; \ } while (0) #endif /* __NetBSD__ */ #if defined(__alpha__) tulip_media_t media = TULIP_MEDIA_UNKNOWN; #endif int retval, idx; u_int32_t revinfo, cfdainfo, id; #if !defined(TULIP_IOMAPPED) && defined(__FreeBSD__) vm_offset_t pa_csrs; #endif unsigned csroffset = TULIP_PCI_CSROFFSET; unsigned csrsize = TULIP_PCI_CSRSIZE; tulip_csrptr_t csr_base; tulip_chipid_t chipid = TULIP_CHIPID_UNKNOWN; if (unit >= TULIP_MAX_DEVICES) { #ifdef __FreeBSD__ printf("de%d", unit); #endif printf(": not configured; limit of %d reached or exceeded\n", TULIP_MAX_DEVICES); return; } #if defined(__bsdi__) if (pa != NULL) { revinfo = pci_inl(pa, PCI_CFRV) & 0xFF; id = pci_inl(pa, PCI_CFID); cfdainfo = pci_inl(pa, PCI_CFDA); #if defined(TULIP_EISA) } else { revinfo = inl(ia->ia_iobase + DE425_CFRV) & 0xFF; csroffset = TULIP_EISA_CSROFFSET; csrsize = TULIP_EISA_CSRSIZE; chipid = TULIP_DE425; cfdainfo = 0; #endif /* TULIP_EISA */ } #else /* __bsdi__ */ revinfo = PCI_CONF_READ(PCI_CFRV) & 0xFF; id = PCI_CONF_READ(PCI_CFID); cfdainfo = PCI_CONF_READ(PCI_CFDA); #endif /* __bsdi__ */ if (PCI_VENDORID(id) == DEC_VENDORID) { if (PCI_CHIPID(id) == CHIPID_21040) chipid = TULIP_21040; else if (PCI_CHIPID(id) == CHIPID_21140) { chipid = (revinfo >= 0x20) ? TULIP_21140A : TULIP_21140; } else if (PCI_CHIPID(id) == CHIPID_21142) { chipid = (revinfo >= 0x20) ? 
TULIP_21143 : TULIP_21142; } else if (PCI_CHIPID(id) == CHIPID_21041) chipid = TULIP_21041; else if (PCI_CHIPID(id) == CHIPID_21142) chipid = TULIP_21142; } if (chipid == TULIP_CHIPID_UNKNOWN) return; if ((chipid == TULIP_21040 || chipid == TULIP_DE425) && revinfo < 0x20) { #ifdef __FreeBSD__ printf("de%d", unit); #endif printf(": not configured; 21040 pass 2.0 required (%d.%d found)\n", revinfo >> 4, revinfo & 0x0f); return; } else if (chipid == TULIP_21140 && revinfo < 0x11) { #ifndef __FreeBSD__ printf("\n"); #endif printf("de%d: not configured; 21140 pass 1.1 required (%d.%d found)\n", unit, revinfo >> 4, revinfo & 0x0f); return; } #if defined(__FreeBSD__) sc = (tulip_softc_t *) malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT); if (sc == NULL) return; bzero(sc, sizeof(*sc)); /* Zero out the softc*/ sc->tulip_rxdescs = (tulip_desc_t *) malloc(sizeof(tulip_desc_t) * TULIP_RXDESCS, M_DEVBUF, M_NOWAIT); sc->tulip_txdescs = (tulip_desc_t *) malloc(sizeof(tulip_desc_t) * TULIP_TXDESCS, M_DEVBUF, M_NOWAIT); if (sc->tulip_rxdescs == NULL || sc->tulip_txdescs == NULL) { if (sc->tulip_rxdescs) free((caddr_t) sc->tulip_rxdescs, M_DEVBUF); if (sc->tulip_txdescs) free((caddr_t) sc->tulip_txdescs, M_DEVBUF); free((caddr_t) sc, M_DEVBUF); return; } #endif PCI_GETBUSDEVINFO(sc); sc->tulip_chipid = chipid; sc->tulip_flags |= TULIP_DEVICEPROBE; if (chipid == TULIP_21140 || chipid == TULIP_21140A) sc->tulip_features |= TULIP_HAVE_GPR|TULIP_HAVE_STOREFWD; if (chipid == TULIP_21140A && revinfo <= 0x22) sc->tulip_features |= TULIP_HAVE_RXBADOVRFLW; if (chipid == TULIP_21140) sc->tulip_features |= TULIP_HAVE_BROKEN_HASH; if (chipid != TULIP_21040 && chipid != TULIP_DE425 && chipid != TULIP_21140) sc->tulip_features |= TULIP_HAVE_POWERMGMT; if (chipid == TULIP_21041 || chipid == TULIP_21142 || chipid == TULIP_21143) { sc->tulip_features |= TULIP_HAVE_DUALSENSE; if (chipid != TULIP_21041 || sc->tulip_revinfo >= 0x20) sc->tulip_features |= TULIP_HAVE_SIANWAY; if (chipid != TULIP_21041) sc->tulip_features |= TULIP_HAVE_SIAGP|TULIP_HAVE_RXBADOVRFLW|TULIP_HAVE_STOREFWD; if (chipid != TULIP_21041 && sc->tulip_revinfo >= 0x20) sc->tulip_features |= TULIP_HAVE_SIA100; } if (sc->tulip_features & TULIP_HAVE_POWERMGMT && (cfdainfo & (TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE))) { cfdainfo &= ~(TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE); PCI_CONF_WRITE(PCI_CFDA, cfdainfo); DELAY(11*1000); } #if defined(__alpha__) && defined(__NetBSD__) /* * The Alpha SRM console encodes a console set media in the driver * part of the CFDA register. Note that the Multia presents a * problem in that its BNC mode is really EXTSIA. So in that case * force a probe. */ switch ((cfdainfo >> 8) & 0xff) { case 1: media = chipid > TULIP_DE425 ? TULIP_MEDIA_AUI : TULIP_MEDIA_AUIBNC; break; case 2: media = chipid > TULIP_DE425 ? 
TULIP_MEDIA_BNC : TULIP_MEDIA_UNKNOWN; break; case 3: media = TULIP_MEDIA_10BASET; break; case 4: media = TULIP_MEDIA_10BASET_FD; break; case 5: media = TULIP_MEDIA_100BASETX; break; case 6: media = TULIP_MEDIA_100BASETX_FD; break; } #endif #if defined(__NetBSD__) bcopy(self->dv_xname, sc->tulip_if.if_xname, IFNAMSIZ); sc->tulip_if.if_softc = sc; sc->tulip_pc = pa->pa_pc; #else sc->tulip_unit = unit; sc->tulip_name = "de"; #endif sc->tulip_revinfo = revinfo; #if defined(__FreeBSD__) #if BSD >= 199506 sc->tulip_if.if_softc = sc; #endif #if defined(TULIP_IOMAPPED) retval = pci_map_port(config_id, PCI_CBIO, &csr_base); #else retval = pci_map_mem(config_id, PCI_CBMA, (vm_offset_t *) &csr_base, &pa_csrs); #endif if (!retval) { free((caddr_t) sc->tulip_rxdescs, M_DEVBUF); free((caddr_t) sc->tulip_txdescs, M_DEVBUF); free((caddr_t) sc, M_DEVBUF); return; } tulips[unit] = sc; #endif /* __FreeBSD__ */ #if defined(__bsdi__) sc->tulip_pf = printf; #if defined(TULIP_IOMAPPED) csr_base = ia->ia_iobase; #else csr_base = (vm_offset_t) mapphys((vm_offset_t) ia->ia_maddr, ia->ia_msize); #endif #endif /* __bsdi__ */ #if defined(__NetBSD__) csr_base = 0; { bus_space_tag_t iot, memt; bus_space_handle_t ioh, memh; int ioh_valid, memh_valid; ioh_valid = (pci_mapreg_map(pa, PCI_CBIO, PCI_MAPREG_TYPE_IO, 0, &iot, &ioh, NULL, NULL) == 0); memh_valid = (pci_mapreg_map(pa, PCI_CBMA, PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT, 0, &memt, &memh, NULL, NULL) == 0); if (memh_valid) { sc->tulip_bustag = memt; sc->tulip_bushandle = memh; } else if (ioh_valid) { sc->tulip_bustag = iot; sc->tulip_bushandle = ioh; } else { printf(": unable to map device registers\n"); return; } } #endif /* __NetBSD__ */ tulip_initcsrs(sc, csr_base + csroffset, csrsize); tulip_initring(sc, &sc->tulip_rxinfo, sc->tulip_rxdescs, TULIP_RXDESCS); tulip_initring(sc, &sc->tulip_txinfo, sc->tulip_txdescs, TULIP_TXDESCS); /* * Make sure there won't be any interrupts or such... 
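 *
 * (Editor's note: the software reset below quiesces the receive and
 * transmit DMA engines before tulip_read_macaddr() goes near the
 * address ROM; the DELAY(100) that follows -- 100 microseconds -- is a
 * generous margin over the roughly 50 PCI cycles the reset needs.)
 */
#if 0
	/*
	 * Sketch only, not in the original driver: interrupts could also
	 * be masked explicitly before the reset; the "csr_intr" register
	 * name is assumed here.
	 */
	TULIP_CSR_WRITE(sc, csr_intr, 0);
#endif
/*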
*/ TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(100); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ if ((retval = tulip_read_macaddr(sc)) < 0) { #if defined(__FreeBSD__) printf(TULIP_PRINTF_FMT, TULIP_PRINTF_ARGS); #endif printf(": can't read ENET ROM (why=%d) (", retval); for (idx = 0; idx < 32; idx++) printf("%02x", sc->tulip_rombuf[idx]); printf("\n"); printf(TULIP_PRINTF_FMT ": %s%s pass %d.%d\n", TULIP_PRINTF_ARGS, sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid], (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F); printf(TULIP_PRINTF_FMT ": address unknown\n", TULIP_PRINTF_ARGS); } else { tulip_spl_t s; tulip_intrfunc_t (*intr_rtn)(void *) = tulip_intr_normal; if (sc->tulip_features & TULIP_HAVE_SHAREDINTR) intr_rtn = tulip_intr_shared; #if defined(__NetBSD__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { pci_intr_handle_t intrhandle; const char *intrstr; if (pci_intr_map(pa->pa_pc, pa->pa_intrtag, pa->pa_intrpin, pa->pa_intrline, &intrhandle)) { printf(": couldn't map interrupt\n"); return; } intrstr = pci_intr_string(pa->pa_pc, intrhandle); sc->tulip_ih = pci_intr_establish(pa->pa_pc, intrhandle, IPL_NET, intr_rtn, sc); if (sc->tulip_ih == NULL) printf(": couldn't establish interrupt"); if (intrstr != NULL) printf(" at %s", intrstr); printf("\n"); if (sc->tulip_ih == NULL) return; } sc->tulip_ats = shutdownhook_establish(tulip_shutdown, sc); if (sc->tulip_ats == NULL) printf("\n%s: warning: couldn't establish shutdown hook\n", sc->tulip_xname); #endif #if defined(__FreeBSD__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { if (!pci_map_int (config_id, intr_rtn, (void*) sc, &net_imask)) { printf(TULIP_PRINTF_FMT ": couldn't map interrupt\n", TULIP_PRINTF_ARGS); return; } } #if !defined(TULIP_DEVCONF) at_shutdown(tulip_shutdown, sc, SHUTDOWN_POST_SYNC); #endif #endif #if defined(__bsdi__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { isa_establish(&sc->tulip_id, &sc->tulip_dev); sc->tulip_ih.ih_fun = intr_rtn; sc->tulip_ih.ih_arg = (void *) sc; intr_establish(ia->ia_irq, &sc->tulip_ih, DV_NET); } sc->tulip_ats.func = tulip_shutdown; sc->tulip_ats.arg = (void *) sc; atshutdown(&sc->tulip_ats, ATSH_ADD); #endif #if defined(TULIP_USE_SOFTINTR) if (sc->tulip_unit > tulip_softintr_max_unit) tulip_softintr_max_unit = sc->tulip_unit; #endif s = TULIP_RAISESPL(); tulip_reset(sc); tulip_attach(sc); #if defined(__alpha__) && defined(__NetBSD__) if (media != TULIP_MEDIA_UNKNOWN) tulip_linkup(sc, media); #endif TULIP_RESTORESPL(s); } } Index: head/sys/fs/cd9660/cd9660_vfsops.c =================================================================== --- head/sys/fs/cd9660/cd9660_vfsops.c (revision 34265) +++ head/sys/fs/cd9660/cd9660_vfsops.c (revision 34266) @@ -1,891 +1,892 @@ /*- * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 - * $Id: cd9660_vfsops.c,v 1.33 1997/12/21 21:40:02 joerg Exp $ + * $Id: cd9660_vfsops.c,v 1.34 1998/03/01 22:46:00 msmith Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_ISOFSMNT, "ISOFS mount", "ISOFS mount structure"); MALLOC_DEFINE(M_ISOFSNODE, "ISOFS node", "ISOFS vnode private part"); static int cd9660_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int cd9660_start __P((struct mount *, int, struct proc *)); static int cd9660_unmount __P((struct mount *, int, struct proc *)); static int cd9660_root __P((struct mount *, struct vnode **)); static int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *)); static int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int cd9660_vget __P((struct mount *, ino_t, struct vnode **)); static int cd9660_vrele __P((struct mount *, struct vnode *)); static int cd9660_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int cd9660_vptofh __P((struct vnode *, struct fid *)); static struct vfsops cd9660_vfsops = { cd9660_mount, cd9660_start, cd9660_unmount, cd9660_root, cd9660_quotactl, cd9660_statfs, cd9660_sync, cd9660_vget, cd9660_vrele, cd9660_fhtovp, cd9660_vptofh, cd9660_init }; VFS_SET(cd9660_vfsops, cd9660, MOUNT_CD9660, VFCF_READONLY); /* * Called by vfs_mountroot when iso is going to be mounted as root. */ static int iso_get_ssector __P((dev_t dev, struct proc *p)); static int iso_mountfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct iso_args *argp)); static int iso_mountroot __P((struct mount *mp, struct proc *p)); /* * Try to find the start of the last data track on this CD-ROM. This * is used to mount the last session of a multi-session CD. Bail out * and return 0 if we fail, this is always a safe bet. 
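 *
 * (Editor's note: the scan below walks the TOC backwards from the last
 * track; in the Q-channel control field, a set bit 2 marks a data
 * track.)
 */
#if 0
/*
 * Illustrative helper, not part of the original source: the per-track
 * test the loop below applies to each TOC entry.
 */
static int
iso_track_is_data(struct ioc_read_toc_single_entry *t)
{
	return ((t->entry.control & 4) != 0);
}
#endif
/*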
*/ static int iso_get_ssector(dev, p) dev_t dev; struct proc *p; { struct ioc_toc_header h; struct ioc_read_toc_single_entry t; int i; struct bdevsw *bd; d_ioctl_t *ioctlp; bd = bdevsw[major(dev)]; ioctlp = bd->d_ioctl; if (ioctlp == NULL) return 0; if (ioctlp(dev, CDIOREADTOCHEADER, (caddr_t)&h, FREAD, p) == -1) return 0; for (i = h.ending_track; i >= 0; i--) { t.address_format = CD_LBA_FORMAT; t.track = i; if (ioctlp(dev, CDIOREADTOCENTRY, (caddr_t)&t, FREAD, p) == -1) return 0; if ((t.entry.control & 4) != 0) /* found a data track */ break; } if (i < 0) return 0; return ntohl(t.entry.addr.lba); } static int iso_mountroot(mp, p) struct mount *mp; struct proc *p; { struct iso_args args; int error; if ((error = bdevvp(rootdev, &rootvp))) { printf("iso_mountroot: can't find rootvp"); return (error); } args.flags = ISOFSMNT_ROOT; args.ssector = iso_get_ssector(rootdev, p); if (bootverbose) printf("iso_mountroot(): using session at block %d\n", args.ssector); if (error = iso_mountfs(rootvp, mp, p, &args)) return (error); (void)cd9660_statfs(mp, &mp->mnt_stat, p); return (0); } /* * VFS Operations. * * mount system call */ static int cd9660_mount(mp, path, data, ndp, p) register struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; struct iso_args args; u_int size; int error; struct iso_mnt *imp = 0; if ((mp->mnt_flag & MNT_ROOTFS) != 0) { if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; return (iso_mountroot(mp, p)); } if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))) return (error); if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EROFS); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. * Disallow clearing MNT_NOCLUSTERR flag, if block device requests. */ if (mp->mnt_flag & MNT_UPDATE) { imp = VFSTOISOFS(mp); if (bdevsw[major(imp->im_devvp->v_rdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (args.fspec == 0) return (vfs_export(mp, &imp->im_export, &args.export)); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. 
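 *
 * (Editor's note: "sensible" means the vnode namei() returns must be of
 * type VBLK and its major number must fall within bdevsw[], since the
 * code below indexes that table with it.)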
*/ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp))) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return ENOTBLK; } if (major(devvp->v_rdev) >= nblkdev) { vrele(devvp); return ENXIO; } if ((mp->mnt_flag & MNT_UPDATE) == 0) { if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; error = iso_mountfs(devvp, mp, p, &args); } else { if (devvp != imp->im_devvp) error = EINVAL; /* needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return error; } imp = VFSTOISOFS(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) cd9660_statfs(mp, &mp->mnt_stat, p); return 0; } /* * Common code for mount and mountroot */ static int iso_mountfs(devvp, mp, p, argp) register struct vnode *devvp; struct mount *mp; struct proc *p; struct iso_args *argp; { register struct iso_mnt *isomp = (struct iso_mnt *)0; struct buf *bp = NULL; dev_t dev = devvp->v_rdev; int error = EINVAL; int needclose = 0; int high_sierra = 0; int ronly = (mp->mnt_flag & MNT_RDONLY) != 0; int iso_bsize; int iso_blknum; struct iso_volume_descriptor *vdp = 0; struct iso_primary_descriptor *pri; struct iso_sierra_primary_descriptor *pri_sierra; struct iso_directory_record *rootp; int logical_block_size; if (!ronly) return EROFS; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp))) return error; if (vcount(devvp) > 1 && devvp != rootvp) return EBUSY; if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))) return (error); if ((error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))) return error; needclose = 1; /* This is the "logical sector size". The standard says this * should be 2048 or the physical sector size on the device, * whichever is greater. For now, we'll just use a constant. */ iso_bsize = ISO_DEFAULT_BLOCK_SIZE; for (iso_blknum = 16 + argp->ssector; iso_blknum < 100 + argp->ssector; iso_blknum++) { if (error = bread(devvp, iso_blknum * btodb(iso_bsize), iso_bsize, NOCRED, &bp)) goto out; vdp = (struct iso_volume_descriptor *)bp->b_data; if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) { if (bcmp (vdp->id_sierra, ISO_SIERRA_ID, sizeof vdp->id) != 0) { error = EINVAL; goto out; } else high_sierra = 1; } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_END) { error = EINVAL; goto out; } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_PRIMARY) break; brelse(bp); } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) != ISO_VD_PRIMARY) { error = EINVAL; goto out; } pri = (struct iso_primary_descriptor *)vdp; pri_sierra = (struct iso_sierra_primary_descriptor *)vdp; logical_block_size = isonum_723 (high_sierra? pri_sierra->logical_block_size: pri->logical_block_size); if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE || (logical_block_size & (logical_block_size - 1)) != 0) { error = EINVAL; goto out; } rootp = (struct iso_directory_record *) (high_sierra? 
pri_sierra->root_directory_record: pri->root_directory_record); isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK); bzero((caddr_t)isomp, sizeof *isomp); isomp->logical_block_size = logical_block_size; isomp->volume_space_size = isonum_733 (high_sierra? pri_sierra->volume_space_size: pri->volume_space_size); /* * Since an ISO9660 multi-session CD can also access previous * sessions, we have to include them into the space consider- * ations. This doesn't yield a very accurate number since * parts of the old sessions might be inaccessible now, but we * can't do much better. This is also important for the NFS * filehandle validation. */ isomp->volume_space_size += argp->ssector; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->im_bmask = logical_block_size - 1; isomp->im_bshift = 0; while ((1 << isomp->im_bshift) < isomp->logical_block_size) isomp->im_bshift++; bp->b_flags |= B_AGE; brelse(bp); bp = NULL; mp->mnt_data = (qaddr_t)isomp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; /* Check the Rock Ridge Extention support */ if (!(argp->flags & ISOFSMNT_NORRIP)) { if (error = bread(isomp->im_devvp, (isomp->root_extent + isonum_711(rootp->ext_attr_length)) << (isomp->im_bshift - DEV_BSHIFT), isomp->logical_block_size, NOCRED, &bp)) goto out; rootp = (struct iso_directory_record *)bp->b_data; if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) { argp->flags |= ISOFSMNT_NORRIP; } else { argp->flags &= ~ISOFSMNT_GENS; } /* * The contents are valid, * but they will get reread as part of another vnode, so... */ bp->b_flags |= B_AGE; brelse(bp); bp = NULL; } isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT); if(high_sierra) /* this effectively ignores all the mount flags */ isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA; else switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) { default: isomp->iso_ftype = ISO_FTYPE_DEFAULT; break; case ISOFSMNT_GENS|ISOFSMNT_NORRIP: isomp->iso_ftype = ISO_FTYPE_9660; break; case 0: isomp->iso_ftype = ISO_FTYPE_RRIP; break; } return 0; out: - devvp->v_specflags &= ~SI_MOUNTEDON; + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); if (needclose) (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); if (isomp) { free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; } return error; } /* * Make a filesystem operational. * Nothing to do at the moment. 
*/ /* ARGSUSED */ static int cd9660_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return 0; } /* * unmount system call */ static int cd9660_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct iso_mnt *isomp; int error, flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; #if 0 mntflushbuf(mp, 0); if (mntinvalbuf(mp)) return EBUSY; #endif if ((error = vflush(mp, NULLVP, flags))) return (error); isomp = VFSTOISOFS(mp); - isomp->im_devvp->v_specflags &= ~SI_MOUNTEDON; + isomp->im_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); vrele(isomp->im_devvp); free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Return root of a filesystem */ static int cd9660_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct iso_mnt *imp = VFSTOISOFS(mp); struct iso_directory_record *dp = (struct iso_directory_record *)imp->root; ino_t ino = isodirino(dp, imp); /* * With RRIP we must use the `.' entry of the root directory. * Simply tell vget, that it's a relocated directory. */ return (cd9660_vget_internal(mp, ino, vpp, imp->iso_ftype == ISO_FTYPE_RRIP, dp)); } /* * Do operations associated with quotas, not supported */ /* ARGSUSED */ static int cd9660_quotactl(mp, cmd, uid, arg, p) struct mount *mp; int cmd; uid_t uid; caddr_t arg; struct proc *p; { return (EOPNOTSUPP); } /* * Get file system statistics. */ int cd9660_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct iso_mnt *isomp; isomp = VFSTOISOFS(mp); sbp->f_type = MOUNT_CD9660; sbp->f_bsize = isomp->logical_block_size; sbp->f_iosize = sbp->f_bsize; /* XXX */ sbp->f_blocks = isomp->volume_space_size; sbp->f_bfree = 0; /* total free blocks */ sbp->f_bavail = 0; /* blocks free for non superuser */ sbp->f_files = 0; /* total files */ sbp->f_ffree = 0; /* free file nodes */ if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } /* Use the first spare for flags: */ - sbp->f_spare[0] = isomp->im_flags; + /* Don't do this!!! XXX */ + /* sbp->f_spare[0] = isomp->im_flags; */ return 0; } /* ARGSUSED */ static int cd9660_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is in range * - call iget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the generation number matches */ struct ifid { ushort ifid_len; ushort ifid_pad; int ifid_ino; long ifid_start; }; /* ARGSUSED */ int cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct ifid *ifhp = (struct ifid *)fhp; register struct iso_node *ip; register struct netcred *np; register struct iso_mnt *imp = VFSTOISOFS(mp); struct vnode *nvp; int error; #ifdef ISOFS_DBG printf("fhtovp: ino %d, start %ld\n", ifhp->ifid_ino, ifhp->ifid_start); #endif /* * Get the export permission structure for this tuple. 
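 *
 * (Editor's note: vfs_export_lookup() maps the client address in "nam"
 * to the netcred for this export; a NULL result means the client has
 * no access and the request fails with EACCES below.)
 */
#if 0
	/*
	 * Sketch, not original code: the handle validated here is the one
	 * cd9660_vptofh() built -- on cd9660 the inode number is the byte
	 * offset of the file's directory record on the volume.
	 */
	struct ifid fh;
	fh.ifid_len = sizeof(fh);
	fh.ifid_ino = ip->i_number;	/* directory record offset */
	fh.ifid_start = ip->iso_start;	/* first block of the extent */
#endif
/*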
*/ np = vfs_export_lookup(mp, &imp->im_export, nam); if (np == NULL) return (EACCES); if (error = VFS_VGET(mp, ifhp->ifid_ino, &nvp)) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); if (ip->inode.iso_mode == 0) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } int cd9660_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { /* * XXXX * It would be nice if we didn't always set the `relocated' flag * and force the extra read, but I don't want to think about fixing * that right now. */ return (cd9660_vget_internal(mp, ino, vpp, #if 0 VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP, #else 0, #endif (struct iso_directory_record *)0)); } /* * Complement to all vpp returning ops. * XXX - initially only to get rid of WILLRELE. */ /* ARGSUSED */ static int cd9660_vrele(mp, vp) struct mount *mp; struct vnode *vp; { return (EOPNOTSUPP); } int cd9660_vget_internal(mp, ino, vpp, relocated, isodir) struct mount *mp; ino_t ino; struct vnode **vpp; int relocated; struct iso_directory_record *isodir; { struct iso_mnt *imp; struct iso_node *ip; struct buf *bp; struct vnode *vp, *nvp; dev_t dev; int error; imp = VFSTOISOFS(mp); dev = imp->im_dev; if ((*vpp = cd9660_ihashget(dev, ino)) != NULLVP) return (0); /* Allocate a new vnode/iso_node. */ if (error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) { *vpp = NULLVP; return (error); } MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE, M_WAITOK); bzero((caddr_t)ip, sizeof(struct iso_node)); lockinit(&ip->i_lock, PINOD, "isonode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_dev = dev; ip->i_number = ino; /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ cd9660_ihashins(ip); if (isodir == 0) { int lbn, off; lbn = lblkno(imp, ino); if (lbn >= imp->volume_space_size) { vput(vp); printf("fhtovp: lbn exceed volume space %d\n", lbn); return (ESTALE); } off = blkoff(imp, ino); if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) { vput(vp); printf("fhtovp: crosses block boundary %d\n", off + ISO_DIRECTORY_RECORD_SIZE); return (ESTALE); } error = bread(imp->im_devvp, lbn << (imp->im_bshift - DEV_BSHIFT), imp->logical_block_size, NOCRED, &bp); if (error) { vput(vp); brelse(bp); printf("fhtovp: bread error %d\n",error); return (error); } isodir = (struct iso_directory_record *)(bp->b_data + off); if (off + isonum_711(isodir->length) > imp->logical_block_size) { vput(vp); if (bp != 0) brelse(bp); printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n", off +isonum_711(isodir->length), off, isonum_711(isodir->length)); return (ESTALE); } #if 0 if (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) { if (bp != 0) brelse(bp); printf("fhtovp: file start miss %d vs %d\n", isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length), ifhp->ifid_start); return (ESTALE); } #endif } else bp = 0; ip->i_mnt = imp; ip->i_devvp = imp->im_devvp; VREF(ip->i_devvp); if (relocated) { /* * On relocated directories we must * read the `.' entry out of a dir. 
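 *
 * (Editor's note: Rock Ridge relocates directories that would exceed
 * ISO9660's eight-level depth limit; the moved directory's real
 * attributes live in its own `.' entry, which is why that record is
 * read here rather than the entry found in the parent.)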
*/ ip->iso_start = ino >> imp->im_bshift; if (bp != 0) brelse(bp); if (error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) { vput(vp); return (error); } isodir = (struct iso_directory_record *)bp->b_data; } ip->iso_extent = isonum_733(isodir->extent); ip->i_size = isonum_733(isodir->size); ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent; /* * Setup time stamp, attribute */ vp->v_type = VNON; switch (imp->iso_ftype) { default: /* ISO_FTYPE_9660 */ { struct buf *bp2; int off; if ((imp->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL, &bp2); else bp2 = NULL; cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660); cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660); if (bp2) brelse(bp2); break; } case ISO_FTYPE_RRIP: cd9660_rrip_analyze(isodir, ip, imp); break; } if (bp != 0) brelse(bp); /* * Initialize the associated vnode */ switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) { case VFIFO: vp->v_op = cd9660_fifoop_p; break; case VCHR: case VBLK: /* * if device, look at device number table for translation */ vp->v_op = cd9660_specop_p; if (nvp = checkalias(vp, ip->inode.iso_rdev, mp)) { /* * Discard unneeded vnode, but save its iso_node. * Note that the lock is carried over in the iso_node * to the replacement vnode. */ nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased inode. */ vp = nvp; ip->i_vnode = vp; } break; } if (ip->iso_extent == imp->root_extent) vp->v_flag |= VROOT; /* * XXX need generation number? */ *vpp = vp; return (0); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int cd9660_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct iso_node *ip = VTOI(vp); register struct ifid *ifhp; ifhp = (struct ifid *)fhp; ifhp->ifid_len = sizeof(struct ifid); ifhp->ifid_ino = ip->i_number; ifhp->ifid_start = ip->iso_start; #ifdef ISOFS_DBG printf("vptofh: ino %d, start %ld\n", ifhp->ifid_ino,ifhp->ifid_start); #endif return 0; } Index: head/sys/fs/msdosfs/msdosfs_vfsops.c =================================================================== --- head/sys/fs/msdosfs/msdosfs_vfsops.c (revision 34265) +++ head/sys/fs/msdosfs/msdosfs_vfsops.c (revision 34266) @@ -1,1051 +1,1054 @@ -/* $Id: msdosfs_vfsops.c,v 1.28 1998/02/23 16:44:32 ache Exp $ */ +/* $Id: msdosfs_vfsops.c,v 1.29 1998/03/01 22:46:27 msmith Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include #include #include #include #include #include #include /* XXX */ /* defines v_rdev */ #include #include #include #include #include /* defines ALLPERMS */ #include #include #include #include #include #include MALLOC_DEFINE(M_MSDOSFSMNT, "MSDOSFS mount", "MSDOSFS mount structure"); static MALLOC_DEFINE(M_MSDOSFSFAT, "MSDOSFS FAT", "MSDOSFS file allocation table"); static int update_mp __P((struct mount *mp, struct msdosfs_args *argp)); static int mountmsdosfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct msdosfs_args *argp)); static int msdosfs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int msdosfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int msdosfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int msdosfs_root __P((struct mount *, struct vnode **)); static int msdosfs_start __P((struct mount *, int, struct proc *)); static int msdosfs_statfs __P((struct mount *, struct statfs *, struct proc *)); static int msdosfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int msdosfs_unmount __P((struct mount *, int, struct proc *)); static int msdosfs_vget __P((struct mount *mp, ino_t ino, struct vnode **vpp)); static int msdosfs_vptofh __P((struct vnode *, struct fid *)); static int update_mp(mp, argp) struct mount *mp; struct msdosfs_args *argp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error; pmp->pm_gid = argp->gid; pmp->pm_uid = argp->uid; pmp->pm_mask = argp->mask & ALLPERMS; pmp->pm_flags |= argp->flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_U2WTABLE) { bcopy(argp->u2w, pmp->pm_u2w, sizeof(pmp->pm_u2w)); bcopy(argp->d2u, pmp->pm_d2u, sizeof(pmp->pm_d2u)); bcopy(argp->u2d, pmp->pm_u2d, sizeof(pmp->pm_u2d)); } if (pmp->pm_flags & MSDOSFSMNT_ULTABLE) { bcopy(argp->ul, pmp->pm_ul, sizeof(pmp->pm_ul)); bcopy(argp->lu, pmp->pm_lu, sizeof(pmp->pm_lu)); } #ifndef __FreeBSD__ /* * GEMDOS knows nothing (yet) about win95 */ if (pmp->pm_flags & MSDOSFSMNT_GEMDOSFS) pmp->pm_flags |= MSDOSFSMNT_NOWIN95; #endif if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; else if (!(pmp->pm_flags & (MSDOSFSMNT_SHORTNAME | MSDOSFSMNT_LONGNAME))) { struct vnode *rootvp; /* * Try to divine 
whether to support Win'95 long filenames */ if (FAT32(pmp)) pmp->pm_flags |= MSDOSFSMNT_LONGNAME; else { if ((error = msdosfs_root(mp, &rootvp)) != 0) return error; pmp->pm_flags |= findwin95(VTODE(rootvp)) ? MSDOSFSMNT_LONGNAME : MSDOSFSMNT_SHORTNAME; vput(rootvp); } } return 0; } #ifndef __FreeBSD__ int msdosfs_mountroot() { register struct mount *mp; struct proc *p = curproc; /* XXX */ size_t size; int error; struct msdosfs_args args; if (root_device->dv_class != DV_DISK) return (ENODEV); /* * Get vnodes for swapdev and rootdev. */ if (bdevvp(rootdev, &rootvp)) panic("msdosfs_mountroot: can't setup rootvp"); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &msdosfs_vfsops; mp->mnt_flag = 0; LIST_INIT(&mp->mnt_vnodelist); args.flags = 0; args.uid = 0; args.gid = 0; args.mask = 0777; if ((error = mountmsdosfs(rootvp, mp, p, &args)) != 0) { free(mp, M_MOUNT); return (error); } if ((error = update_mp(mp, &args)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } if ((error = vfs_lock(mp)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); mp->mnt_vnodecovered = NULLVP; (void) copystr("/", mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)msdosfs_statfs(mp, &mp->mnt_stat, p); vfs_unlock(mp); return (0); } #endif /* * mp - path - addr in user space of mount point (ie /usr or whatever) * data - addr in user space of mount params including the name of the block * special file to treat as a filesystem. */ static int msdosfs_mount(mp, path, data, ndp, p) struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; /* vnode for blk device to mount */ struct msdosfs_args args; /* will hold data from mount request */ /* msdosfs specific mount control block */ struct msdosfsmount *pmp = NULL; size_t size; int error, flags; mode_t accessmode; error = copyin(data, (caddr_t)&args, sizeof(struct msdosfs_args)); if (error) return (error); if (args.magic != MSDOSFS_ARGSMAGIC) { printf("Old mount_msdosfs, flags=%d\n", args.flags); args.flags = 0; } /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { pmp = VFSTOMSDOSFS(mp); error = 0; if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); } if (!error && (mp->mnt_flag & MNT_RELOAD)) /* not yet implemented */ error = EOPNOTSUPP; if (error) return (error); if ((pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. 
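 *
 * (Editor's note: this mirrors the mount-time check further down -- a
 * non-root user may upgrade to read/write only if VOP_ACCESS() grants
 * both VREAD and VWRITE on the device with the user's own credentials.)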
*/ if (p->p_ucred->cr_uid != 0) { devvp = pmp->pm_devvp; vn_lock(devvp, LK_EXCLUSIVE, p); error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p); if (error) { VOP_UNLOCK(devvp, 0, p); return (error); } VOP_UNLOCK(devvp, 0, p); } pmp->pm_flags &= ~MSDOSFSMNT_RONLY; } if (args.fspec == 0) { #ifdef __notyet__ /* doesn't work correctly with current mountd XXX */ if (args.flags & MSDOSFSMNT_MNTOPT) { pmp->pm_flags &= ~MSDOSFSMNT_MNTOPT; pmp->pm_flags |= args.flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; } #endif /* * Process export requests. */ return (vfs_export(mp, &pmp->pm_export, &args.export)); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); error = namei(ndp); if (error) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return (ENOTBLK); } if (major(devvp->v_rdev) >= nblkdev) { vrele(devvp); return (ENXIO); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (p->p_ucred->cr_uid != 0) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = mountmsdosfs(devvp, mp, p, &args); #ifdef MSDOSFS_DEBUG /* only needed for the printf below */ pmp = VFSTOMSDOSFS(mp); #endif } else { if (devvp != pmp->pm_devvp) error = EINVAL; /* XXX needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return (error); } error = update_mp(mp, &args); if (error) { msdosfs_unmount(mp, MNT_FORCE, p); return error; } (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) msdosfs_statfs(mp, &mp->mnt_stat, p); #ifdef MSDOSFS_DEBUG printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap); #endif return (0); } static int mountmsdosfs(devvp, mp, p, argp) struct vnode *devvp; struct mount *mp; struct proc *p; struct msdosfs_args *argp; { struct msdosfsmount *pmp; struct buf *bp; dev_t dev = devvp->v_rdev; #ifndef __FreeBSD__ struct partinfo dpart; #endif union bootsector *bsp; struct byte_bpb33 *b33; struct byte_bpb50 *b50; #ifdef PC98 u_int pc98_wrk; u_int Phy_Sector_Size; #endif struct byte_bpb710 *b710; u_int8_t SecPerClust; int ronly, error; int bsize = 0, dtype = 0, tmp; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); VOP_UNLOCK(devvp, 0, p); if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? 
FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); bp = NULL; /* both used in error_exit */ pmp = NULL; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { /* * We need the disklabel to calculate the size of a FAT entry * later on. Also make sure the partition contains a filesystem * of type FS_MSDOS. This doesn't work for floppies, so we have * to check for them too. * * At least some parts of the msdos fs driver seem to assume * that the size of a disk block will always be 512 bytes. * Let's check it... */ error = VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p); if (error) goto error_exit; tmp = dpart.part->p_fstype; dtype = dpart.disklab->d_type; bsize = dpart.disklab->d_secsize; if (bsize != 512 || (dtype!=DTYPE_FLOPPY && tmp!=FS_MSDOS)) { error = EINVAL; goto error_exit; } } #endif /* * Read the boot sector of the filesystem, and then check the * boot signature. If not a dos boot sector then error out. */ #ifdef PC98 devvp->v_flag &= 0xffff; error = bread(devvp, 0, 1024, NOCRED, &bp); #else error = bread(devvp, 0, 512, NOCRED, &bp); #endif if (error) goto error_exit; bp->b_flags |= B_AGE; bsp = (union bootsector *)bp->b_data; b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB; b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB; b710 = (struct byte_bpb710 *)bsp->bs710.bsPBP; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif #ifdef PC98 if ((bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) && (bsp->bs50.bsBootSectSig0 != 0 /* PC98 DOS 3.3x */ || bsp->bs50.bsBootSectSig1 != 0) && (bsp->bs50.bsBootSectSig0 != 0x90 /* PC98 DOS 5.0 */ || bsp->bs50.bsBootSectSig1 != 0x3d) && (bsp->bs50.bsBootSectSig0 != 0x46 /* PC98 DOS 3.3B */ || bsp->bs50.bsBootSectSig1 != 0xfa)) { #else if (bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif pmp = malloc(sizeof *pmp, M_MSDOSFSMNT, M_WAITOK); bzero((caddr_t)pmp, sizeof *pmp); pmp->pm_mountp = mp; /* * Compute several useful quantities from the bpb in the * bootsector. Copy in the dos 5 variant of the bpb then fix up * the fields that are different between dos 5 and dos 3.3. 
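 *
 * (Editor's note: the BPB fields are little-endian on disk; getushort()
 * and getulong() read them portably, so the code below also works on
 * big-endian hosts.)
 */
#if 0
	/*
	 * Sketch of what getushort() amounts to, assuming "p" is a
	 * hypothetical byte pointer at a 16-bit BPB field:
	 */
	u_int16_t bps = ((u_int8_t *)p)[0] | (((u_int8_t *)p)[1] << 8);
#endif
/*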
*/ SecPerClust = b50->bpbSecPerClust; pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec); pmp->pm_ResSectors = getushort(b50->bpbResSectors); pmp->pm_FATs = b50->bpbFATs; pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts); pmp->pm_Sectors = getushort(b50->bpbSectors); pmp->pm_FATsecs = getushort(b50->bpbFATsecs); pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack); pmp->pm_Heads = getushort(b50->bpbHeads); pmp->pm_Media = b50->bpbMedia; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif /* XXX - We should probably check more values here */ if (!pmp->pm_BytesPerSec || !SecPerClust || !pmp->pm_Heads || pmp->pm_Heads > 255 #ifdef PC98 || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 255) { #else || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 63) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif if (pmp->pm_Sectors == 0) { pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs); pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors); } else { pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs); pmp->pm_HugeSectors = pmp->pm_Sectors; } #ifdef PC98 /* for PC98 added Satoshi Yasuda */ Phy_Sector_Size = 512; if ((devvp->v_rdev>>8) == 2) { /* floppy check */ if (((devvp->v_rdev&077) == 2) && (pmp->pm_HugeSectors == 1232)) { Phy_Sector_Size = 1024; /* 2HD */ /* * 1024byte/sector support */ devvp->v_flag |= 0x10000; } else { if ((((devvp->v_rdev&077) == 3) /* 2DD 8 or 9 sector */ && (pmp->pm_HugeSectors == 1440)) /* 9 sector */ || (((devvp->v_rdev&077) == 4) && (pmp->pm_HugeSectors == 1280)) /* 8 sector */ || (((devvp->v_rdev&077) == 5) && (pmp->pm_HugeSectors == 2880))) { /* 1.44M */ Phy_Sector_Size = 512; } else { if (((devvp->v_rdev&077) != 1) && ((devvp->v_rdev&077) != 0)) { /* 2HC */ error = EINVAL; goto error_exit; } } } } pc98_wrk = pmp->pm_BytesPerSec / Phy_Sector_Size; pmp->pm_BytesPerSec = Phy_Sector_Size; SecPerClust = SecPerClust * pc98_wrk; pmp->pm_HugeSectors = pmp->pm_HugeSectors * pc98_wrk; pmp->pm_ResSectors = pmp->pm_ResSectors * pc98_wrk; pmp->pm_FATsecs = pmp->pm_FATsecs * pc98_wrk; pmp->pm_SecPerTrack = pmp->pm_SecPerTrack * pc98_wrk; pmp->pm_HiddenSects = pmp->pm_HiddenSects * pc98_wrk; #endif /* */ if (pmp->pm_HugeSectors > 0xffffffff / pmp->pm_BytesPerSec + 1) { /* * We cannot deal currently with this size of disk * due to fileid limitations (see msdosfs_getattr and * msdosfs_readdir) */ error = EINVAL; goto error_exit; } if (pmp->pm_RootDirEnts == 0) { if (bsp->bs710.bsBootSectSig2 != BOOTSIG2 || bsp->bs710.bsBootSectSig3 != BOOTSIG3 || pmp->pm_Sectors || pmp->pm_FATsecs || getushort(b710->bpbFSVers)) { error = EINVAL; goto error_exit; } pmp->pm_fatmask = FAT32_MASK; pmp->pm_fatmult = 4; pmp->pm_fatdiv = 1; pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs); if (getushort(b710->bpbExtFlags) & FATMIRROR) pmp->pm_curfat = getushort(b710->bpbExtFlags) & FATNUM; else pmp->pm_flags |= MSDOSFS_FATMIRROR; } else pmp->pm_flags |= MSDOSFS_FATMIRROR; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if (FAT32(pmp)) { /* * GEMDOS doesn't know fat32. 
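 *
 * (Editor's note: FAT32 was recognized a few lines up by bpbRootDirEnts
 * being zero; GEMDOS predates FAT32, so such a volume cannot be a
 * GEMDOS filesystem and the mount is rejected here.)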
*/ error = EINVAL; goto error_exit; } /* * Check a few values (could do some more): * - logical sector size: power of 2, >= block size * - sectors per cluster: power of 2, >= 1 * - number of sectors: >= 1, <= size of partition */ if ( (SecPerClust == 0) || (SecPerClust & (SecPerClust - 1)) || (pmp->pm_BytesPerSec < bsize) || (pmp->pm_BytesPerSec & (pmp->pm_BytesPerSec - 1)) || (pmp->pm_HugeSectors == 0) || (pmp->pm_HugeSectors * (pmp->pm_BytesPerSec / bsize) > dpart.part->p_size) ) { error = EINVAL; goto error_exit; } /* * XXX - Many parts of the msdos fs driver seem to assume that * the number of bytes per logical sector (BytesPerSec) will * always be the same as the number of bytes per disk block * Let's pretend it is. */ tmp = pmp->pm_BytesPerSec / bsize; pmp->pm_BytesPerSec = bsize; pmp->pm_HugeSectors *= tmp; pmp->pm_HiddenSects *= tmp; pmp->pm_ResSectors *= tmp; pmp->pm_Sectors *= tmp; pmp->pm_FATsecs *= tmp; SecPerClust *= tmp; } #endif pmp->pm_fatblk = pmp->pm_ResSectors; if (FAT32(pmp)) { pmp->pm_rootdirblk = getulong(b710->bpbRootClust); pmp->pm_firstcluster = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_fsinfo = getushort(b710->bpbFSInfo); } else { pmp->pm_rootdirblk = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_rootdirsize = (pmp->pm_RootDirEnts * sizeof(struct direntry) + pmp->pm_BytesPerSec - 1) / pmp->pm_BytesPerSec;/* in sectors */ pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize; } pmp->pm_nmbrofclusters = (pmp->pm_HugeSectors - pmp->pm_firstcluster) / SecPerClust; pmp->pm_maxcluster = pmp->pm_nmbrofclusters + 1; pmp->pm_fatsize = pmp->pm_FATsecs * pmp->pm_BytesPerSec; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if ((pmp->pm_nmbrofclusters <= (0xff0 - 2)) && ((dtype == DTYPE_FLOPPY) || ((dtype == DTYPE_VNODE) && ((pmp->pm_Heads == 1) || (pmp->pm_Heads == 2)))) ) { pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } else #endif if (pmp->pm_fatmask == 0) { if (pmp->pm_maxcluster <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) { /* * This will usually be a floppy disk. This size makes * sure that one fat entry will not be split across * multiple blocks. */ pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } if (FAT12(pmp)) pmp->pm_fatblocksize = 3 * pmp->pm_BytesPerSec; else pmp->pm_fatblocksize = MAXBSIZE; pmp->pm_fatblocksec = pmp->pm_fatblocksize / pmp->pm_BytesPerSec; pmp->pm_bnshift = ffs(pmp->pm_BytesPerSec) - 1; /* * Compute mask and shift value for isolating cluster relative byte * offsets and cluster numbers from a file offset. */ pmp->pm_bpcluster = SecPerClust * pmp->pm_BytesPerSec; pmp->pm_crbomask = pmp->pm_bpcluster - 1; pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1; /* * Check for valid cluster size * must be a power of 2 */ if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) { error = EINVAL; goto error_exit; } /* * Release the bootsector buffer. */ brelse(bp); bp = NULL; /* * Check FSInfo. 
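 *
 * (Editor's note: the FAT32 FSInfo sector caches a next-free-cluster
 * hint; the block below trusts it only if all four signatures match,
 * and otherwise zeroes pm_fsinfo so the hint is never used.)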
*/ if (pmp->pm_fsinfo) { struct fsinfo *fp; if ((error = bread(devvp, pmp->pm_fsinfo, 1024, NOCRED, &bp)) != 0) goto error_exit; fp = (struct fsinfo *)bp->b_data; if (!bcmp(fp->fsisig1, "RRaA", 4) && !bcmp(fp->fsisig2, "rrAa", 4) && !bcmp(fp->fsisig3, "\0\0\125\252", 4) && !bcmp(fp->fsisig4, "\0\0\125\252", 4)) pmp->pm_nxtfree = getulong(fp->fsinxtfree); else pmp->pm_fsinfo = 0; brelse(bp); bp = NULL; } /* * Check and validate (or perhaps invalidate?) the fsinfo structure? XXX */ /* * Allocate memory for the bitmap of allocated clusters, and then * fill it in. */ pmp->pm_inusemap = malloc(((pmp->pm_maxcluster + N_INUSEBITS - 1) / N_INUSEBITS) * sizeof(*pmp->pm_inusemap), M_MSDOSFSFAT, M_WAITOK); /* * fillinusemap() needs pm_devvp. */ pmp->pm_dev = dev; pmp->pm_devvp = devvp; /* * Have the inuse map filled in. */ if ((error = fillinusemap(pmp)) != 0) goto error_exit; /* * If they want fat updates to be synchronous then let them suffer * the performance degradation in exchange for the on disk copy of * the fat being correct just about all the time. I suppose this * would be a good thing to turn on if the kernel is still flakey. */ if (mp->mnt_flag & MNT_SYNCHRONOUS) pmp->pm_flags |= MSDOSFSMNT_WAITONFAT; /* * Finish up. */ if (ronly) pmp->pm_flags |= MSDOSFSMNT_RONLY; else pmp->pm_fmod = 1; mp->mnt_data = (qaddr_t) pmp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_flag |= MNT_LOCAL; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; return 0; error_exit: if (bp) brelse(bp); (void) VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NOCRED, p); if (pmp) { if (pmp->pm_inusemap) free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } static int msdosfs_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return (0); } /* * Unmount the filesystem described by mp. */ static int msdosfs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { struct msdosfsmount *pmp; int error, flags; flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); if (error) return error; pmp = VFSTOMSDOSFS(mp); - pmp->pm_devvp->v_specflags &= ~SI_MOUNTEDON; + pmp->pm_devvp->v_specmountpoint = NULL; #ifdef MSDOSFS_DEBUG { struct vnode *vp = pmp->pm_devvp; printf("msdosfs_umount(): just before calling VOP_CLOSE()\n"); printf("flag %08lx, usecount %d, writecount %d, holdcnt %ld\n", vp->v_flag, vp->v_usecount, vp->v_writecount, vp->v_holdcnt); printf("lastr %d, id %lu, mount %p, op %p\n", vp->v_lastr, vp->v_id, vp->v_mount, vp->v_op); printf("freef %p, freeb %p, mount %p\n", vp->v_freelist.tqe_next, vp->v_freelist.tqe_prev, vp->v_mount); printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n", vp->v_cleanblkhd.lh_first, vp->v_dirtyblkhd.lh_first, vp->v_numoutput, vp->v_type); printf("union %p, tag %d, data[0] %08x, data[1] %08x\n", vp->v_socket, vp->v_tag, ((u_int *)vp->v_data)[0], ((u_int *)vp->v_data)[1]); } #endif - error = VOP_CLOSE(pmp->pm_devvp, (pmp->pm_flags&MSDOSFSMNT_RONLY) ? FREAD : FREAD | FWRITE, - NOCRED, p); + error = VOP_CLOSE(pmp->pm_devvp, + (pmp->pm_flags&MSDOSFSMNT_RONLY) ? 
FREAD : FREAD | FWRITE, + NOCRED, p); vrele(pmp->pm_devvp); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } static int msdosfs_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct denode *ndep; int error; #ifdef MSDOSFS_DEBUG printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp); #endif error = deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, &ndep); if (error) return (error); *vpp = DETOV(ndep); return (0); } static int msdosfs_quotactl(mp, cmds, uid, arg, p) struct mount *mp; int cmds; uid_t uid; caddr_t arg; struct proc *p; { return EOPNOTSUPP; } static int msdosfs_statfs(mp, sbp, p) struct mount *mp; struct statfs *sbp; struct proc *p; { struct msdosfsmount *pmp; pmp = VFSTOMSDOSFS(mp); sbp->f_bsize = pmp->pm_bpcluster; sbp->f_iosize = pmp->pm_bpcluster; sbp->f_blocks = pmp->pm_nmbrofclusters; sbp->f_bfree = pmp->pm_freeclustercount; sbp->f_bavail = pmp->pm_freeclustercount; sbp->f_files = pmp->pm_RootDirEnts; /* XXX */ sbp->f_ffree = 0; /* what to put in here? */ if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } static int msdosfs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *vp, *nvp; struct denode *dep; struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error, allerror = 0; /* * If we ever switch to not updating all of the fats all the time, * this would be the place to update them from the first one. */ if (pmp->pm_fmod != 0) if (pmp->pm_flags & MSDOSFSMNT_RONLY) panic("msdosfs_sync: rofs mod"); else { /* update fats here */ } /* * Write back each (modified) denode. */ simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * assoicated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; dep = VTODE(vp); - if (vp->v_type == VNON || ((dep->de_flag & - (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) - && vp->v_dirtyblkhd.lh_first == NULL) { + if (vp->v_type == VNON + || (waitfor == MNT_LAZY) /* can this happen with msdosfs? */ + || (((dep->de_flag & + (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) + && (vp->v_dirtyblkhd.lh_first == NULL))) { simple_unlock(&vp->v_interlock); continue; } simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; } error = VOP_FSYNC(vp, cred, waitfor, p); if (error) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); /* done with this one */ simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); /* * Flush filesystem control info. 
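 *
 * (Editor's note: the loop above pushed out the modified denodes; this
 * final VOP_FSYNC() on pm_devvp flushes the remaining metadata -- FAT
 * and directory blocks -- buffered against the device vnode itself.)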
*/ error = VOP_FSYNC(pmp->pm_devvp, cred, waitfor, p); if (error) allerror = error; return (allerror); } static int msdosfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct defid *defhp = (struct defid *) fhp; struct denode *dep; struct netcred *np; int error; np = vfs_export_lookup(mp, &pmp->pm_export, nam); if (np == NULL) return (EACCES); error = deget(pmp, defhp->defid_dirclust, defhp->defid_dirofs, &dep); if (error) { *vpp = NULLVP; return (error); } *vpp = DETOV(dep); *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } static int msdosfs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { struct denode *dep; struct defid *defhp; dep = VTODE(vp); defhp = (struct defid *)fhp; defhp->defid_len = sizeof(struct defid); defhp->defid_dirclust = dep->de_dirclust; defhp->defid_dirofs = dep->de_diroffset; /* defhp->defid_gen = dep->de_gen; */ return (0); } static int msdosfs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { return EOPNOTSUPP; } static struct vfsops msdosfs_vfsops = { msdosfs_mount, msdosfs_start, msdosfs_unmount, msdosfs_root, msdosfs_quotactl, msdosfs_statfs, msdosfs_sync, msdosfs_vget, vfs_vrele, msdosfs_fhtovp, msdosfs_vptofh, msdosfs_init }; VFS_SET(msdosfs_vfsops, msdos, MOUNT_MSDOS, 0); Index: head/sys/fs/specfs/spec_vnops.c =================================================================== --- head/sys/fs/specfs/spec_vnops.c (revision 34265) +++ head/sys/fs/specfs/spec_vnops.c (revision 34266) @@ -1,911 +1,917 @@ /* * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 - * $Id: spec_vnops.c,v 1.58 1998/03/07 21:35:52 dyson Exp $ + * $Id: spec_vnops.c,v 1.59 1998/03/08 08:46:18 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int spec_getattr __P((struct vop_getattr_args *)); static int spec_badop __P((void)); static int spec_strategy __P((struct vop_strategy_args *)); static int spec_print __P((struct vop_print_args *)); static int spec_lookup __P((struct vop_lookup_args *)); static int spec_open __P((struct vop_open_args *)); static int spec_close __P((struct vop_close_args *)); static int spec_read __P((struct vop_read_args *)); static int spec_write __P((struct vop_write_args *)); static int spec_ioctl __P((struct vop_ioctl_args *)); static int spec_poll __P((struct vop_poll_args *)); static int spec_inactive __P((struct vop_inactive_args *)); static int spec_fsync __P((struct vop_fsync_args *)); static int spec_bmap __P((struct vop_bmap_args *)); static int spec_advlock __P((struct vop_advlock_args *)); static int spec_getpages __P((struct vop_getpages_args *)); struct vnode *speclisth[SPECHSZ]; vop_t **spec_vnodeop_p; static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_access_desc, (vop_t *) vop_ebadf }, { &vop_advlock_desc, (vop_t *) spec_advlock }, { &vop_bmap_desc, (vop_t *) spec_bmap }, { &vop_close_desc, (vop_t *) spec_close }, { &vop_create_desc, (vop_t *) spec_badop }, { &vop_fsync_desc, (vop_t *) spec_fsync }, { &vop_getattr_desc, (vop_t *) spec_getattr }, { &vop_getpages_desc, (vop_t *) spec_getpages }, { &vop_inactive_desc, (vop_t *) spec_inactive }, { &vop_ioctl_desc, (vop_t *) spec_ioctl }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) spec_badop }, { &vop_lookup_desc, (vop_t *) spec_lookup }, { &vop_mkdir_desc, (vop_t *) spec_badop }, { &vop_mknod_desc, (vop_t *) spec_badop }, { &vop_open_desc, (vop_t *) spec_open }, { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, { &vop_poll_desc, (vop_t *) spec_poll }, { &vop_print_desc, (vop_t *) spec_print }, { &vop_read_desc, (vop_t *) spec_read }, { &vop_readdir_desc, (vop_t *) spec_badop }, { &vop_readlink_desc, (vop_t *) spec_badop }, { &vop_reallocblks_desc, (vop_t *) spec_badop }, { &vop_reclaim_desc, (vop_t *) vop_null }, { &vop_remove_desc, (vop_t *) spec_badop }, { &vop_rename_desc, (vop_t *) spec_badop }, { &vop_rmdir_desc, (vop_t *) spec_badop }, { &vop_setattr_desc, (vop_t *) vop_ebadf }, { &vop_strategy_desc, (vop_t *) spec_strategy }, { &vop_symlink_desc, (vop_t *) spec_badop }, { &vop_write_desc, (vop_t *) spec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; VNODEOP_SET(spec_vnodeop_opv_desc); int spec_vnoperate(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { return (VOCALL(spec_vnodeop_p, ap->a_desc->vdesc_offset, ap)); } static void spec_getpages_iodone __P((struct buf *bp)); /* * Trivial lookup routine that always fails. */ static int spec_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * Open a special file. 
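 *
 * (Editor's note: the checks below enforce, in order, the MNT_NODEV
 * mount flag, the securelevel restrictions on writing to disk devices,
 * and -- for block devices -- the rule that a mounted device may not be
 * opened again.)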
*/ /* ARGSUSED */ static int spec_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct proc *p = ap->a_p; struct vnode *bvp, *vp = ap->a_vp; dev_t bdev, dev = (dev_t)vp->v_rdev; int maj = major(dev); int error; /* * Don't allow open if fs is mounted -nodev. */ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) return (ENXIO); switch (vp->v_type) { case VCHR: if ((u_int)maj >= nchrdev) return (ENXIO); if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL)) return ENXIO; if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { /* * When running in very secure mode, do not allow * opens for writing of any disk character devices. */ if (securelevel >= 2 && cdevsw[maj]->d_bdev && (cdevsw[maj]->d_bdev->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * When running in secure mode, do not allow opens * for writing of /dev/mem, /dev/kmem, or character * devices whose corresponding block devices are * currently mounted. */ if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && vfinddev(bdev, VBLK, &bvp) && bvp->v_usecount > 0 && (error = vfs_mountedon(bvp))) return (error); if (iskmemdev(dev)) return (EPERM); } } #if 0 /* * Lite2 stuff. We will almost certainly do this * differently with devfs. The only use of this flag * is in dead_read to make ttys return EOF instead of * EIO when they are dead. Pre-lite2 FreeBSD returns * EOF for all character devices. */ if (cdevsw[maj]->d_type == D_TTY) vp->v_flag |= VISTTY; #endif VOP_UNLOCK(vp, 0, p); error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, p); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if ((u_int)maj >= nblkdev) return (ENXIO); if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL)) return ENXIO; /* * When running in very secure mode, do not allow * opens for writing of any disk block devices. */ if (securelevel >= 2 && ap->a_cred != FSCRED && (ap->a_mode & FWRITE) && (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * Do not allow opens of block devices that are * currently mounted. 
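 * (vfs_mountedon() reports EBUSY if this vnode, or an alias of it,
 * is in use as a mounted filesystem.)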
*/ error = vfs_mountedon(vp); if (error) return (error); return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, p)); } return (0); } /* * Vnode op for read */ /* ARGSUSED */ static int spec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn, nextbn; long bsize, bscale; struct partinfo dpart; int n, on, majordev; d_ioctl_t *ioctl; int error = 0; dev_t dev; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_read proc"); #endif if (uio->uio_resid == 0) return (0); switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)]->d_read) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; dev = vp->v_rdev; if ((majordev = major(dev)) < nblkdev && (ioctl = bdevsw[majordev]->d_ioctl) != NULL && (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; bscale = btodb(bsize); do { bn = btodb(uio->uio_offset) & ~(bscale - 1); on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (vp->v_lastr + bscale == bn) { nextbn = bn + bscale; error = breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else error = bread(vp, bn, (int)bsize, NOCRED, &bp); vp->v_lastr = bn; n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_read type"); } /* NOTREACHED */ } /* * Vnode op for write */ /* ARGSUSED */ static int spec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn; int bsize, blkmask; struct partinfo dpart; register int n, on; int error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("spec_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_write proc"); #endif switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)]->d_write) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) { if (dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; } blkmask = btodb(bsize) - 1; do { bn = btodb(uio->uio_offset) & ~blkmask; on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (n == bsize) bp = getblk(vp, bn, bsize, 0, 0); else error = bread(vp, bn, bsize, NOCRED, &bp); n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) bawrite(bp); else bdwrite(bp); } while (error == 0 && uio->uio_resid 
> 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ } /* * Device ioctl operation. */ /* ARGSUSED */ static int spec_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { dev_t dev = ap->a_vp->v_rdev; switch (ap->a_vp->v_type) { case VCHR: return ((*cdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); case VBLK: if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) if ((bdevsw[major(dev)]->d_flags & D_TYPEMASK) == D_TAPE) return (0); else return (1); return ((*bdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); default: panic("spec_ioctl"); /* NOTREACHED */ } } /* ARGSUSED */ static int spec_poll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register dev_t dev; switch (ap->a_vp->v_type) { case VCHR: dev = ap->a_vp->v_rdev; return (*cdevsw[major(dev)]->d_poll)(dev, ap->a_events, ap->a_p); default: return (vop_defaultop((struct vop_generic_args *)ap)); } } /* * Synch buffers associated with a block device */ /* ARGSUSED */ static int spec_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct buf *nbp; int s; if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. */ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("spec_fsync: not dirty"); if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { vfs_bio_awrite(bp); splx(s); } else { bremfree(bp); bp->b_flags |= B_BUSY; splx(s); bawrite(bp); } goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0); } #ifdef DIAGNOSTIC if (vp->v_dirtyblkhd.lh_first) { vprint("spec_fsync: dirty", vp); splx(s); goto loop; } #endif } splx(s); return (0); } static int spec_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } /* * Just call the device strategy routine */ static int spec_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { + struct buf *bp; - (*bdevsw[major(ap->a_bp->b_dev)]->d_strategy)(ap->a_bp); + bp = ap->a_bp; + if ((LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) + (*bioops.io_start)(bp); + (*bdevsw[major(bp->b_dev)]->d_strategy)(bp); return (0); } /* * This is a noop, simply returning what one has been given. 
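 * For a special file the logical block number is already a physical
 * block number on the device, so the vnode maps to itself and any
 * requested run lengths are reported as zero.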
 */
static int
spec_bmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{
	if (ap->a_vpp != NULL)
		*ap->a_vpp = ap->a_vp;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn;
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}

/*
 * Device close routine
 */
/* ARGSUSED */
static int
spec_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	struct proc *p = ap->a_p;
	dev_t dev = vp->v_rdev;
	d_close_t *devclose;
	int mode, error;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.
		 * We cannot easily tell that a character device is
		 * a controlling terminal, unless it is the closing
		 * process' controlling terminal.  In that case,
		 * if the reference count is 2 (this last descriptor
		 * plus the session), release the reference from the session.
		 */
		if (vcount(vp) == 2 && ap->a_p &&
		    (vp->v_flag & VXLOCK) == 0 &&
		    vp == ap->a_p->p_session->s_ttyvp) {
			vrele(vp);
			ap->a_p->p_session->s_ttyvp = NULL;
		}
		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcibly closing the device, otherwise we only
		 * close on last reference.
		 */
		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = cdevsw[major(dev)]->d_close;
		mode = S_IFCHR;
		break;

	case VBLK:
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
		error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
+		VOP_UNLOCK(vp, 0, ap->a_p);
		if (error)
			return (error);
		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly. Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = bdevsw[major(dev)]->d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
}

/*
 * Print out the contents of a special device vnode.
 */
static int
spec_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
	    minor(ap->a_vp->v_rdev));
	return (0);
}

/*
 * Special device advisory byte-level locks.
 */
/* ARGSUSED */
static int
spec_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t a_id;
		int a_op;
		struct flock *a_fl;
		int a_flags;
	} */ *ap;
{
	return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
}

/*
 * Special device bad operation
 */
static int
spec_badop()
{
	panic("spec_badop called");
	/* NOTREACHED */
}

static void
spec_getpages_iodone(bp)
	struct buf *bp;
{
	bp->b_flags |= B_DONE;
	wakeup(bp);
}

static int
spec_getpages(ap)
	struct vop_getpages_args *ap;
{
	vm_offset_t kva;
	int error;
	int i, pcount, size, s;
	daddr_t blkno;
	struct buf *bp;
	vm_page_t m;
	vm_ooffset_t offset;
	int toff, nextoff, nread;
	struct vnode *vp = ap->a_vp;
	int blksiz;
	int gotreqpage;

	error = 0;
	pcount = round_page(ap->a_count) / PAGE_SIZE;

	/*
	 * Calculate the offset of the transfer.
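	 * That is, the byte offset of the first page of the request:
	 *
	 *	offset = IDX_TO_OFF(pindex) + a_offset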
*/ offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset; /* XXX sanity check before we go into details. */ /* XXX limits should be defined elsewhere. */ #define DADDR_T_BIT 32 #define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1) if (offset < 0 || offset > OFFSET_MAX) { /* XXX still no %q in kernel. */ printf("spec_getpages: preposterous offset 0x%x%08x\n", (u_int)((u_quad_t)offset >> 32), (u_int)(offset & 0xffffffff)); return (VM_PAGER_ERROR); } blkno = btodb(offset); /* * Round up physical size for real devices, use the * fundamental blocksize of the fs if possible. */ if (vp && vp->v_mount) blksiz = vp->v_mount->mnt_stat.f_bsize; else blksiz = DEV_BSIZE; size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); bp = getpbuf(); kva = (vm_offset_t)bp->b_data; /* * Map the pages to be read into the kva. */ pmap_qenter(kva, ap->a_m, pcount); /* Build a minimal buffer header. */ bp->b_flags = B_BUSY | B_READ | B_CALL; bp->b_iodone = spec_getpages_iodone; /* B_PHYS is not set, but it is nice to fill this in. */ bp->b_proc = curproc; bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_blkno = blkno; bp->b_lblkno = blkno; pbgetvp(ap->a_vp, bp); bp->b_bcount = size; bp->b_bufsize = size; bp->b_resid = 0; cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; /* Do the input. */ VOP_STRATEGY(bp); s = splbio(); /* We definitely need to be at splbio here. */ while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PVM, "spread", 0); splx(s); if ((bp->b_flags & B_ERROR) != 0) { if (bp->b_error) error = bp->b_error; else error = EIO; } nread = size - bp->b_resid; if (nread < ap->a_count) { bzero((caddr_t)kva + nread, ap->a_count - nread); } pmap_qremove(kva, pcount); gotreqpage = 0; for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= nread) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else if (toff < nread) { int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); vm_page_set_validclean(m, 0, nvalid); } else { m->valid = 0; m->dirty = 0; } if (i != ap->a_reqpage) { /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error || (m->valid == VM_PAGE_BITS_ALL)) { if (m->valid) { if (m->flags & PG_WANTED) { vm_page_activate(m); } else { vm_page_deactivate(m); } PAGE_WAKEUP(m); } else { vm_page_free(m); } } else { vm_page_free(m); } } else if (m->valid) { gotreqpage = 1; } } if (!gotreqpage) { m = ap->a_m[ap->a_reqpage]; #ifndef MAX_PERF printf("spec_getpages: I/O read failure: (error code=%d)\n", error); printf(" size: %d, resid: %d, a_count: %d, valid: 0x%x\n", size, bp->b_resid, ap->a_count, m->valid); printf(" nread: %d, reqpage: %d, pindex: %d, pcount: %d\n", nread, ap->a_reqpage, m->pindex, pcount); #endif /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp); return VM_PAGER_ERROR; } /* * Free the buffer header back to the swap buffer pool. 
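	 * (relpbuf() returns the header to the pool so other paging
	 * operations can reuse it.)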
*/ relpbuf(bp); return VM_PAGER_OK; } /* ARGSUSED */ static int spec_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vattr *vap = ap->a_vap; struct partinfo dpart; bzero(vap, sizeof (*vap)); if (vp->v_type == VBLK) vap->va_blocksize = BLKDEV_IOSIZE; else if (vp->v_type == VCHR) vap->va_blocksize = MAXBSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, ap->a_p) == 0) { vap->va_bytes = dbtob(dpart.disklab->d_partitions [minor(vp->v_rdev)].p_size); vap->va_size = vap->va_bytes; } return (0); } Index: head/sys/gnu/ext2fs/inode.h =================================================================== --- head/sys/gnu/ext2fs/inode.h (revision 34265) +++ head/sys/gnu/ext2fs/inode.h (revision 34266) @@ -1,172 +1,181 @@ /* * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)inode.h 8.9 (Berkeley) 5/14/95 - * $Id: inode.h,v 1.19 1997/12/05 13:43:47 jkh Exp $ + * $Id: inode.h,v 1.20 1998/01/30 11:34:02 phk Exp $ */ #ifndef _UFS_UFS_INODE_H_ #define _UFS_UFS_INODE_H_ #include #include /* + * The size of a logical block number. + */ +typedef long ufs_lbn_t; + +/* * This must agree with the definition in . */ #define doff_t int32_t /* * The inode is used to describe each active (or recently active) file in the * UFS filesystem. It is composed of two types of information. 
The first part * is the information that is needed only while the file is active (such as * the identity of the file and linkage to speed its lookup). The second part * is the permanent meta-data associated with the file which is read in * from the permanent dinode from long term storage when the file becomes * active, and is put back when the file is no longer being used. */ struct inode { struct lock i_lock; /* Inode lock. >Keep this first< */ LIST_ENTRY(inode) i_hash;/* Hash chain. */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct vnode *i_devvp;/* Vnode for block I/O. */ u_int32_t i_flag; /* flags, see below */ dev_t i_dev; /* Device associated with the inode. */ ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ union { /* Associated filesystem. */ struct fs *fs; /* FFS */ struct ext2_sb_info *e2fs; /* EXT2FS */ } inode_u; #define i_fs inode_u.fs #define i_e2fs inode_u.e2fs struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ struct lockf *i_lockf;/* Head of byte-level lock list. */ /* * Side effects; used during directory lookup. */ int32_t i_count; /* Size of free slot in directory. */ doff_t i_endoff; /* End of useful stuff in directory. */ doff_t i_diroff; /* Offset in dir, where we found last entry. */ doff_t i_offset; /* Offset of free space in directory. */ ino_t i_ino; /* Inode number of found directory. */ u_int32_t i_reclen; /* Size of found directory entry. */ int i_spare[5]; /* XXX actually non-spare (for ext2fs). */ /* * The on-disk dinode itself. */ struct dinode i_din; /* 128 bytes of the on-disk dinode. */ }; #define i_atime i_din.di_atime #define i_atimensec i_din.di_atimensec #define i_blocks i_din.di_blocks #define i_ctime i_din.di_ctime #define i_ctimensec i_din.di_ctimensec #define i_db i_din.di_db #define i_flags i_din.di_flags #define i_gen i_din.di_gen #define i_gid i_din.di_gid #define i_ib i_din.di_ib #define i_mode i_din.di_mode #define i_mtime i_din.di_mtime #define i_mtimensec i_din.di_mtimensec #define i_nlink i_din.di_nlink #define i_rdev i_din.di_rdev #define i_shortlink i_din.di_shortlink #define i_size i_din.di_size #define i_uid i_din.di_uid /* These flags are kept in i_flag. */ #define IN_ACCESS 0x0001 /* Access time update request. */ #define IN_CHANGE 0x0002 /* Inode change time update request. */ #define IN_UPDATE 0x0004 /* Modification time update request. */ #define IN_MODIFIED 0x0008 /* Inode has been modified. */ #define IN_RENAME 0x0010 /* Inode is being renamed. */ #define IN_SHLOCK 0x0020 /* File has shared lock. */ #define IN_EXLOCK 0x0040 /* File has exclusive lock. */ #define IN_HASHED 0x0080 /* Inode is on hash list */ #ifdef KERNEL /* * Structure used to pass around logical block paths generated by * ufs_getlbns and used by truncate and bmap code. */ struct indir { ufs_daddr_t in_lbn; /* Logical block number. */ int in_off; /* Offset in buffer. */ int in_exists; /* Flag if the block exists. */ }; /* Convert between inode pointers and vnode pointers. */ #define VTOI(vp) ((struct inode *)(vp)->v_data) #define ITOV(ip) ((ip)->i_vnode) /* * XXX this is too long to be a macro, and isn't used in any time-critical * place; in fact it is only used in ufs_vnops.c so it shouldn't be in a * header file. 
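 * A typical (hypothetical) call is ITIMES(ip, &time, &time), which
 * folds any pending IN_ACCESS/IN_UPDATE/IN_CHANGE requests into the
 * on-disk timestamp fields and bumps i_modrev when the modification
 * time changes.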
*/ #define ITIMES(ip, t1, t2) { \ long tv_sec = time.tv_sec; \ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \ (ip)->i_flag |= IN_MODIFIED; \ if ((ip)->i_flag & IN_ACCESS) \ (ip)->i_atime \ = ((t1) == &time ? tv_sec : (t1)->tv_sec); \ if ((ip)->i_flag & IN_UPDATE) { \ (ip)->i_mtime \ = ((t2) == &time ? tv_sec : (t2)->tv_sec); \ (ip)->i_modrev++; \ } \ if ((ip)->i_flag & IN_CHANGE) \ (ip)->i_ctime = tv_sec; \ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ } \ } + +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ u_int16_t ufid_pad; /* Force 32-bit alignment. */ ino_t ufid_ino; /* File number (ino). */ int32_t ufid_gen; /* Generation number. */ }; #endif /* KERNEL */ #endif /* !_UFS_UFS_INODE_H_ */ Index: head/sys/gnu/fs/ext2fs/inode.h =================================================================== --- head/sys/gnu/fs/ext2fs/inode.h (revision 34265) +++ head/sys/gnu/fs/ext2fs/inode.h (revision 34266) @@ -1,172 +1,181 @@ /* * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)inode.h 8.9 (Berkeley) 5/14/95 - * $Id: inode.h,v 1.19 1997/12/05 13:43:47 jkh Exp $ + * $Id: inode.h,v 1.20 1998/01/30 11:34:02 phk Exp $ */ #ifndef _UFS_UFS_INODE_H_ #define _UFS_UFS_INODE_H_ #include #include /* + * The size of a logical block number. 
+ */ +typedef long ufs_lbn_t; + +/* * This must agree with the definition in . */ #define doff_t int32_t /* * The inode is used to describe each active (or recently active) file in the * UFS filesystem. It is composed of two types of information. The first part * is the information that is needed only while the file is active (such as * the identity of the file and linkage to speed its lookup). The second part * is the permanent meta-data associated with the file which is read in * from the permanent dinode from long term storage when the file becomes * active, and is put back when the file is no longer being used. */ struct inode { struct lock i_lock; /* Inode lock. >Keep this first< */ LIST_ENTRY(inode) i_hash;/* Hash chain. */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct vnode *i_devvp;/* Vnode for block I/O. */ u_int32_t i_flag; /* flags, see below */ dev_t i_dev; /* Device associated with the inode. */ ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ union { /* Associated filesystem. */ struct fs *fs; /* FFS */ struct ext2_sb_info *e2fs; /* EXT2FS */ } inode_u; #define i_fs inode_u.fs #define i_e2fs inode_u.e2fs struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ struct lockf *i_lockf;/* Head of byte-level lock list. */ /* * Side effects; used during directory lookup. */ int32_t i_count; /* Size of free slot in directory. */ doff_t i_endoff; /* End of useful stuff in directory. */ doff_t i_diroff; /* Offset in dir, where we found last entry. */ doff_t i_offset; /* Offset of free space in directory. */ ino_t i_ino; /* Inode number of found directory. */ u_int32_t i_reclen; /* Size of found directory entry. */ int i_spare[5]; /* XXX actually non-spare (for ext2fs). */ /* * The on-disk dinode itself. */ struct dinode i_din; /* 128 bytes of the on-disk dinode. */ }; #define i_atime i_din.di_atime #define i_atimensec i_din.di_atimensec #define i_blocks i_din.di_blocks #define i_ctime i_din.di_ctime #define i_ctimensec i_din.di_ctimensec #define i_db i_din.di_db #define i_flags i_din.di_flags #define i_gen i_din.di_gen #define i_gid i_din.di_gid #define i_ib i_din.di_ib #define i_mode i_din.di_mode #define i_mtime i_din.di_mtime #define i_mtimensec i_din.di_mtimensec #define i_nlink i_din.di_nlink #define i_rdev i_din.di_rdev #define i_shortlink i_din.di_shortlink #define i_size i_din.di_size #define i_uid i_din.di_uid /* These flags are kept in i_flag. */ #define IN_ACCESS 0x0001 /* Access time update request. */ #define IN_CHANGE 0x0002 /* Inode change time update request. */ #define IN_UPDATE 0x0004 /* Modification time update request. */ #define IN_MODIFIED 0x0008 /* Inode has been modified. */ #define IN_RENAME 0x0010 /* Inode is being renamed. */ #define IN_SHLOCK 0x0020 /* File has shared lock. */ #define IN_EXLOCK 0x0040 /* File has exclusive lock. */ #define IN_HASHED 0x0080 /* Inode is on hash list */ #ifdef KERNEL /* * Structure used to pass around logical block paths generated by * ufs_getlbns and used by truncate and bmap code. */ struct indir { ufs_daddr_t in_lbn; /* Logical block number. */ int in_off; /* Offset in buffer. */ int in_exists; /* Flag if the block exists. */ }; /* Convert between inode pointers and vnode pointers. 
*/ #define VTOI(vp) ((struct inode *)(vp)->v_data) #define ITOV(ip) ((ip)->i_vnode) /* * XXX this is too long to be a macro, and isn't used in any time-critical * place; in fact it is only used in ufs_vnops.c so it shouldn't be in a * header file. */ #define ITIMES(ip, t1, t2) { \ long tv_sec = time.tv_sec; \ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \ (ip)->i_flag |= IN_MODIFIED; \ if ((ip)->i_flag & IN_ACCESS) \ (ip)->i_atime \ = ((t1) == &time ? tv_sec : (t1)->tv_sec); \ if ((ip)->i_flag & IN_UPDATE) { \ (ip)->i_mtime \ = ((t2) == &time ? tv_sec : (t2)->tv_sec); \ (ip)->i_modrev++; \ } \ if ((ip)->i_flag & IN_CHANGE) \ (ip)->i_ctime = tv_sec; \ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ } \ } + +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ u_int16_t ufid_pad; /* Force 32-bit alignment. */ ino_t ufid_ino; /* File number (ino). */ int32_t ufid_gen; /* Generation number. */ }; #endif /* KERNEL */ #endif /* !_UFS_UFS_INODE_H_ */ Index: head/sys/i386/conf/LINT =================================================================== --- head/sys/i386/conf/LINT (revision 34265) +++ head/sys/i386/conf/LINT (revision 34266) @@ -1,1497 +1,1504 @@ # # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.412 1998/02/24 22:24:46 phk Exp $ +# $Id: LINT,v 1.413 1998/02/27 10:02:41 itojun Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from # this file as required. # # # This directive is mandatory; it defines the architecture to be # configured for; in this case, the 386 family based IBM-PC and # compatibles. # machine "i386" # # This is the ``identification'' of the kernel. Usually this should # be the same as the name of your kernel. # ident LINT # # The `maxusers' parameter controls the static sizing of a number of # internal system tables by a complicated formula defined in param.c. # maxusers 10 # # Certain applications can grow to be larger than the 128M limit # that FreeBSD initially imposes. Below are some options to # allow that limit to grow to 256MB, and can be increased further # with changing the parameters. MAXDSIZ is the maximum that the # limit can be set to, and the DFLDSIZ is the default value for # the limit. You might want to set the default lower than the # max, and explicitly set the maximum with a shell command for processes # that regularly exceed the limit like INND. # options "MAXDSIZ=(256*1024*1024)" options "DFLDSIZ=(256*1024*1024)" # When this is set, be extra conservative in various parts of the kernel # and choose functionality over speed (on the widest variety of systems). options FAILSAFE # This allows you to actually store this configuration file into # the kernel binary itself, where it may be later read by saying: # strings /kernel | grep ^___ | sed -e 's/^___//' > MYKERNEL # options INCLUDE_CONFIG_FILE # Include this file in kernel # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' # - The root filesystem might be on partition wd0a # - Crash dumps will be written to wd0b, if possible. Specifying the # dump device here is not recommended. Use dumpon(8). 
#
config		kernel	root on wd0 dumps on wd0

#####################################################################
# SMP OPTIONS:
#
# SMP enables building of a Symmetric MultiProcessor Kernel.
# APIC_IO enables the use of the IO APIC for Symmetric I/O.
# NCPU sets the number of CPUs, defaults to 2.
# NBUS sets the number of busses, defaults to 4.
# NAPIC sets the number of IO APICs on the motherboard, defaults to 1.
# NINTR sets the total number of INTs provided by the motherboard.
#
# Notes:
#
# An SMP kernel will ONLY run on an Intel MP spec. qualified motherboard.
#
# Be sure to disable 'cpu "I386_CPU"' && 'cpu "I486_CPU"' for SMP kernels.
#
# Check the 'Rogue SMP hardware' section to see if additional options
# are required by your hardware.
#

# Mandatory:
options		SMP		# Symmetric MultiProcessor Kernel
options		APIC_IO		# Symmetric (APIC) I/O

# Optional, these are the defaults plus 1:
options		NCPU=5		# number of CPUs
options		NBUS=5		# number of busses
options		NAPIC=2		# number of IO APICs
options		NINTR=25	# number of INTs

#
# Rogue SMP hardware:
#
# Bridged PCI cards:
#
# The MP tables of most of the current generation MP motherboards
# do NOT properly support bridged PCI cards.  To use one of these
# cards you should refer to ???

#####################################################################
# CPU OPTIONS

#
# You must specify at least one CPU (the one you intend to run on);
# deleting the specification for CPUs you don't need to use may make
# parts of the system run faster.  This is especially true when removing
# I386_CPU.
#
cpu		"I386_CPU"
cpu		"I486_CPU"
cpu		"I586_CPU"	# aka Pentium(tm)
cpu		"I686_CPU"	# aka Pentium Pro(tm)

#
# Options for CPU features.
#
# CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM
# BlueLightning CPU.  It works only with Cyrix FPU, and this option
# should not be used with Intel FPU.
#
# CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning
# CPU if CPU supports it.  The default is double-clock mode on
# BlueLightning CPU box.
#
# CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1).
#
# CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct
# mapped mode.  Default is 2-way set associative mode.
#
# CPU_CYRIX_NO_LOCK enables weak locking for the entire address space
# of Cyrix 6x86 and 6x86MX CPUs.  If this option is not set and
# FAILSAFE is defined, NO_LOCK bit of CCR1 is cleared.  (NOTE 3)
#
# CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables
# reorder).  This option should not be used if you use memory mapped
# I/O device(s).
#
# CPU_FASTER_5X86_FPU enables faster FPU exception handler.
#
# CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products
# for i386 machines.
#
# CPU_IORT defines I/O clock delay time (NOTE 1).  Default values of
# I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7, respectively
# (no clock delay).
#
# CPU_LOOP_EN prevents flushing the prefetch buffer if the destination
# of a jump is already present in the prefetch buffer on Cyrix 5x86
# (NOTE 1).
#
# CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1).
#
# CPU_SUSP_HLT enables suspend on HALT.  If this option is set, CPU
# enters suspend mode following execution of HALT instruction.
#
# CPU_WT_ALLOC enables write-through allocation.
#
# CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache
# flush at hold state.
#
# CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs
# without cache flush at hold state, and (2) write-back CPU cache on
# Cyrix 6x86 whose revision < 2.7 (NOTE 2).
#
# NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY
# Pentiums) from locking up when a LOCK CMPXCHG8B instruction is
# executed.  This should be included for ALL kernels that won't run
# on a Pentium.
#
# NOTE 1: The options CPU_BTB_EN, CPU_LOOP_EN, CPU_IORT, and
# CPU_RSTK_EN should not be used because of CPU bugs.  These options
# may crash your system.
#
# NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled
# in write-through mode when revision < 2.7.  If revision of Cyrix
# 6x86 >= 2.7, CPU cache is always enabled in write-back mode.
#
# NOTE 3: This option may cause failures for software that requires
# locked cycles in order to operate correctly.
#
options		"CPU_BLUELIGHTNING_FPU_OP_CACHE"
options		"CPU_BLUELIGHTNING_3X"
options		"CPU_BTB_EN"
options		"CPU_DIRECT_MAPPED_CACHE"
options		"CPU_DISABLE_5X86_LSSER"
options		"CPU_FASTER_5X86_FPU"
options		"CPU_I486_ON_386"
options		"CPU_IORT"
options		"CPU_LOOP_EN"
options		"CPU_RSTK_EN"
options		"CPU_SUSP_HLT"
options		"CYRIX_CACHE_WORKS"
options		"CYRIX_CACHE_REALLY_WORKS"
#options	"NO_F00F_HACK"

#
# A math emulator is mandatory if you wish to run on hardware which
# does not have a floating-point processor.  Pick either the original,
# bogus (but freely-distributable) math emulator, or a much more
# fully-featured but GPL-licensed emulator taken from Linux.
#
options		MATH_EMULATE	#Support for x87 emulation
# Don't enable both of these in a real config.
options		GPL_MATH_EMULATE	#Support for x87 emulation via
					#new math emulator

#####################################################################
# COMPATIBILITY OPTIONS

#
# Implement system calls compatible with 4.3BSD and older versions of
# FreeBSD.  You probably do NOT want to remove this as much current code
# still relies on the 4.3 emulation.
#
options		"COMPAT_43"

#
# Allow user-mode programs to manipulate their local descriptor tables.
# This option is required for the WINE Windows(tm) emulator, and is
# not used by anything else (that we know of).
#
options		USER_LDT	#allow user-level control of i386 ldt

#
# These three options provide support for System V Interface
# Definition-style interprocess communication, in the form of shared
# memory, semaphores, and message queues, respectively.
#
options		SYSVSHM
options		SYSVSEM
options		SYSVMSG

#
# This option includes an MD5 routine in the kernel; it is used for
# various authentication and privacy uses.
#
options		"MD5"

#
# Allow processes to switch to vm86 mode, as well as enabling direct
# user-mode access to the I/O port space.  This option is necessary for
# the doscmd emulator to run.
#
options		"VM86"

#####################################################################
# DEBUGGING OPTIONS

#
# Enable the kernel debugger.
#
options		DDB

#
# Don't drop into DDB for a panic.  Intended for unattended operation
# where you may want to drop to DDB from the console, but still want
# the machine to recover from a panic
#
options		DDB_UNATTENDED

#
# If using GDB remote mode to debug the kernel, there's a non-standard
# extension to the remote protocol that can be used to use the serial
# port as both the debugging port and the system console.  It's non-
# standard and you're on your own if you enable it.  See also the
# "remotechat" variables in the FreeBSD specific version of gdb.
#
options		GDB_REMOTE_CHAT

#
# KTRACE enables the system-call tracing facility ktrace(2).
#
options		KTRACE		#kernel tracing

#
# The DIAGNOSTIC option is used in a number of source files to enable
# extra sanity checking of internal structures.  This support is not
# enabled by default because of the extra time it would take to check
# for these conditions, which can only occur as a result of
# programming errors.
#
options		DIAGNOSTIC

#
# PERFMON causes the driver for Pentium/Pentium Pro performance counters
# to be compiled.  See perfmon(4) for more information.
#
options		PERFMON

#
# This option lets some drivers co-exist that can't co-exist in a running
# system.  It is used to be able to compile all kernel code in one go for
# quality assurance purposes (like this file, which the option takes its
# name from.)
#
options		COMPILING_LINT

# XXX - this doesn't belong here.
# Allow ordinary users to take the console - this is useful for X.
options		UCONSOLE

# XXX - this doesn't belong here either
options		USERCONFIG		#boot -c editor
options		USERCONFIG_BOOT		#imply -c and parse info area
options		VISUAL_USERCONFIG	#visual boot -c editor

#####################################################################
# NETWORKING OPTIONS

#
# Protocol families:
#  Only the INET (Internet) family is officially supported in FreeBSD.
#  Source code for the NS (Xerox Network Service) is provided for amusement
#  value.
#
options		INET		#Internet communications protocols
options		IPX		#IPX/SPX communications protocols
options		IPXIP		#IPX in IP encapsulation (not available)
options		IPTUNNEL	#IP in IPX encapsulation (not available)
options		NETATALK	#Appletalk communications protocols

# These are currently broken but are shipped due to interest.
#options	NS		#Xerox NS protocols

# These are currently broken and are no longer shipped due to lack
# of interest.
#options	CCITT		#X.25 network layer
#options	ISO
#options	TPIP		#ISO TP class 4 over IP
#options	TPCONS		#ISO TP class 0 over X.25
#options	LLC		#X.25 link layer for Ethernets
#options	HDLC		#X.25 link layer for serial lines
#options	EON		#ISO CLNP over IP
#options	NSIP		#XNS over IP

#
# Network interfaces:
#  The `loop' pseudo-device is MANDATORY when networking is enabled.
#  The `ether' pseudo-device provides generic code to handle
#  Ethernets; it is MANDATORY when an Ethernet device driver is
#  configured.
#  The 'fddi' pseudo-device provides generic code to support FDDI.
#  The `sppp' pseudo-device serves a similar role for certain types
#  of synchronous PPP links (like `cx', `ar').
#  The `sl' pseudo-device implements the Serial Line IP (SLIP) service.
#  The `ppp' pseudo-device implements the Point-to-Point Protocol.
#  The `bpfilter' pseudo-device enables the Berkeley Packet Filter.  Be
#  aware of the legal and administrative consequences of enabling this
#  option.  The number of devices determines the maximum number of
#  simultaneous BPF client programs runnable.
#  The `disc' pseudo-device implements a minimal network interface,
#  which throws away all packets sent and never receives any.  It is
#  included for testing purposes.
#  The `tun' pseudo-device implements the User Process PPP (iijppp)
#
# The PPP_BSDCOMP option enables support for compress(1) style entire
# packet compression, the PPP_DEFLATE is for zlib/gzip style compression.
# PPP_FILTER enables code for filtering the ppp data stream and selecting
# events for resetting the demand dial activity timer - requires bpfilter.
# See pppd(8) for more details.
#
pseudo-device	ether		#Generic Ethernet
pseudo-device	fddi		#Generic FDDI
pseudo-device	sppp		#Generic Synchronous PPP
pseudo-device	loop		#Network loopback device
pseudo-device	bpfilter 4	#Berkeley packet filter
pseudo-device	disc		#Discard device
pseudo-device	tun 1		#Tunnel driver (user process ppp(8))
pseudo-device	sl 2		#Serial Line IP
pseudo-device	ppp 2		#Point-to-point protocol
options		PPP_BSDCOMP	#PPP BSD-compress support
options		PPP_DEFLATE	#PPP zlib/deflate/gzip support
options		PPP_FILTER	#enable bpf filtering (needs bpfilter)

#
# Internet family options:
#
# TCP_COMPAT_42 causes the TCP code to emulate certain bugs present in
# 4.2BSD.  This option should not be used unless you have a 4.2BSD
# machine and TCP connections fail.
#
# MROUTING enables the kernel multicast packet forwarder, which works
# with mrouted(8).
#
# IPFIREWALL enables support for IP firewall construction, in
# conjunction with the `ipfw' program.  IPFIREWALL_VERBOSE sends
# logged packets to the system logger.  IPFIREWALL_VERBOSE_LIMIT
# limits the number of times a matching entry can be logged.
#
# WARNING:  IPFIREWALL defaults to a policy of "deny ip from any to any"
# and if you do not add other rules during startup to allow access,
# YOU WILL LOCK YOURSELF OUT.  It is suggested that you set firewall=open
# in /etc/rc.conf when first enabling this feature, then refine the
# firewall rules in /etc/rc.firewall after you've tested that the new kernel
# feature works properly.
#
# IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to
# allow everything.  Use with care: if a cracker can crash your
# firewall machine, they can get to your protected machines.  However,
# if you are using it as an as-needed filter for specific problems as
# they arise, then this may be for you.  Changing the default to 'allow'
# means that you won't get stuck if the kernel and /sbin/ipfw binary get
# out of sync.
#
# IPDIVERT enables the divert IP sockets, used by ``ipfw divert''
#
# TCPDEBUG is undocumented.
#
options		"TCP_COMPAT_42"	#emulate 4.2BSD TCP bugs
options		MROUTING	# Multicast routing
options		IPFIREWALL	#firewall
options		IPFIREWALL_VERBOSE	#print information about
					# dropped packets
options		"IPFIREWALL_VERBOSE_LIMIT=100"	#limit verbosity
options		IPFIREWALL_DEFAULT_TO_ACCEPT	#allow everything by default
options		IPDIVERT	#divert sockets
options		TCPDEBUG

#####################################################################
# FILESYSTEM OPTIONS

#
# Only the root, /usr, and /tmp filesystems need be statically
# compiled; everything else will be automatically loaded at mount
# time.  (Exception: the UFS family --- FFS and MFS --- cannot
# currently be demand-loaded.)  Some people still prefer to statically
# compile other filesystems as well.
#
# NB: The NULL, PORTAL, UMAP and UNION filesystems are known to be
# buggy, and WILL panic your system if you attempt to do anything with
# them.  They are included here as an incentive for some enterprising
# soul to sit down and fix them.
#

# One of these is mandatory:
options		FFS		#Fast filesystem
options		NFS		#Network File System

# The rest are optional:
# options	NFS_NOSERVER	#Disable the NFS-server code.
options "CD9660" #ISO 9660 filesystem options FDESC #File descriptor filesystem options KERNFS #Kernel filesystem options MFS #Memory File System options MSDOSFS #MS DOS File System options NULLFS #NULL filesystem options PORTAL #Portal filesystem options PROCFS #Process filesystem options UMAPFS #UID map filesystem options UNION #Union filesystem options "CD9660_ROOT" #CD-ROM usable as root device options FFS_ROOT #FFS usable as root device options NFS_ROOT #NFS usable as root device # This DEVFS is experimental but seems to work options DEVFS #devices filesystem + +# Allow the FFS to use Softupdates technology. +# To do this you need to fetch the two files +# /sys/ufs/ffs/softdep.h and /sys/ufs/ffs/ffs_softdep.c +# from freebsd.org and understand the licensing restrictions. +#options SOFTUPDATES +# (we can't actually enable it because the files may not be present) # Make space in the kernel for a MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. options MFS_ROOT=10 # Allow the MFS_ROOT code to load the MFS image from floppy if it is missing. options MFS_AUTOLOAD # Allow this many swap-devices. options NSWAPDEV=20 # Disk quotas are supported when this option is enabled. If you # change the value of this option, you must do a `make clean' in your # kernel compile directory in order to get a working kernel. # options QUOTA #enable disk quotas # Add more checking code to various filesystems #options NULLFS_DIAGNOSTIC #options KERNFS_DIAGNOSTIC #options UMAPFS_DIAGNOSTIC #options UNION_DIAGNOSTIC # In particular multi-session CD-Rs might require a huge amount of # time in order to "settle". If we are about mounting them as the # root f/s, we gotta wait a little. # # The number is supposed to be in seconds. options "CD9660_ROOTDELAY=20" # If you are running a machine just as a fileserver for PC and MAC users. # (using SAMBA or Netatalk), then you may consider setting this option # and keeping all those user's directories on a partition that is mounted # with the suiddir option. This gives new files the same ownership as # the directory (similiar to group). It's a security hole if you let # these users run programs so confine it to file-servers, (but it'll save you # lots of headaches in that case). Root owned directories are excempt and X bits # are cleared. the suid bit must be set on the directory as well. see chmod(1) # PC owners can't see/set ownerships so they keep getting their toes # trodden on. This saves you all the support calls as the filesystem # it's used on will act as they expect. ("It's my dir so it must be my file"). # options SUIDDIR # Add some error checking code to the null_bypass routine # in the NULL filesystem #options SAFETY ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. # # Beginning with FreeBSD 2.0.5 you can wire down your SCSI devices so # that a given bus, target, and LUN always come on line as the same # device unit. In earlier versions the unit numbers were assigned # in the order that the devices were probed on the SCSI bus. 
This # means that if you removed a disk drive, you may have had to rewrite # your /etc/fstab file, and also that you had to be careful when adding # a new disk as it may have been probed earlier and moved your device # configuration around. # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "sd3" then the first # non-wired disk will be assigned sd4. # The syntax for wiring down devices is: # controller scbus0 at ahc0 # Single bus device # controller scbus1 at ahc1 bus 0 # Single bus device # controller scbus3 at ahc2 bus 0 # Twin bus device # controller scbus2 at ahc2 bus 1 # Twin bus device # disk sd0 at scbus0 target 0 unit 0 # disk sd1 at scbus3 target 1 # disk sd2 at scbus2 target 3 # tape st1 at scbus1 target 6 # device cd0 at scbus? # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI # configuration and doesn't have to be explicitly configured. controller scbus0 #base SCSI code device ch0 #SCSI media changers device sd0 #SCSI disks device st0 #SCSI tapes device cd0 #SCSI CD-ROMs device od0 #SCSI optical disk # The previous devices (ch, sd, st, cd) are recognized by config. # config doesn't (and shouldn't) know about these newer ones, # so we have to specify that they are on a SCSI bus with the "at scbus?" # clause. device worm0 at scbus? # SCSI worm device pt0 at scbus? # SCSI processor type device sctarg0 at scbus? # SCSI target # SCSI OPTIONS: # SCSIDEBUG: When defined enables debugging macros # NO_SCSI_SENSE: When defined disables sense descriptions (about 4k) # SCSI_REPORT_GEOMETRY: Always report disk geometry at boot up instead # of only when booting verbosely. options SCSIDEBUG #options NO_SCSI_SENSE options SCSI_REPORT_GEOMETRY # Options for the `od' optical disk driver: # # If drive returns sense key as 0x02 with vendor specific additional # sense code (ASC) and additional sense code qualifier (ASCQ), or # illegal ASC and ASCQ. This cause an error (NOT READY) and retrying. # To suppress this, use the following option. # options OD_BOGUS_NOT_READY # # For an automatic spindown, try this. Again, preferably as an # option in your config file. # WARNING! Use at your own risk. Joerg's ancient SONY SMO drive # groks it fine, while Shunsuke's Fujitsu chokes on it and times # out. # options OD_AUTO_TURNOFF ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS # The `pty' device usually turns out to be ``effectively mandatory'', # as it is required for `telnetd', `rlogind', `screen', `emacs', and # `xterm', among others. pseudo-device pty 16 #Pseudo ttys - can go as high as 256 pseudo-device speaker #Play IBM BASIC-style noises out your speaker pseudo-device gzip #Exec gzipped a.out's pseudo-device vn #Vnode driver (turns a file into a device) pseudo-device snp 3 #Snoop device - to look at pty/vty/etc.. pseudo-device ccd 4 #Concatenated disk driver # These are only for watching for bitrot in old tty code. # broken #pseudo-device tb # These are only for watching for bitrot in old SCSI code. 
pseudo-device su #scsi user pseudo-device ssc #super scsi ##################################################################### # HARDWARE DEVICE CONFIGURATION # ISA and EISA devices: # EISA support is available for some device, so they can be auto-probed. # Micro Channel is not supported at all. # # Mandatory ISA devices: isa, npx # controller isa0 # # Options for `isa': # # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # This option breaks suspend/resume on some portables. # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # Automatic EOI is documented not to work for for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # BOUNCE_BUFFERS provides support for ISA DMA on machines with more # than 16 megabytes of memory. It doesn't hurt on other machines. # Some broken EISA and VLB hardware may need this, too. # # MAXMEM specifies the amount of RAM on the machine; if this is not # specified, FreeBSD will first read the amount of memory from the CMOS # RAM, so the amount of memory will initially be limited to 64MB or 16MB # depending on the BIOS. If the BIOS reports 64MB, a memory probe will # then attempt to detect the installed amount of RAM. If this probe # fails to detect >64MB RAM you will have to use the MAXMEM option. # The amount is in kilobytes, so for a machine with 128MB of RAM, it would # be 131072 (128 * 1024). # # TUNE_1542 enables the automatic ISA bus speed selection for the # Adaptec 1542 boards. Does not work for all boards, use it with caution. # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. # # PAS_JOYSTICK_ENABLE enables the gameport on the ProAudio Spectrum options "AUTO_EOI_1" #options "AUTO_EOI_2" options BOUNCE_BUFFERS options "MAXMEM=(128*1024)" options "TUNE_1542" #options BROKEN_KEYBOARD_RESET #options PAS_JOYSTICK_ENABLE # Enable support for the kernel PLL to use an external PPS signal, # under supervision of [x]ntpd(8) # More info in ftp://ftp.udel.edu/pub/ntp/kernel.tar.Z options PPS_SYNC # Enable PnP support in the kernel. This allows you to automaticly # attach to PnP cards for drivers that support it and allows you to # configure cards from USERCONFIG. See pnp(4) for more info. controller pnp0 # The pcvt console driver (vt220 compatible). device vt0 at isa? port "IO_KBD" tty irq 1 vector pcrint options XSERVER # support for running an X server. options FAT_CURSOR # start with block cursor # This PCVT option is for keyboards such as those used on IBM ThinkPad laptops options PCVT_SCANSET=2 # IBM keyboards are non-std # The syscons console driver (sco color console compatible). device sc0 at isa? 
port "IO_KBD" tty irq 1 vector scintr options MAXCONS=16 # number of virtual consoles options SLOW_VGA # do byte-wide i/o's to TS and GDC regs options "STD8X16FONT" # Compile font in makeoptions "STD8X16FONT"="cp850" options SC_HISTORY_SIZE=200 # number of history buffer lines # # `flags' for sc0: # 0x01 Use a 'visual' bell # 0x02 Use a 'blink' cursor # 0x04 Use a 'underline' cursor # 0x06 Use a 'blinking underline' (destructive) cursor # 0x08 Force detection of keyboard, else we always assume a keyboard # 0x10 Old-style (XT) keyboard support, useful for older ThinkPads # 0x20 Don't reset keyboard, useful for some newer ThinkPads # # The Numeric Processing eXtension driver. This should be configured if # your machine has a math co-processor, unless the coprocessor is very # buggy. If it is not configured then you *must* configure math emulation # (see above). If both npx0 and emulation are configured, then only npx0 # is used (provided it works). device npx0 at isa? port "IO_NPX" iosiz 0x0 flags 0x0 irq 13 vector npxintr # # `flags' for npx0: # 0x01 don't use the npx registers to optimize bcopy # 0x02 don't use the npx registers to optimize bzero # 0x04 don't use the npx registers to optimize copyin or copyout. # The npx registers are normally used to optimize copying and zeroing when # all of the following conditions are satisfied: # "I586_CPU" is an option # the cpu is an i586 (perhaps not a Pentium) # the probe for npx0 succeeds # INT 16 exception handling works. # Then copying and zeroing using the npx registers is normally 30-100% faster. # The flags can be used to control cases where it doesn't work or is slower. # Setting them at boot time using userconfig works right (the optimizations # are not used until later in the bootstrap when npx0 is attached). # # # `iosiz' for npx0: # This can be used instead of the MAXMEM option to set the memory size. If # it is nonzero, then it overrides both the MAXMEM option and the memory # size reported by the BIOS. Setting it at boot time using userconfig takes # effect on the next reboot after the change has been recorded in the kernel # binary (the size is used early in the boot before userconfig has a chance # to change it). # # # Optional ISA and EISA devices: # # # SCSI host adapters: `aha', `aic', `bt', `nca' # # aha: Adaptec 154x # ahc: Adaptec 274x/284x/294x # aic: Adaptec 152x and sound cards using the Adaptec AIC-6360 (slow!) # bt: Most Buslogic controllers # nca: ProAudioSpectrum cards using the NCR 5380 or Trantor T130 # uha: UltraStore 14F and 34F # sea: Seagate ST01/02 8 bit controller (slow!) # wds: Western Digital WD7000 controller (no scatter/gather!). # # Note that the order is important in order for Buslogic cards to be # probed correctly. # controller bt0 at isa? port "IO_BT0" bio irq ? vector bt_isa_intr controller aha0 at isa? port "IO_AHA0" bio irq ? drq 5 vector ahaintr controller uha0 at isa? port "IO_UHA0" bio irq ? drq 5 vector uhaintr controller aic0 at isa? port 0x340 bio irq 11 vector aicintr controller nca0 at isa? port 0x1f88 bio irq 10 vector ncaintr controller nca1 at isa? port 0x1f84 controller nca2 at isa? port 0x1f8c controller nca3 at isa? port 0x1e88 controller nca4 at isa? port 0x350 bio irq 5 vector ncaintr controller sea0 at isa? bio irq 5 iomem 0xdc000 iosiz 0x2000 vector seaintr controller wds0 at isa? port 0x350 bio irq 15 drq 6 vector wdsintr # # ST-506, ESDI, and IDE hard disks: `wdc' and `wd' # # The flags fields are used to enable the multi-sector I/O and # the 32BIT I/O modes. 
The flags may be used in either the controller # definition or in the individual disk definitions. The controller # definition is supported for the boot configuration stuff. # # Each drive has a 16 bit flags value defined: # The low 8 bits are the maximum value for the multi-sector I/O, # where 0xff defaults to the maximum that the drive can handle. # The high bit of the 16 bit flags (0x8000) allows probing for # 32 bit transfers. Bit 14 (0x4000) enables a hack to wake # up powered-down laptop drives. Bit 13 (0x2000) allows # probing for PCI IDE DMA controllers, such as Intel's PIIX # south bridges. See the wd.4 man page. # # The flags field for the drives can be specified in the controller # specification with the low 16 bits for drive 0, and the high 16 bits # for drive 1. # e.g.: #controller wdc0 at isa? port "IO_WD1" bio irq 14 flags 0x00ff8004 vector wdintr # # specifies that drive 0 will be allowed to probe for 32 bit transfers and # a maximum multi-sector transfer of 4 sectors, and drive 1 will not be # allowed to probe for 32 bit transfers, but will allow multi-sector # transfers up to the maximum that the drive supports. # # If you are using a PCI controller that is not running in compatibility # mode (for example, it is a 2nd IDE PCI interface), then use config line(s) # such as: # #controller wdc2 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd4 at wdc2 drive 0 #disk wd5 at wdc2 drive 1 # #controller wdc3 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd6 at wdc3 drive 0 #disk wd7 at wdc3 drive 1 # # Note that the above config would be useful for a Promise card, when used # on a MB that already has a PIIX controller. Note the bogus irq and port # entries. These are automatically filled in by the IDE/PCI support. # controller wdc0 at isa? port "IO_WD1" bio irq 14 vector wdintr disk wd0 at wdc0 drive 0 disk wd1 at wdc0 drive 1 controller wdc1 at isa? port "IO_WD2" bio irq 15 vector wdintr disk wd2 at wdc1 drive 0 disk wd3 at wdc1 drive 1 # # Options for `wdc': # # CMD640 enables serializing access to primary and secondary channel # of the CMD640B IDE Chip. The serializing will only take place # if this option is set *and* the chip is probed by the pci-system. # options "CMD640" #Enable work around for CMD640 h/w bug # # ATAPI enables support for ATAPI-compatible IDE devices # options ATAPI #Enable ATAPI support for IDE bus options ATAPI_STATIC #Don't do it as an LKM # IDE CD-ROM driver - requires wdc controller and ATAPI option device wcd0 # IDE floppy driver - requires wdc controller and ATAPI option device wfd0 # # Standard floppy disk controllers and floppy tapes: `fdc', `fd', and `ft' # controller fdc0 at isa? port "IO_FD1" bio irq 6 drq 2 vector fdintr # # FDC_DEBUG enables floppy debugging. Since the debug output is huge, you # must still turn it on explicitly by setting the variable fd_debug with # DDB. options FDC_DEBUG # This option is undocumented on purpose. options FDC_PRINT_BOGUS_CHIPTYPE # # Activate this line instead of the fdc0 line above if you happen to # have an Insight floppy tape. Probing them proved to be dangerous # for people with floppy disks only, so it's "hidden" behind a flag: #controller fdc0 at isa? port "IO_FD1" bio flags 1 irq 6 drq 2 vector fdintr disk fd0 at fdc0 drive 0 disk fd1 at fdc0 drive 1 tape ft0 at fdc0 drive 2 # # Other standard PC hardware: `lpt', `mse', `psm', `sio', etc.
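The wd/wdc flags packing just described is easy to get wrong by hand; a minimal C sketch (the macro names are mine, not from the driver) that reproduces the 0x00ff8004 example above:

#include <stdio.h>

/* Per-drive 16-bit flag bits, per the wdc notes above (names are mine). */
#define WD_PROBE_32BIT  0x8000  /* probe for 32 bit transfers */
#define WD_WAKE_LAPTOP  0x4000  /* wake powered-down laptop drives */
#define WD_PROBE_DMA    0x2000  /* probe for PCI IDE DMA (e.g. PIIX) */
#define WD_MULTMASK     0x00ff  /* max multi-sector count; 0xff = drive max */

/* Controller flags word: low 16 bits are drive 0, high 16 bits drive 1. */
#define WDC_FLAGS(d0, d1) (((unsigned long)(d1) << 16) | (unsigned long)(d0))

int main(void)
{
    /* drive 0: 32-bit probe + multcount 4; drive 1: max multcount only */
    printf("flags 0x%08lx\n", WDC_FLAGS(WD_PROBE_32BIT | 4, WD_MULTMASK));
    return 0;   /* prints "flags 0x00ff8004", matching the example above */
}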
# # lpt: printer port # lpt specials: # port can be specified as ?, this will cause the driver to scan # the BIOS port list; # the irq and vector clauses may be omitted, this # will force the port into polling mode. # mse: Logitech and ATI InPort bus mouse ports # psm: PS/2 mouse port [note: conflicts with sc0/vt0, thus "conflicts" keywd] # sio: serial ports (see sio(4)) device lpt0 at isa? port? tty irq 7 vector lptintr device lpt1 at isa? port "IO_LPT3" tty irq 5 vector lptintr device mse0 at isa? port 0x23c tty irq 5 vector mseintr device psm0 at isa? port "IO_KBD" conflicts tty irq 12 vector psmintr # Options for psm: options PSM_HOOKAPM #hook the APM resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event device sio0 at isa? port "IO_COM1" tty flags 0x10 irq 4 vector siointr # # `flags' for serial drivers that support consoles (only for sio now): # 0x10 enable console support for this unit. The other console flags # are ignored unless this is set. Enabling console support does # not make the unit the preferred console - boot with -h or set # the 0x20 flag for that. Currently, at most one unit can have # console support; the first one (in config file order) with # this flag set is preferred. Setting this flag for sio0 gives # the old behaviour. # 0x20 force this unit to be the console (unless there is another # higher priority console). This replaces the COMCONSOLE option. # 0x40 reserve this unit for low level console operations. Do not # access the device in any normal way. # # PnP `flags' (set via userconfig using pnp x flags y) # 0x1 disable probing of this device. Used to prevent your modem # from being attached as a PnP modem. # # Options for serial drivers that support consoles (only for sio now): options BREAK_TO_DEBUGGER #a BREAK on a comconsole goes to #DDB, if available. options CONSPEED=9600 #default speed for serial console (default 9600) # Options for sio: options COM_ESP #code for Hayes ESP options COM_MULTIPORT #code for some cards with shared IRQs options DSI_SOFT_MODEM #code for DSI Softmodems options "EXTRA_SIO=2" #number of extra sio ports to allocate # Other flags for sio that aren't documented in the man page. # 0x20000 enable hardware RTS/CTS and larger FIFOs. Only works for # ST16650A-compatible UARTs. # # Network interfaces: `cx', `ed', `el', `ep', `ie', `is', `le', `lnc' # # ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver (requires sppp) # cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing) # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # el: 3Com 3C501 (slow!) # ep: 3Com 3C509 (buggy) # fe: Fujitsu MB86960A/MB86965A Ethernet # ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210; Intel EtherExpress # le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100, # DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422) # lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL) # sr: RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp) # wl: Lucent Wavelan (ISA card only). # ze: IBM/National Semiconductor PCMCIA ethernet controller. # zp: 3Com PCMCIA Etherlink III (It does not require shared memory for # send/receive operation, but it needs 'iomem' to read/write the # attribute memory) # device ar0 at isa? port 0x300 net irq 10 iomem 0xd0000 vector arintr device cx0 at isa? port 0x240 net irq 15 drq 7 vector cxintr device ed0 at isa? port 0x280 net irq 5 iomem 0xd8000 vector edintr device eg0 at isa? port 0x310 net irq 5 vector egintr device el0 at isa?
port 0x300 net irq 9 vector elintr device ep0 at isa? port 0x300 net irq 10 vector epintr device ex0 at isa? port? net irq? vector exintr device fe0 at isa? port 0x300 net irq ? vector feintr device ie0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector ieintr device ie1 at isa? port 0x360 net irq 7 iomem 0xd0000 vector ieintr device le0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector le_intr device lnc0 at isa? port 0x300 net irq 10 drq 0 vector lncintr device sr0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector srintr options WLCACHE # enables the signal-strength cache options WLDEBUG # enables verbose debugging output device wl0 at isa? port 0x300 net irq ? vector wlintr # We can (bogusly) include both the dedicated PCCARD drivers and the generic # support when COMPILING_LINT. device ze0 at isa? port 0x300 net irq 5 iomem 0xd8000 vector zeintr device zp0 at isa? port 0x300 net irq 10 iomem 0xd8000 vector zpintr # # ATM related options # # The `en' device provides support for Efficient Networks (ENI) # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0). # # atm pseudo-device provides generic atm functions and is required for # atm devices. # NATM enables the netnatm protocol family that can be used to # bypass TCP/IP. # # the current driver supports only PVC operations (no atm-arp, no multicast). # for more details, please read the original documents at # http://www.ccrc.wustl.edu/pub/chuck/bsdatm/wucs.html # pseudo-device atm device en0 device en1 options NATM #native ATM # # Audio drivers: `snd', `sb', `pas', `gus', `pca' # # snd: Voxware sound support code # sb: SoundBlaster PCM - SoundBlaster, SB Pro, SB16, ProAudioSpectrum # sbxvi: SoundBlaster 16 # sbmidi: SoundBlaster 16 MIDI interface # pas: ProAudioSpectrum PCM and MIDI # gus: Gravis Ultrasound - Ultrasound, Ultrasound 16, Ultrasound MAX # gusxvi: Gravis Ultrasound 16-bit PCM (do not use) # mss: Microsoft Sound System # css: Crystal Sound System (CSS 423x PnP) # sscape: Ensoniq Soundscape MIDI interface # sscape_mss: Ensoniq Soundscape PCM (requires sscape) # opl: Yamaha OPL-2 and OPL-3 FM - SB, SB Pro, SB 16, ProAudioSpectrum # uart: stand-alone 6850 UART for MIDI # mpu: Roland MPU-401 stand-alone card # # Beware! The addresses specified below are also hard-coded in # i386/isa/sound/sound_config.h. If you change the values here, you # must also change the values in the include file. # # pcm: PCM audio through various sound cards. # # This is the work in progress from Luigi Rizzo. This has support for # CS423x based cards, OPTi931, SB16 PnP, GusPnP. For more information # about this driver, take a look at sys/i386/isa/snd/README. # # The flags field of the device tells the driver a bit more info about the # device than normally is obtained through the PnP interface. # bit 2..0 secondary DMA channel; # bit 4 set if the board uses two dma channels; # bit 15..8 board type, overrides autodetection; leave it # zero if you don't know what to put in (and you don't, # since this is unsupported at the moment...). # # This driver will use the new PnP code if it's available. # # pca: PCM audio through your PC speaker # # If you have a GUS-MAX card and want to use the CS4231 codec on the # card, the drqs for the gus max must be 8 bit (1, 2, or 3). # # If you would like to use the full duplex option on the gus, then define # flags to be the ``read dma channel''.
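A similar sketch for the pcm `flags' bit layout just described (the macro names and the DRQ-5 example are mine, invented for illustration):

#include <stdio.h>

/* pcm 'flags' packing per the bit assignments above (names are mine). */
#define PCM_SECDRQ(n)    ((n) & 0x07)          /* bits 2..0: secondary DMA channel */
#define PCM_DUALDMA      0x10                  /* bit 4: board uses two DMA channels */
#define PCM_BOARDTYPE(t) (((t) & 0xff) << 8)   /* bits 15..8: override autodetection */

int main(void)
{
    /* dual-DMA board with secondary DRQ 5, board type left to autodetect */
    printf("flags 0x%04x\n", PCM_DUALDMA | PCM_SECDRQ(5));    /* prints 0x0015 */
    return 0;
}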
# # options BROKEN_BUS_CLOCK #PAS-16 isn't working and OPTI chipset # options SYMPHONY_PAS #PAS-16 isn't working and SYMPHONY chipset # options EXCLUDE_SBPRO #PAS-16 # options SBC_IRQ=5 #PAS-16. Must match irq on sb0 line. # PAS16: The order of the pas0/sb0/opl0 is important since the # sb emulation is enabled in the pas-16 attach. # # The i386/isa/sound/sound.doc has more information. # Controls all "VOXWARE" driver sound devices. See Luigi's driver # below for an alternate which may work better for some cards. # controller snd0 device pas0 at isa? port 0x388 irq 10 drq 6 vector pasintr device sb0 at isa? port 0x220 irq 5 drq 1 vector sbintr device sbxvi0 at isa? drq 5 device sbmidi0 at isa? port 0x330 device awe0 at isa? port 0x620 device gus0 at isa? port 0x220 irq 12 drq 1 vector gusintr #device gus0 at isa? port 0x220 irq 12 drq 1 flags 0x3 vector gusintr device mss0 at isa? port 0x530 irq 10 drq 1 vector adintr device css0 at isa? port 0x534 irq 5 drq 1 flags 0x08 vector adintr device sscape0 at isa? port 0x330 irq 9 drq 0 vector sscapeintr device trix0 at isa? port 0x330 irq 6 drq 0 vector sscapeintr device sscape_mss0 at isa? port 0x534 irq 5 drq 1 vector sndintr device opl0 at isa? port 0x388 device mpu0 at isa? port 0x330 irq 6 drq 0 device uart0 at isa? port 0x330 irq 5 vector "m6850intr" # Luigi's snd code (use INSTEAD of snd0 and all VOXWARE drivers!). # You may also wish to enable the pnp controller with this, for pnp # sound cards. # #device pcm0 at isa? port ? tty irq 10 drq 1 flags 0x0 vector pcmintr # Not controlled by `snd' device pca0 at isa? port IO_TIMER1 tty # # Miscellaneous hardware: # # mcd: Mitsumi CD-ROM # scd: Sony CD-ROM # matcd: Matsushita/Panasonic CD-ROM # wt: Wangtek and Archive QIC-02/QIC-36 tape drives # ctx: Cortex-I frame grabber # apm: Laptop Advanced Power Management (experimental) # spigot: The Creative Labs Video Spigot video-acquisition board # meteor: Matrox Meteor video capture board # alog: Industrial Computer Source AIO8-P driver # bktr: Bt848 capture boards (http://www.freebsd.org/~fsmp/HomeAuto/Bt848.html) # cy: Cyclades serial driver # dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!) # gp: National Instruments AT-GPIB and AT-GPIB/TNT board # asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey # gsc: Genius GS-4500 hand scanner. # joy: joystick # labpc: National Instrument's Lab-PC and Lab-PC+ # rc: RISCom/8 multiport card # rp: Comtrol Rocketport(ISA) - single card # tw: TW-523 power line interface for use with X-10 home control products # si: Specialix SI/XIO 4-32 port terminal multiplexor # stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based) # stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent) # # Notes on APM # The flags takes the following meaning for apm0: # 0x0020 Statclock is broken. # 0x0011 Limit APM protocol to 1.1 or 1.0 # 0x0010 Limit APM protocol to 1.0 # # # Notes on the spigot: # The video spigot is at 0xad6. This port address can not be changed. # The irq values may only be 10, 11, or 15 # I/O memory is an 8kb region. Possible values are: # 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff # The start address must be on an even boundary. # Add the following option if you want to allow non-root users to be able # to access the spigot. This option is not secure because it allows users # direct access to the I/O page. # options SPIGOT_UNSECURE # # Notes on the Comtrol Rocketport driver: # # The exact values used for rp0 depend on how many boards you have # in the system. 
The manufacturer's sample configs are listed as: # # Comtrol Rocketport ISA single card # device rp0 at isa? port 0x280 tty # # If instead you have two ISA cards, one installed at 0x100 and the # second installed at 0x180, then you should add the following to # your kernel configuration file: # # device rp0 at isa? port 0x100 tty # device rp1 at isa? port 0x180 tty # # For 4 ISA cards, it might be something like this: # # device rp0 at isa? port 0x180 tty # device rp1 at isa? port 0x100 tty # device rp2 at isa? port 0x340 tty # device rp3 at isa? port 0x240 tty # # And for PCI cards, you need only say: # # device rp0 # device rp1 # ... # Note: Make sure that any Rocketport PCI devices are specified BEFORE the # ISA Rocketport devices. # Notes on the Digiboard driver: # # The following flag values have special meanings: # 0x01 - alternate layout of pins # 0x02 - use the windowed PC/Xe in 64K mode # Notes on the Specialix SI/XIO driver: # **This is NOT a Specialix supported Driver!** # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. # Notes on the Stallion stl and stli drivers: # See src/i386/isa/README.stl for complete instructions. # This is version 0.0.5alpha, unsupported by Stallion. # The stl driver has a secondary IO port hard coded at 0x280. You need # to change src/i386/isa/stallion.c if you reconfigure this on the boards. # The "flags" and "iosiz" settings on the stli driver depend on the board: # EasyConnection 8/64 ISA: flags 23 iosiz 0x1000 # EasyConnection 8/64 EISA: flags 24 iosiz 0x10000 # EasyConnection 8/64 MCA: flags 25 iosiz 0x1000 # ONboard ISA: flags 4 iosiz 0x10000 # ONboard EISA: flags 7 iosiz 0x10000 # ONboard MCA: flags 3 iosiz 0x10000 # Brumby: flags 2 iosiz 0x4000 # Stallion: flags 1 iosiz 0x10000 device mcd0 at isa? port 0x300 bio irq 10 vector mcdintr # for the Sony CDU31/33A CDROM device scd0 at isa? port 0x230 bio # for the SoundBlaster 16 multicd - up to 4 devices controller matcd0 at isa? port 0x230 bio device wt0 at isa? port 0x300 bio irq 5 drq 1 vector wtintr device ctx0 at isa? port 0x230 iomem 0xd0000 device spigot0 at isa? port 0xad6 irq 15 iomem 0xee000 vector spigintr device apm0 at isa? device gp0 at isa? port 0x2c0 tty device gsc0 at isa? port "IO_GSC1" tty drq 3 device joy0 at isa? port "IO_GAME" device alog0 at isa? port 0x260 tty irq 5 vector alogintr device cy0 at isa? tty irq 10 iomem 0xd4000 iosiz 0x2000 vector cyintr device dgb0 at isa? port 0x220 iomem 0xfc0000 iosiz ? tty device labpc0 at isa? port 0x260 tty irq 5 vector labpcintr device rc0 at isa? port 0x220 tty irq 12 vector rcintr device rp0 at isa? port 0x280 tty # the port and irq for tw0 are fictitious device tw0 at isa? port 0x380 tty irq 11 vector twintr device si0 at isa? iomem 0xd0000 tty irq 12 vector siintr device asc0 at isa? port IO_ASC1 tty drq 3 irq 10 vector ascintr device bqu0 at isa? port 0x150 device stl0 at isa? port 0x2a0 tty irq 10 vector stlintr device stli0 at isa? port 0x2a0 tty iomem 0xcc000 flags 23 iosiz 0x1000 device loran0 at isa? port ? tty irq 5 vector loranintr # # EISA devices: # # The EISA bus device is eisa0. It provides auto-detection and # configuration support for all devices on the EISA bus. # # The `ahb' device provides support for the Adaptec 174X adapter. # # The `ahc' device provides support for the Adaptec 274X and 284X # adapters. The 284X, although a VLB card, responds to EISA probes.
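The stli flags/iosiz pairs above are fixed per board type; a small C table, assuming the values quoted above, that prints a candidate config line for each (the struct and names are mine, for illustration only):

#include <stdio.h>

/* stli 'flags'/'iosiz' pairs from the table above. */
struct stli_board { const char *name; int flags; long iosiz; };

static const struct stli_board boards[] = {
    { "EasyConnection 8/64 ISA",  23, 0x1000  },
    { "EasyConnection 8/64 EISA", 24, 0x10000 },
    { "EasyConnection 8/64 MCA",  25, 0x1000  },
    { "ONboard ISA",               4, 0x10000 },
    { "ONboard EISA",              7, 0x10000 },
    { "ONboard MCA",               3, 0x10000 },
    { "Brumby",                    2, 0x4000  },
    { "Stallion",                  1, 0x10000 },
};

int main(void)
{
    for (unsigned i = 0; i < sizeof(boards) / sizeof(boards[0]); i++)
        printf("flags %d iosiz 0x%lx\t# %s\n",
               boards[i].flags, boards[i].iosiz, boards[i].name);
    return 0;
}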
# # fea: DEC DEFEA EISA FDDI adapter # controller eisa0 controller ahb0 controller ahc0 device fea0 # enable tagged command queuing, which is a major performance win on # devices that support it (and controllers with enough SCB's) options AHC_TAGENABLE # enable SCB paging - See the ahc.4 man page options AHC_SCBPAGING_ENABLE # The aic7xxx driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. Unfortunately, # this doesn't work on some motherboards, which prevents it from being the # default. options AHC_ALLOW_MEMIO # By default, only 10 EISA slots are probed, since the slot numbers # above clash with the configuration address space of the PCI subsystem, # and the EISA probe is not very smart about this. This is sufficient # for most machines, but in particular the HP NetServer LC series comes # with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11, # thus you need to bump this figure to 12 for them. options "EISA_SLOTS=12" # # PCI devices: # # The main PCI bus device is `pci'. It provides auto-detection and # configuration support for all devices on the PCI bus, using either # configuration mode defined in the PCI specification. # # The `ahc' device provides support for the Adaptec 29/3940(U)(W) # and motherboard based AIC7870/AIC7880 adapters. # # The `ncr' device provides support for the NCR 53C810 and 53C825 # self-contained SCSI host adapters. # # The `amd' device provides support for the Tekram DC-390 and 390T # SCSI host adapters, but is expected to work with any AMD 53c974 # PCI SCSI chip and the AMD Ethernet+SCSI Combo chip, after some # local patches were applied to the sources (that had originally # been written by Tekram and limited to work with their SCSI cards). # # The `de' device provides support for the Digital Equipment DC21040 # self-contained Ethernet adapter. # # The `fxp' device provides support for the Intel EtherExpress Pro/100B # PCI Fast Ethernet adapters. # # The `tx' device provides support for the SMC 9432TX cards. # # The `vx' device provides support for the 3Com 3C590 and 3C595 # (early support). # # The `fpa' device provides support for the Digital DEFPA PCI FDDI # adapter. pseudo-device fddi is also needed. # # The `meteor' device is a PCI video capture board. It can also have the # following options: # options METEOR_ALLOC_PAGES=xxx preallocate kernel pages for data entry # figure (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE # options METEOR_DEALLOC_PAGES remove all allocated pages on close(2) # options METEOR_DEALLOC_ABOVE=xxx remove all allocated pages above the # specified amount. If this value is below the allocated amount, no action is # taken # option METEOR_SYSTEM_DEFAULT={METEOR_PAL|METEOR_NTSC|METEOR_SECAM}, used # for initialization of fps routine when a signal is not present. # # The 'bktr' device is a PCI video capture board. It also has a TV tuner # on board. # controller pci0 controller ahc1 controller ncr0 controller amd0 device de0 device fxp0 device tx0 device vx0 device fpa0 device meteor0 device bktr0 # # PCCARD/PCMCIA # # card: slot controller # pcic: slots controller card0 controller pcic0 at card? controller pcic1 at card? # # Laptop/Notebook options: # # See also: # apm under `Miscellaneous hardware' # above.
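The METEOR_ALLOC_PAGES figure comes from the round-up formula quoted above; a C sketch of the arithmetic (the 640x480, 2-bytes-per-pixel, one-frame, 4KB-page numbers are example assumptions of mine, not values from this file):

#include <stdio.h>

/* (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE, per the text above. */
static long meteor_pages(long rows, long cols, long bpp, long frames, long pgsz)
{
    return (rows * cols * bpp * frames + pgsz - 1) / pgsz;  /* round up */
}

int main(void)
{
    /* e.g. 480 rows x 640 columns, 2 bytes/pixel, 1 frame, 4096-byte pages */
    printf("options METEOR_ALLOC_PAGES=%ld\n",
           meteor_pages(480, 640, 2, 1, 4096));   /* prints 150 */
    return 0;
}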
# For older notebooks that signal a powerfail condition (external # power supply dropped, or battery state low) by issuing an NMI: options POWERFAIL_NMI # make it beep instead of panicking # # Parallel-Port Bus # # Parallel port bus support is provided by the `ppbus' device. # Multiple devices may be attached to the parallel port, devices # are automatically probed and attached when found. # # Supported devices: # vpo Iomega Zip Drive # Requires SCSI disk support ('scbus' and 'sd'), best # performance is achieved with ports in EPP 1.9 mode. # nlpt Parallel Printer # ppi General-purpose I/O ("Geek Port") # # Supported interfaces: # ppc ISA-bus parallel port interfaces. # controller ppbus0 controller vpo0 at ppbus? device nlpt0 at ppbus? device ppi0 at ppbus? device pps0 at ppbus? controller ppc0 at isa? disable port ? irq 7 vector ppcintr # Kernel BOOTP support options BOOTP # Use BOOTP to obtain IP address/hostname options BOOTP_NFSROOT # NFS mount root filesystem using BOOTP info options "BOOTP_NFSV3" # Use NFS v3 to NFS mount root options BOOTP_COMPAT # Workaround for broken bootp daemons. # # An obsolete option to test kern_opt.c. # options GATEWAY # If you want to disable loadable kernel modules (LKM), you # might want to use this option. #options NO_LKM # # Add tie-ins for a hardware watchdog. This only enables the hooks; # the user must still supply the actual driver. # options HW_WDOG # More undocumented options for linting. options CLK_CALIBRATION_LOOP options "CLK_USE_I8254_CALIBRATION" options CLK_USE_TSC_CALIBRATION options CLUSTERDEBUG options COMPAT_LINUX options CPU_UPGRADE_HW_CACHE options DEBUG options "DEBUG_1284" options DEVFS_ROOT #options DISABLE_PSE options "EXT2FS" options "I586_PMC_GUPROF=0x70000" options "IBCS2" # broken: #options IPFILTER options KEY options KEY_DEBUG options LOCKF_DEBUG options LOUTB options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options KBDIO_DEBUG=2 options MSGMNB=2049 options MSGMNI=41 options MSGSEG=2049 options MSGSSZ=16 options MSGTQL=41 options NBUF=512 options NETATALKDEBUG options NMBCLUSTERS=1024 options NPX_DEBUG options NULLFS_DIAGNOSTIC options PANIC_REBOOT_WAIT_TIME=16 options "PCVT_24LINESDEF" options PCVT_CTRL_ALT_DEL options PCVT_EMU_MOUSE options PCVT_FREEBSD=211 options PCVT_META_ESC options PCVT_NSCREENS=9 options PCVT_PRETTYSCRNS options PCVT_SCANSET=2 options PCVT_SCREENSAVER options PCVT_USEKBDSEC options "PCVT_VT220KEYB" options PSM_DEBUG=1 options "SCSI_2_DEF" options SCSI_DELAY=8 # Be pessimistic about Joe SCSI device options SCSI_NCR_DEBUG options SCSI_NCR_DFLT_TAGS=4 options SCSI_NCR_MAX_SYNC=10000 options SCSI_NCR_MAX_WIDE=1 options SCSI_NCR_MYADDR=7 options SEMMAP=31 options SEMMNI=11 options SEMMNS=61 options SEMMNU=31 options SEMMSL=61 options SEMOPM=101 options SEMUME=11 options SHOW_BUSYBUFS # List buffers that prevent root unmount options SHMALL=1025 options "SHMMAX=(SHMMAXPGS*PAGE_SIZE+1)" options SHMMAXPGS=1025 options SHMMIN=2 options SHMMNI=33 options SHMSEG=9 options SI_DEBUG options SIMPLELOCK_DEBUG options SPX_HACK # The 'dpt' driver provides hardware RAID-{0,1,5} support, multi-initiator I/O # See sys/dev/dpt for debugging and other subtle options. # DPT_VERIFY_HINTR Performs some strict hardware interrupts testing. # Only use if you suspect PCI bus corruption problems # DPT_RESTRICTED_FREELIST Normally, the freelist used by the DPT for queues # will grow to accommodate increased use. This growth # will NOT shrink.
To restrict the number of queue # slots to exactly what the DPT can hold at one time, # enable this option. # DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various # instruments are enabled. Assumed to be enabled by # /usr/sbin/dpt_* tools. # DPT_FREELIST_IS_STACK For optimal L{1,2} CPU cache utilization, enable # this option. Otherwise, the transaction queue is # a LIFO. I cannot measure the performance gain. # DPT_HANDLE_TIMEOUTS Normally device timeouts are handled by the DPT. # If you want the driver to handle timeouts, enable # this option. If your system is very busy, this # option will create more trouble than it solves. # DPT_TIMEOUT_FACTOR Used to compute the excessive amount of time to # wait when timing out with the above option. # DPT_DEBUG_xxxx These are controllable from sys/dev/dpt/dpt.h # DPT_LOST_IRQ When enabled, will try, once per second, to catch # any interrupt that got lost. Seems to help in some # DPT-firmware/Motherboard combinations. Minimal # cost, great benefit. controller dpt0 # DPT options options DPT_VERIFY_HINTR options DPT_RESTRICTED_FREELIST options DPT_MEASURE_PERFORMANCE options DPT_FREELIST_IS_STACK options DPT_HANDLE_TIMEOUTS options DPT_TIMEOUT_FACTOR=4 options DPT_INTR_DELAY=200 # Some motherboards need that options DPT_LOST_IRQ Index: head/sys/i386/conf/NOTES =================================================================== --- head/sys/i386/conf/NOTES (revision 34265) +++ head/sys/i386/conf/NOTES (revision 34266) @@ -1,1497 +1,1504 @@ # # LINT -- config file for checking all the sources, tries to pull in # as much of the source tree as it can. # -# $Id: LINT,v 1.412 1998/02/24 22:24:46 phk Exp $ +# $Id: LINT,v 1.413 1998/02/27 10:02:41 itojun Exp $ # # NB: You probably don't want to try running a kernel built from this # file. Instead, you should start from GENERIC, and add options from # this file as required. # # # This directive is mandatory; it defines the architecture to be # configured for; in this case, the 386 family based IBM-PC and # compatibles. # machine "i386" # # This is the ``identification'' of the kernel. Usually this should # be the same as the name of your kernel. # ident LINT # # The `maxusers' parameter controls the static sizing of a number of # internal system tables by a complicated formula defined in param.c. # maxusers 10 # # Certain applications can grow to be larger than the 128M limit # that FreeBSD initially imposes. Below are some options to # allow that limit to grow to 256MB, and it can be increased further # by changing the parameters. MAXDSIZ is the maximum that the # limit can be set to, and DFLDSIZ is the default value for # the limit. You might want to set the default lower than the # max, and explicitly set the maximum with a shell command for processes # that regularly exceed the limit, like INND. # options "MAXDSIZ=(256*1024*1024)" options "DFLDSIZ=(256*1024*1024)" # When this is set, be extra conservative in various parts of the kernel # and choose functionality over speed (on the widest variety of systems). options FAILSAFE # This allows you to actually store this configuration file into # the kernel binary itself, where it may be later read by saying: # strings /kernel | grep ^___ | sed -e 's/^___//' > MYKERNEL # options INCLUDE_CONFIG_FILE # Include this file in kernel # # This directive defines a number of things: # - The compiled kernel is to be called `kernel' # - The root filesystem might be on partition wd0a # - Crash dumps will be written to wd0b, if possible.
Specifying the # dump device here is not recommended. Use dumpon(8). # config kernel root on wd0 dumps on wd0 ##################################################################### # SMP OPTIONS: # # SMP enables building of a Symmetric MultiProcessor Kernel. # APIC_IO enables the use of the IO APIC for Symmetric I/O. # NCPU sets the number of CPUs, defaults to 2. # NBUS sets the number of busses, defaults to 4. # NAPIC sets the number of IO APICs on the motherboard, defaults to 1. # NINTR sets the total number of INTs provided by the motherboard. # # Notes: # # An SMP kernel will ONLY run on an Intel MP spec. qualified motherboard. # # Be sure to disable 'cpu "I386_CPU"' && 'cpu "I486_CPU"' for SMP kernels. # # Check the 'Rogue SMP hardware' section to see if additional options # are required by your hardware. # # Mandatory: options SMP # Symmetric MultiProcessor Kernel options APIC_IO # Symmetric (APIC) I/O # Optional, these are the defaults plus 1: options NCPU=5 # number of CPUs options NBUS=5 # number of busses options NAPIC=2 # number of IO APICs options NINTR=25 # number of INTs # # Rogue SMP hardware: # # Bridged PCI cards: # # The MP tables of most of the current generation MP motherboards # do NOT properly support bridged PCI cards. To use one of these # cards you should refer to ??? ##################################################################### # CPU OPTIONS # # You must specify at least one CPU (the one you intend to run on); # deleting the specification for CPUs you don't need to use may make # parts of the system run faster. This is especially true when removing # I386_CPU. # cpu "I386_CPU" cpu "I486_CPU" cpu "I586_CPU" # aka Pentium(tm) cpu "I686_CPU" # aka Pentium Pro(tm) # # Options for CPU features. # # CPU_BLUELIGHTNING_FPU_OP_CACHE enables FPU operand cache on IBM # BlueLightning CPU. It works only with Cyrix FPU, and this option # should not be used with Intel FPU. # # CPU_BLUELIGHTNING_3X enables triple-clock mode on IBM Blue Lightning # CPU if CPU supports it. The default is double-clock mode on # BlueLightning CPU box. # # CPU_BTB_EN enables branch target buffer on Cyrix 5x86 (NOTE 1). # # CPU_DIRECT_MAPPED_CACHE sets L1 cache of Cyrix 486DLC CPU in direct # mapped mode. Default is 2-way set associative mode. # # CPU_CYRIX_NO_LOCK enables weak locking for the entire address space # of Cyrix 6x86 and 6x86MX CPUs. If this option is not set and # FAILSAFE is defined, NO_LOCK bit of CCR1 is cleared. (NOTE 3) # # CPU_DISABLE_5X86_LSSER disables load store serialize (i.e. enables # reorder). This option should not be used if you use memory mapped # I/O device(s). # # CPU_FASTER_5X86_FPU enables faster FPU exception handler. # # CPU_I486_ON_386 enables CPU cache on i486 based CPU upgrade products # for i386 machines. # # CPU_IORT defines I/O clock delay time (NOTE 1). Default values of # I/O clock delay time on Cyrix 5x86 and 6x86 are 0 and 7, respectively # (no clock delay). # # CPU_LOOP_EN prevents flushing the prefetch buffer if the destination # of a jump is already present in the prefetch buffer on Cyrix 5x86 (NOTE # 1). # # CPU_RSTK_EN enables return stack on Cyrix 5x86 (NOTE 1). # # CPU_SUSP_HLT enables suspend on HALT. If this option is set, CPU # enters suspend mode following execution of HALT instruction. # # CPU_WT_ALLOC enables write-through allocation. # # CYRIX_CACHE_WORKS enables CPU cache on Cyrix 486 CPUs with cache # flush at hold state.
# # CYRIX_CACHE_REALLY_WORKS enables (1) CPU cache on Cyrix 486 CPUs # without cache flush at hold state, and (2) write-back CPU cache on # Cyrix 6x86 whose revision < 2.7 (NOTE 2). # # NO_F00F_HACK disables the hack that prevents Pentiums (and ONLY # Pentiums) from locking up when a LOCK CMPXCHG8B instruction is # executed. This should be included for ALL kernels that won't run # on a Pentium. # # NOTE 1: The options CPU_BTB_EN, CPU_LOOP_EN, CPU_IORT, # and CPU_RSTK_EN should not be used because of CPU bugs. # These options may crash your system. # # NOTE 2: If CYRIX_CACHE_REALLY_WORKS is not set, CPU cache is enabled # in write-through mode when revision < 2.7. If revision of Cyrix # 6x86 >= 2.7, CPU cache is always enabled in write-back mode. # # NOTE 3: This option may cause failures for software that requires # locked cycles in order to operate correctly. # options "CPU_BLUELIGHTNING_FPU_OP_CACHE" options "CPU_BLUELIGHTNING_3X" options "CPU_BTB_EN" options "CPU_DIRECT_MAPPED_CACHE" options "CPU_DISABLE_5X86_LSSER" options "CPU_FASTER_5X86_FPU" options "CPU_I486_ON_386" options "CPU_IORT" options "CPU_LOOP_EN" options "CPU_RSTK_EN" options "CPU_SUSP_HLT" options "CYRIX_CACHE_WORKS" options "CYRIX_CACHE_REALLY_WORKS" #options "NO_F00F_HACK" # # A math emulator is mandatory if you wish to run on hardware which # does not have a floating-point processor. Pick either the original, # bogus (but freely-distributable) math emulator, or a much more # fully-featured but GPL-licensed emulator taken from Linux. # options MATH_EMULATE #Support for x87 emulation # Don't enable both of these in a real config. options GPL_MATH_EMULATE #Support for x87 emulation via #new math emulator ##################################################################### # COMPATIBILITY OPTIONS # # Implement system calls compatible with 4.3BSD and older versions of # FreeBSD. You probably do NOT want to remove this as much current code # still relies on the 4.3 emulation. # options "COMPAT_43" # # Allow user-mode programs to manipulate their local descriptor tables. # This option is required for the WINE Windows(tm) emulator, and is # not used by anything else (that we know of). # options USER_LDT #allow user-level control of i386 ldt # # These three options provide support for System V Interface # Definition-style interprocess communication, in the form of shared # memory, semaphores, and message queues, respectively. # options SYSVSHM options SYSVSEM options SYSVMSG # # This option includes an MD5 routine in the kernel; this is used for # various authentication and privacy purposes. # options "MD5" # # Allow processes to switch to vm86 mode, as well as enabling direct # user-mode access to the I/O port space. This option is necessary for # the doscmd emulator to run. # options "VM86" ##################################################################### # DEBUGGING OPTIONS # # Enable the kernel debugger. # options DDB # # Don't drop into DDB for a panic. Intended for unattended operation # where you may want to drop to DDB from the console, but still want # the machine to recover from a panic # options DDB_UNATTENDED # # If using GDB remote mode to debug the kernel, there's a non-standard # extension to the remote protocol that can be used to use the serial # port as both the debugging port and the system console. It's non- # standard and you're on your own if you enable it. See also the # "remotechat" variables in the FreeBSD specific version of gdb.
# options GDB_REMOTE_CHAT # # KTRACE enables the system-call tracing facility ktrace(2). # options KTRACE #kernel tracing # # The DIAGNOSTIC option is used in a number of source files to enable # extra sanity checking of internal structures. This support is not # enabled by default because of the extra time it would take to check # for these conditions, which can only occur as a result of # programming errors. # options DIAGNOSTIC # # PERFMON causes the driver for Pentium/Pentium Pro performance counters # to be compiled. See perfmon(4) for more information. # options PERFMON # # This option lets some drivers co-exist that can't co-exist in a running # system. This is used to be able to compile all kernel code in one go for # quality assurance purposes (like this file, which the option takes its name # from.) # options COMPILING_LINT # XXX - this doesn't belong here. # Allow ordinary users to take the console - this is useful for X. options UCONSOLE # XXX - this doesn't belong here either options USERCONFIG #boot -c editor options USERCONFIG_BOOT #imply -c and parse info area options VISUAL_USERCONFIG #visual boot -c editor ##################################################################### # NETWORKING OPTIONS # # Protocol families: # Only the INET (Internet) family is officially supported in FreeBSD. # Source code for the NS (Xerox Network Service) is provided for amusement # value. # options INET #Internet communications protocols options IPX #IPX/SPX communications protocols options IPXIP #IPX in IP encapsulation (not available) options IPTUNNEL #IP in IPX encapsulation (not available) options NETATALK #Appletalk communications protocols # These are currently broken but are shipped due to interest. #options NS #Xerox NS protocols # These are currently broken and are no longer shipped due to lack # of interest. #options CCITT #X.25 network layer #options ISO #options TPIP #ISO TP class 4 over IP #options TPCONS #ISO TP class 0 over X.25 #options LLC #X.25 link layer for Ethernets #options HDLC #X.25 link layer for serial lines #options EON #ISO CLNP over IP #options NSIP #XNS over IP # # Network interfaces: # The `loop' pseudo-device is MANDATORY when networking is enabled. # The `ether' pseudo-device provides generic code to handle # Ethernets; it is MANDATORY when an Ethernet device driver is # configured. # The 'fddi' pseudo-device provides generic code to support FDDI. # The `sppp' pseudo-device serves a similar role for certain types # of synchronous PPP links (like `cx', `ar'). # The `sl' pseudo-device implements the Serial Line IP (SLIP) service. # The `ppp' pseudo-device implements the Point-to-Point Protocol. # The `bpfilter' pseudo-device enables the Berkeley Packet Filter. Be # aware of the legal and administrative consequences of enabling this # option. The number of devices determines the maximum number of # simultaneous BPF client programs runnable. # The `disc' pseudo-device implements a minimal network interface, # which throws away all packets sent and never receives any. It is # included for testing purposes. # The `tun' pseudo-device implements the User Process PPP (iijppp) # # The PPP_BSDCOMP option enables support for compress(1) style entire # packet compression, while PPP_DEFLATE is for zlib/gzip style compression. # PPP_FILTER enables code for filtering the ppp data stream and selecting # events for resetting the demand dial activity timer - requires bpfilter. # See pppd(8) for more details.
# pseudo-device ether #Generic Ethernet pseudo-device fddi #Generic FDDI pseudo-device sppp #Generic Synchronous PPP pseudo-device loop #Network loopback device pseudo-device bpfilter 4 #Berkeley packet filter pseudo-device disc #Discard device pseudo-device tun 1 #Tunnel driver (user process ppp(8)) pseudo-device sl 2 #Serial Line IP pseudo-device ppp 2 #Point-to-point protocol options PPP_BSDCOMP #PPP BSD-compress support options PPP_DEFLATE #PPP zlib/deflate/gzip support options PPP_FILTER #enable bpf filtering (needs bpfilter) # # Internet family options: # # TCP_COMPAT_42 causes the TCP code to emulate certain bugs present in # 4.2BSD. This option should not be used unless you have a 4.2BSD # machine and TCP connections fail. # # MROUTING enables the kernel multicast packet forwarder, which works # with mrouted(8). # # IPFIREWALL enables support for IP firewall construction, in # conjunction with the `ipfw' program. IPFIREWALL_VERBOSE sends # logged packets to the system logger. IPFIREWALL_VERBOSE_LIMIT # limits the number of times a matching entry can be logged. # # WARNING: IPFIREWALL defaults to a policy of "deny ip from any to any" # and if you do not add other rules during startup to allow access, # YOU WILL LOCK YOURSELF OUT. It is suggested that you set firewall=open # in /etc/rc.conf when first enabling this feature, then refine the # firewall rules in /etc/rc.firewall after you've tested that the new kernel # feature works properly. # # IPFIREWALL_DEFAULT_TO_ACCEPT causes the default rule (at boot) to # allow everything. Use with care: if a cracker can crash your # firewall machine, they can get to your protected machines. However, # if you are using it as an as-needed filter for specific problems as # they arise, then this may be for you. Changing the default to 'allow' # means that you won't get stuck if the kernel and /sbin/ipfw binary get # out of sync. # # IPDIVERT enables the divert IP sockets, used by ``ipfw divert'' # # TCPDEBUG is undocumented. # options "TCP_COMPAT_42" #emulate 4.2BSD TCP bugs options MROUTING # Multicast routing options IPFIREWALL #firewall options IPFIREWALL_VERBOSE #print information about # dropped packets options "IPFIREWALL_VERBOSE_LIMIT=100" #limit verbosity options IPFIREWALL_DEFAULT_TO_ACCEPT #allow everything by default options IPDIVERT #divert sockets options TCPDEBUG ##################################################################### # FILESYSTEM OPTIONS # # Only the root, /usr, and /tmp filesystems need be statically # compiled; everything else will be automatically loaded at mount # time. (Exception: the UFS family --- FFS and MFS --- cannot # currently be demand-loaded.) Some people still prefer to statically # compile other filesystems as well. # # NB: The NULL, PORTAL, UMAP and UNION filesystems are known to be # buggy, and WILL panic your system if you attempt to do anything with # them. They are included here as an incentive for some enterprising # soul to sit down and fix them. # # One of these is mandatory: options FFS #Fast filesystem options NFS #Network File System # The rest are optional: # options NFS_NOSERVER #Disable the NFS-server code.
options "CD9660" #ISO 9660 filesystem options FDESC #File descriptor filesystem options KERNFS #Kernel filesystem options MFS #Memory File System options MSDOSFS #MS DOS File System options NULLFS #NULL filesystem options PORTAL #Portal filesystem options PROCFS #Process filesystem options UMAPFS #UID map filesystem options UNION #Union filesystem options "CD9660_ROOT" #CD-ROM usable as root device options FFS_ROOT #FFS usable as root device options NFS_ROOT #NFS usable as root device # This DEVFS is experimental but seems to work options DEVFS #devices filesystem + +# Allow the FFS to use Softupdates technology. +# To do this you need to fetch the two files +# /sys/ufs/ffs/softdep.h and /sys/ufs/ffs/ffs_softdep.c +# from freebsd.org and understand the licensing restrictions. +#options SOFTUPDATES +# (we can't actually enable it because the files may not be present) # Make space in the kernel for a MFS root filesystem. Define to the number # of kilobytes to reserve for the filesystem. options MFS_ROOT=10 # Allow the MFS_ROOT code to load the MFS image from floppy if it is missing. options MFS_AUTOLOAD # Allow this many swap-devices. options NSWAPDEV=20 # Disk quotas are supported when this option is enabled. If you # change the value of this option, you must do a `make clean' in your # kernel compile directory in order to get a working kernel. # options QUOTA #enable disk quotas # Add more checking code to various filesystems #options NULLFS_DIAGNOSTIC #options KERNFS_DIAGNOSTIC #options UMAPFS_DIAGNOSTIC #options UNION_DIAGNOSTIC # In particular multi-session CD-Rs might require a huge amount of # time in order to "settle". If we are about mounting them as the # root f/s, we gotta wait a little. # # The number is supposed to be in seconds. options "CD9660_ROOTDELAY=20" # If you are running a machine just as a fileserver for PC and MAC users. # (using SAMBA or Netatalk), then you may consider setting this option # and keeping all those user's directories on a partition that is mounted # with the suiddir option. This gives new files the same ownership as # the directory (similiar to group). It's a security hole if you let # these users run programs so confine it to file-servers, (but it'll save you # lots of headaches in that case). Root owned directories are excempt and X bits # are cleared. the suid bit must be set on the directory as well. see chmod(1) # PC owners can't see/set ownerships so they keep getting their toes # trodden on. This saves you all the support calls as the filesystem # it's used on will act as they expect. ("It's my dir so it must be my file"). # options SUIDDIR # Add some error checking code to the null_bypass routine # in the NULL filesystem #options SAFETY ##################################################################### # SCSI DEVICES # SCSI DEVICE CONFIGURATION # The SCSI subsystem consists of the `base' SCSI code, a number of # high-level SCSI device `type' drivers, and the low-level host-adapter # device drivers. The host adapters are listed in the ISA and PCI # device configuration sections below. # # Beginning with FreeBSD 2.0.5 you can wire down your SCSI devices so # that a given bus, target, and LUN always come on line as the same # device unit. In earlier versions the unit numbers were assigned # in the order that the devices were probed on the SCSI bus. 
This # means that if you removed a disk drive, you may have had to rewrite # your /etc/fstab file, and also that you had to be careful when adding # a new disk as it may have been probed earlier and moved your device # configuration around. # This old behavior is maintained as the default behavior. The unit # assignment begins with the first non-wired down unit for a device # type. For example, if you wire a disk as "sd3", then the first # non-wired disk will be assigned sd4. # The syntax for wiring down devices is: # controller scbus0 at ahc0 # Single bus device # controller scbus1 at ahc1 bus 0 # Single bus device # controller scbus3 at ahc2 bus 0 # Twin bus device # controller scbus2 at ahc2 bus 1 # Twin bus device # disk sd0 at scbus0 target 0 unit 0 # disk sd1 at scbus3 target 1 # disk sd2 at scbus2 target 3 # tape st1 at scbus1 target 6 # device cd0 at scbus? # "units" (SCSI logical unit number) that are not specified are # treated as if specified as LUN 0. # All SCSI devices allocate as many units as are required. # The "unknown" device (uk? in pre-2.0.5) is now part of the base SCSI # configuration and doesn't have to be explicitly configured. controller scbus0 #base SCSI code device ch0 #SCSI media changers device sd0 #SCSI disks device st0 #SCSI tapes device cd0 #SCSI CD-ROMs device od0 #SCSI optical disk # The previous devices (ch, sd, st, cd) are recognized by config. # config doesn't (and shouldn't) know about these newer ones, # so we have to specify that they are on a SCSI bus with the "at scbus?" # clause. device worm0 at scbus? # SCSI worm device pt0 at scbus? # SCSI processor type device sctarg0 at scbus? # SCSI target # SCSI OPTIONS: # SCSIDEBUG: When defined enables debugging macros # NO_SCSI_SENSE: When defined disables sense descriptions (about 4k) # SCSI_REPORT_GEOMETRY: Always report disk geometry at boot up instead # of only when booting verbosely. options SCSIDEBUG #options NO_SCSI_SENSE options SCSI_REPORT_GEOMETRY # Options for the `od' optical disk driver: # # If the drive returns sense key 0x02 with a vendor-specific additional # sense code (ASC) and additional sense code qualifier (ASCQ), or an # illegal ASC and ASCQ, this causes an error (NOT READY) and a retry. # To suppress this, use the following option. # options OD_BOGUS_NOT_READY # # For an automatic spindown, try this. Again, preferably as an # option in your config file. # WARNING! Use at your own risk. Joerg's ancient SONY SMO drive # groks it fine, while Shunsuke's Fujitsu chokes on it and times # out. # options OD_AUTO_TURNOFF ##################################################################### # MISCELLANEOUS DEVICES AND OPTIONS # The `pty' device usually turns out to be ``effectively mandatory'', # as it is required for `telnetd', `rlogind', `screen', `emacs', and # `xterm', among others. pseudo-device pty 16 #Pseudo ttys - can go as high as 256 pseudo-device speaker #Play IBM BASIC-style noises out your speaker pseudo-device gzip #Exec gzipped a.out's pseudo-device vn #Vnode driver (turns a file into a device) pseudo-device snp 3 #Snoop device - to look at pty/vty/etc.. pseudo-device ccd 4 #Concatenated disk driver # These are only for watching for bitrot in old tty code. # broken #pseudo-device tb # These are only for watching for bitrot in old SCSI code.
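A toy C model of the wired/non-wired unit numbering rule described above (this is just the arithmetic, not the kernel's actual probe code): with a disk wired down as sd3, probed non-wired disks start at sd4.

#include <stdio.h>

int main(void)
{
    int wired_max = 3;  /* e.g. a disk wired down as sd3 in the config */

    /* Non-wired units are assigned after the highest wired-down unit. */
    for (int probe = 0; probe < 3; probe++)
        printf("non-wired disk %d attaches as sd%d\n",
               probe, wired_max + 1 + probe);
    return 0;   /* prints sd4, sd5, sd6 */
}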
pseudo-device su #scsi user pseudo-device ssc #super scsi ##################################################################### # HARDWARE DEVICE CONFIGURATION # ISA and EISA devices: # EISA support is available for some device, so they can be auto-probed. # Micro Channel is not supported at all. # # Mandatory ISA devices: isa, npx # controller isa0 # # Options for `isa': # # AUTO_EOI_1 enables the `automatic EOI' feature for the master 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # This option breaks suspend/resume on some portables. # # AUTO_EOI_2 enables the `automatic EOI' feature for the slave 8259A # interrupt controller. This saves about 0.7-1.25 usec for each interrupt. # Automatic EOI is documented not to work for for the slave with the # original i8259A, but it works for some clones and some integrated # versions. # # BOUNCE_BUFFERS provides support for ISA DMA on machines with more # than 16 megabytes of memory. It doesn't hurt on other machines. # Some broken EISA and VLB hardware may need this, too. # # MAXMEM specifies the amount of RAM on the machine; if this is not # specified, FreeBSD will first read the amount of memory from the CMOS # RAM, so the amount of memory will initially be limited to 64MB or 16MB # depending on the BIOS. If the BIOS reports 64MB, a memory probe will # then attempt to detect the installed amount of RAM. If this probe # fails to detect >64MB RAM you will have to use the MAXMEM option. # The amount is in kilobytes, so for a machine with 128MB of RAM, it would # be 131072 (128 * 1024). # # TUNE_1542 enables the automatic ISA bus speed selection for the # Adaptec 1542 boards. Does not work for all boards, use it with caution. # # BROKEN_KEYBOARD_RESET disables the use of the keyboard controller to # reset the CPU for reboot. This is needed on some systems with broken # keyboard controllers. # # PAS_JOYSTICK_ENABLE enables the gameport on the ProAudio Spectrum options "AUTO_EOI_1" #options "AUTO_EOI_2" options BOUNCE_BUFFERS options "MAXMEM=(128*1024)" options "TUNE_1542" #options BROKEN_KEYBOARD_RESET #options PAS_JOYSTICK_ENABLE # Enable support for the kernel PLL to use an external PPS signal, # under supervision of [x]ntpd(8) # More info in ftp://ftp.udel.edu/pub/ntp/kernel.tar.Z options PPS_SYNC # Enable PnP support in the kernel. This allows you to automaticly # attach to PnP cards for drivers that support it and allows you to # configure cards from USERCONFIG. See pnp(4) for more info. controller pnp0 # The pcvt console driver (vt220 compatible). device vt0 at isa? port "IO_KBD" tty irq 1 vector pcrint options XSERVER # support for running an X server. options FAT_CURSOR # start with block cursor # This PCVT option is for keyboards such as those used on IBM ThinkPad laptops options PCVT_SCANSET=2 # IBM keyboards are non-std # The syscons console driver (sco color console compatible). device sc0 at isa? 
port "IO_KBD" tty irq 1 vector scintr options MAXCONS=16 # number of virtual consoles options SLOW_VGA # do byte-wide i/o's to TS and GDC regs options "STD8X16FONT" # Compile font in makeoptions "STD8X16FONT"="cp850" options SC_HISTORY_SIZE=200 # number of history buffer lines # # `flags' for sc0: # 0x01 Use a 'visual' bell # 0x02 Use a 'blink' cursor # 0x04 Use a 'underline' cursor # 0x06 Use a 'blinking underline' (destructive) cursor # 0x08 Force detection of keyboard, else we always assume a keyboard # 0x10 Old-style (XT) keyboard support, useful for older ThinkPads # 0x20 Don't reset keyboard, useful for some newer ThinkPads # # The Numeric Processing eXtension driver. This should be configured if # your machine has a math co-processor, unless the coprocessor is very # buggy. If it is not configured then you *must* configure math emulation # (see above). If both npx0 and emulation are configured, then only npx0 # is used (provided it works). device npx0 at isa? port "IO_NPX" iosiz 0x0 flags 0x0 irq 13 vector npxintr # # `flags' for npx0: # 0x01 don't use the npx registers to optimize bcopy # 0x02 don't use the npx registers to optimize bzero # 0x04 don't use the npx registers to optimize copyin or copyout. # The npx registers are normally used to optimize copying and zeroing when # all of the following conditions are satisfied: # "I586_CPU" is an option # the cpu is an i586 (perhaps not a Pentium) # the probe for npx0 succeeds # INT 16 exception handling works. # Then copying and zeroing using the npx registers is normally 30-100% faster. # The flags can be used to control cases where it doesn't work or is slower. # Setting them at boot time using userconfig works right (the optimizations # are not used until later in the bootstrap when npx0 is attached). # # # `iosiz' for npx0: # This can be used instead of the MAXMEM option to set the memory size. If # it is nonzero, then it overrides both the MAXMEM option and the memory # size reported by the BIOS. Setting it at boot time using userconfig takes # effect on the next reboot after the change has been recorded in the kernel # binary (the size is used early in the boot before userconfig has a chance # to change it). # # # Optional ISA and EISA devices: # # # SCSI host adapters: `aha', `aic', `bt', `nca' # # aha: Adaptec 154x # ahc: Adaptec 274x/284x/294x # aic: Adaptec 152x and sound cards using the Adaptec AIC-6360 (slow!) # bt: Most Buslogic controllers # nca: ProAudioSpectrum cards using the NCR 5380 or Trantor T130 # uha: UltraStore 14F and 34F # sea: Seagate ST01/02 8 bit controller (slow!) # wds: Western Digital WD7000 controller (no scatter/gather!). # # Note that the order is important in order for Buslogic cards to be # probed correctly. # controller bt0 at isa? port "IO_BT0" bio irq ? vector bt_isa_intr controller aha0 at isa? port "IO_AHA0" bio irq ? drq 5 vector ahaintr controller uha0 at isa? port "IO_UHA0" bio irq ? drq 5 vector uhaintr controller aic0 at isa? port 0x340 bio irq 11 vector aicintr controller nca0 at isa? port 0x1f88 bio irq 10 vector ncaintr controller nca1 at isa? port 0x1f84 controller nca2 at isa? port 0x1f8c controller nca3 at isa? port 0x1e88 controller nca4 at isa? port 0x350 bio irq 5 vector ncaintr controller sea0 at isa? bio irq 5 iomem 0xdc000 iosiz 0x2000 vector seaintr controller wds0 at isa? port 0x350 bio irq 15 drq 6 vector wdsintr # # ST-506, ESDI, and IDE hard disks: `wdc' and `wd' # # The flags fields are used to enable the multi-sector I/O and # the 32BIT I/O modes. 
The flags may be used in either the controller # definition or in the individual disk definitions. The controller # definition is supported for the boot configuration stuff. # # Each drive has a 16 bit flags value defined: # The low 8 bits are the maximum value for the multi-sector I/O, # where 0xff defaults to the maximum that the drive can handle. # The high bit of the 16 bit flags (0x8000) allows probing for # 32 bit transfers. Bit 14 (0x4000) enables a hack to wake # up powered-down laptop drives. Bit 13 (0x2000) allows # probing for PCI IDE DMA controllers, such as Intel's PIIX # south bridges. See the wd.4 man page. # # The flags field for the drives can be specified in the controller # specification with the low 16 bits for drive 0, and the high 16 bits # for drive 1. # e.g.: #controller wdc0 at isa? port "IO_WD1" bio irq 14 flags 0x00ff8004 vector wdintr # # specifies that drive 0 will be allowed to probe for 32 bit transfers and # a maximum multi-sector transfer of 4 sectors, and drive 1 will not be # allowed to probe for 32 bit transfers, but will allow multi-sector # transfers up to the maximum that the drive supports. # # If you are using a PCI controller that is not running in compatibility # mode (for example, it is a 2nd IDE PCI interface), then use config line(s) # such as: # #controller wdc2 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd4 at wdc2 drive 0 #disk wd5 at wdc2 drive 1 # #controller wdc3 at isa? port "0" bio irq ? flags 0xa0ffa0ff vector wdintr #disk wd6 at wdc3 drive 0 #disk wd7 at wdc3 drive 1 # # Note that the above config would be useful for a Promise card, when used # on a MB that already has a PIIX controller. Note the bogus irq and port # entries. These are automatically filled in by the IDE/PCI support. # controller wdc0 at isa? port "IO_WD1" bio irq 14 vector wdintr disk wd0 at wdc0 drive 0 disk wd1 at wdc0 drive 1 controller wdc1 at isa? port "IO_WD2" bio irq 15 vector wdintr disk wd2 at wdc1 drive 0 disk wd3 at wdc1 drive 1 # # Options for `wdc': # # CMD640 enables serializing access to primary and secondary channel # of the CMD640B IDE Chip. The serializing will only take place # if this option is set *and* the chip is probed by the pci-system. # options "CMD640" #Enable work around for CMD640 h/w bug # # ATAPI enables the support for ATAPI-compatible IDE devices # options ATAPI #Enable ATAPI support for IDE bus options ATAPI_STATIC #Don't do it as an LKM # IDE CD-ROM driver - requires wdc controller and ATAPI option device wcd0 # IDE floppy driver - requires wdc controller and ATAPI option device wfd0 # # Standard floppy disk controllers and floppy tapes: `fdc', `fd', and `ft' # controller fdc0 at isa? port "IO_FD1" bio irq 6 drq 2 vector fdintr # # FDC_DEBUG enables floppy debugging. Since the debug output is huge, you # gotta turn it actually on by setting the variable fd_debug with DDB, # however. options FDC_DEBUG # This option is undocumented on purpose. options FDC_PRINT_BOGUS_CHIPTYPE # # Activate this line instead of the fdc0 line above if you happen to # have an Insight floppy tape. Probing them proved to be dangerous # for people with floppy disks only, so it's "hidden" behind a flag: #controller fdc0 at isa? port "IO_FD1" bio flags 1 irq 6 drq 2 vector fdintr disk fd0 at fdc0 drive 0 disk fd1 at fdc0 drive 1 tape ft0 at fdc0 drive 2 # # Other standard PC hardware: `lpt', `mse', `psm', `sio', etc. 
# # lpt: printer port # lpt specials: # port can be specified as ?; this causes the driver to scan # the BIOS port list; # the irq and vector clauses may be omitted; this # forces the port into polling mode. # mse: Logitech and ATI InPort bus mouse ports # psm: PS/2 mouse port [note: conflicts with sc0/vt0, thus "conflicts" keywd] # sio: serial ports (see sio(4)) device lpt0 at isa? port? tty irq 7 vector lptintr device lpt1 at isa? port "IO_LPT3" tty irq 5 vector lptintr device mse0 at isa? port 0x23c tty irq 5 vector mseintr device psm0 at isa? port "IO_KBD" conflicts tty irq 12 vector psmintr # Options for psm: options PSM_HOOKAPM #hook the APM resume event, useful #for some laptops options PSM_RESETAFTERSUSPEND #reset the device at the resume event device sio0 at isa? port "IO_COM1" tty flags 0x10 irq 4 vector siointr # # `flags' for serial drivers that support consoles (only for sio now): # 0x10 enable console support for this unit. The other console flags # are ignored unless this is set. Enabling console support does # not make the unit the preferred console - boot with -h or set # the 0x20 flag for that. Currently, at most one unit can have # console support; the first one (in config file order) with # this flag set is preferred. Setting this flag for sio0 gives # the old behaviour. # 0x20 force this unit to be the console (unless there is another # higher priority console). This replaces the COMCONSOLE option. # 0x40 reserve this unit for low level console operations. Do not # use this unit for normal I/O. # # PnP `flags' (set via userconfig using pnp x flags y) # 0x1 disable probing of this device. Used to prevent your modem # from being attached as a PnP modem. # # Options for serial drivers that support consoles (only for sio now): options BREAK_TO_DEBUGGER #a BREAK on a comconsole goes to #DDB, if available. options CONSPEED=9600 #default speed for serial console (default 9600) # Options for sio: options COM_ESP #code for Hayes ESP options COM_MULTIPORT #code for some cards with shared IRQs options DSI_SOFT_MODEM #code for DSI Softmodems options "EXTRA_SIO=2" #number of extra sio ports to allocate # Other flags for sio that aren't documented in the man page. # 0x20000 enable hardware RTS/CTS and larger FIFOs. Only works for # ST16650A-compatible UARTs. # # Network interfaces: `cx', `ed', `el', `ep', `ie', `is', `le', `lnc' # # ar: Arnet SYNC/570i hdlc sync 2/4 port V.35/X.21 serial driver (requires sppp) # cx: Cronyx/Sigma multiport sync/async (with Cisco or PPP framing) # ed: Western Digital and SMC 80xx; Novell NE1000 and NE2000; 3Com 3C503 # el: 3Com 3C501 (slow!) # ep: 3Com 3C509 (buggy) # fe: Fujitsu MB86960A/MB86965A Ethernet # ie: AT&T StarLAN 10 and EN100; 3Com 3C507; unknown NI5210; Intel EtherExpress # le: Digital Equipment EtherWorks 2 and EtherWorks 3 (DEPCA, DE100, # DE101, DE200, DE201, DE202, DE203, DE204, DE205, DE422) # lnc: Lance/PCnet cards (Isolan, Novell NE2100, NE32-VL) # sr: RISCom/N2 hdlc sync 1/2 port V.35/X.21 serial driver (requires sppp) # wl: Lucent Wavelan (ISA card only). # ze: IBM/National Semiconductor PCMCIA ethernet controller. # zp: 3Com PCMCIA Etherlink III (It does not require shared memory for # send/receive operation, but it needs 'iomem' to read/write the # attribute memory) # device ar0 at isa? port 0x300 net irq 10 iomem 0xd0000 vector arintr device cx0 at isa? port 0x240 net irq 15 drq 7 vector cxintr device ed0 at isa? port 0x280 net irq 5 iomem 0xd8000 vector edintr device eg0 at isa? port 0x310 net irq 5 vector egintr device el0 at isa?
port 0x300 net irq 9 vector elintr device ep0 at isa? port 0x300 net irq 10 vector epintr device ex0 at isa? port? net irq? vector exintr device fe0 at isa? port 0x300 net irq ? vector feintr device ie0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector ieintr device ie1 at isa? port 0x360 net irq 7 iomem 0xd0000 vector ieintr device le0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector le_intr device lnc0 at isa? port 0x300 net irq 10 drq 0 vector lncintr device sr0 at isa? port 0x300 net irq 5 iomem 0xd0000 vector srintr options WLCACHE # enables the signal-strength cache options WLDEBUG # enables verbose debugging output device wl0 at isa? port 0x300 net irq ? vector wlintr # We can (bogusly) include both the dedicated PCCARD drivers and the generic # support when COMPILING_LINT. device ze0 at isa? port 0x300 net irq 5 iomem 0xd8000 vector zeintr device zp0 at isa? port 0x300 net irq 10 iomem 0xd8000 vector zpintr # # ATM related options # # The `en' device provides support for Efficient Networks (ENI) # ENI-155 PCI midway cards, and the Adaptec 155Mbps PCI ATM cards (ANA-59x0). # # atm pseudo-device provides generic atm functions and is required for # atm devices. # NATM enables the netnatm protocol family that can be used to # bypass TCP/IP. # # The current driver supports only PVC operations (no atm-arp, no multicast). # For more details, please read the original documents at # http://www.ccrc.wustl.edu/pub/chuck/bsdatm/wucs.html # pseudo-device atm device en0 device en1 options NATM #native ATM # # Audio drivers: `snd', `sb', `pas', `gus', `pca' # # snd: Voxware sound support code # sb: SoundBlaster PCM - SoundBlaster, SB Pro, SB16, ProAudioSpectrum # sbxvi: SoundBlaster 16 # sbmidi: SoundBlaster 16 MIDI interface # pas: ProAudioSpectrum PCM and MIDI # gus: Gravis Ultrasound - Ultrasound, Ultrasound 16, Ultrasound MAX # gusxvi: Gravis Ultrasound 16-bit PCM (do not use) # mss: Microsoft Sound System # css: Crystal Sound System (CSS 423x PnP) # sscape: Ensoniq Soundscape MIDI interface # sscape_mss: Ensoniq Soundscape PCM (requires sscape) # opl: Yamaha OPL-2 and OPL-3 FM - SB, SB Pro, SB 16, ProAudioSpectrum # uart: stand-alone 6850 UART for MIDI # mpu: Roland MPU-401 stand-alone card # # Beware! The addresses specified below are also hard-coded in # i386/isa/sound/sound_config.h. If you change the values here, you # must also change the values in the include file. # # pcm: PCM audio through various sound cards. # # This is the work in progress from Luigi Rizzo. This has support for # CS423x based cards, OPTi931, SB16 PnP, GusPnP. For more information # about this driver, take a look at sys/i386/isa/snd/README. # # The flags of the device tell the driver a bit more info about the # device that normally is obtained through the PnP interface. # bit 2..0 secondary DMA channel; # bit 4 set if the board uses two dma channels; # bit 15..8 board type, overrides autodetection; leave it # zero if you don't know what to put in (and you don't, # since this is unsupported at the moment...). # # This driver will use the new PnP code if it's available. # # pca: PCM audio through your PC speaker # # If you have a GUS-MAX card and want to use the CS4231 codec on the # card, the drqs for the gus max must be 8 bit (1, 2, or 3). # # If you would like to use the full duplex option on the gus, then define # flags to be the ``read dma channel''.
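# # (Added illustration, not part of the original file: encoding the pcm # `flags' bits described above, a board whose secondary DMA channel is 3 # [bits 2..0] and which uses two DMA channels [bit 4] would get flags 0x13: # #device pcm0 at isa? port ? tty irq 10 drq 1 flags 0x13 vector pcmintr # The line above is commented out and purely illustrative.)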
# # options BROKEN_BUS_CLOCK #PAS-16 isn't working and OPTI chipset # options SYMPHONY_PAS #PAS-16 isn't working and SYMPHONY chipset # options EXCLUDE_SBPRO #PAS-16 # options SBC_IRQ=5 #PAS-16. Must match irq on sb0 line. # PAS16: The order of the pas0/sb0/opl0 is important since the # sb emulation is enabled in the pas-16 attach. # # The i386/isa/sound/sound.doc has more information. # Controls all "VOXWARE" driver sound devices. See Luigi's driver # below for an alternate which may work better for some cards. # controller snd0 device pas0 at isa? port 0x388 irq 10 drq 6 vector pasintr device sb0 at isa? port 0x220 irq 5 drq 1 vector sbintr device sbxvi0 at isa? drq 5 device sbmidi0 at isa? port 0x330 device awe0 at isa? port 0x620 device gus0 at isa? port 0x220 irq 12 drq 1 vector gusintr #device gus0 at isa? port 0x220 irq 12 drq 1 flags 0x3 vector gusintr device mss0 at isa? port 0x530 irq 10 drq 1 vector adintr device css0 at isa? port 0x534 irq 5 drq 1 flags 0x08 vector adintr device sscape0 at isa? port 0x330 irq 9 drq 0 vector sscapeintr device trix0 at isa? port 0x330 irq 6 drq 0 vector sscapeintr device sscape_mss0 at isa? port 0x534 irq 5 drq 1 vector sndintr device opl0 at isa? port 0x388 device mpu0 at isa? port 0x330 irq 6 drq 0 device uart0 at isa? port 0x330 irq 5 vector "m6850intr" # Luigi's snd code (use INSTEAD of snd0 and all VOXWARE drivers!). # You may also wish to enable the pnp controller with this, for pnp # sound cards. # #device pcm0 at isa? port ? tty irq 10 drq 1 flags 0x0 vector pcmintr # Not controlled by `snd' device pca0 at isa? port IO_TIMER1 tty # # Miscellaneous hardware: # # mcd: Mitsumi CD-ROM # scd: Sony CD-ROM # matcd: Matsushita/Panasonic CD-ROM # wt: Wangtek and Archive QIC-02/QIC-36 tape drives # ctx: Cortex-I frame grabber # apm: Laptop Advanced Power Management (experimental) # spigot: The Creative Labs Video Spigot video-acquisition board # meteor: Matrox Meteor video capture board # alog: Industrial Computer Source AIO8-P driver # bktr: Bt848 capture boards (http://www.freebsd.org/~fsmp/HomeAuto/Bt848.html) # cy: Cyclades serial driver # dgb: Digiboard PC/Xi and PC/Xe series driver (ALPHA QUALITY!) # gp: National Instruments AT-GPIB and AT-GPIB/TNT board # asc: GI1904-based hand scanners, e.g. the Trust Amiscan Grey # gsc: Genius GS-4500 hand scanner. # joy: joystick # labpc: National Instrument's Lab-PC and Lab-PC+ # rc: RISCom/8 multiport card # rp: Comtrol Rocketport(ISA) - single card # tw: TW-523 power line interface for use with X-10 home control products # si: Specialix SI/XIO 4-32 port terminal multiplexor # stl: Stallion EasyIO and EasyConnection 8/32 (cd1400 based) # stli: Stallion EasyConnection 8/64, ONboard, Brumby (intelligent) # # Notes on APM # The flags take the following meanings for apm0: # 0x0020 Statclock is broken. # 0x0011 Limit APM protocol to 1.1 or 1.0 # 0x0010 Limit APM protocol to 1.0 # (see the added example under `Laptop/Notebook options' below) # # # Notes on the spigot: # The video spigot is at 0xad6. This port address cannot be changed. # The irq values may only be 10, 11, or 15 # I/O memory is an 8kb region. Possible values are: # 0a0000, 0a2000, ..., 0fffff, f00000, f02000, ..., ffffff # The start address must be on an even boundary. # Add the following option if you want to allow non-root users # to access the spigot. This option is not secure because it allows users # direct access to the I/O page. # options SPIGOT_UNSECURE # # Notes on the Comtrol Rocketport driver: # # The exact values used for rp0 depend on how many boards you have # in the system.
The manufacturer's sample configs are listed as: # # Comtrol Rocketport ISA single card # device rp0 at isa? port 0x280 tty # # If instead you have two ISA cards, one installed at 0x100 and the # second installed at 0x180, then you should add the following to # your kernel configuration file: # # device rp0 at isa? port 0x100 tty # device rp1 at isa? port 0x180 tty # # For 4 ISA cards, it might be something like this: # # device rp0 at isa? port 0x180 tty # device rp1 at isa? port 0x100 tty # device rp2 at isa? port 0x340 tty # device rp3 at isa? port 0x240 tty # # And for PCI cards, you need only say: # # device rp0 # device rp1 # ... # Note: Make sure that any Rocketport PCI devices are specified BEFORE the # ISA Rocketport devices. # Notes on the Digiboard driver: # # The following flag values have special meanings: # 0x01 - alternate layout of pins # 0x02 - use the windowed PC/Xe in 64K mode # Notes on the Specialix SI/XIO driver: # **This is NOT a Specialix supported Driver!** # The host card is memory, not IO mapped. # The Rev 1 host cards use a 64K chunk, on a 32K boundary. # The Rev 2 host cards use a 32K chunk, on a 32K boundary. # The cards can use an IRQ of 11, 12 or 15. # Notes on the Stallion stl and stli drivers: # See src/i386/isa/README.stl for complete instructions. # This is version 0.0.5alpha, unsupported by Stallion. # The stl driver has a secondary IO port hard coded at 0x280. You need # to change src/i386/isa/stallion.c if you reconfigure this on the boards. # The "flags" and "iosiz" settings on the stli driver depend on the board: # EasyConnection 8/64 ISA: flags 23 iosiz 0x1000 # EasyConnection 8/64 EISA: flags 24 iosiz 0x10000 # EasyConnection 8/64 MCA: flags 25 iosiz 0x1000 # ONboard ISA: flags 4 iosiz 0x10000 # ONboard EISA: flags 7 iosiz 0x10000 # ONboard MCA: flags 3 iosiz 0x10000 # Brumby: flags 2 iosiz 0x4000 # Stallion: flags 1 iosiz 0x10000 device mcd0 at isa? port 0x300 bio irq 10 vector mcdintr # for the Sony CDU31/33A CDROM device scd0 at isa? port 0x230 bio # for the SoundBlaster 16 multicd - up to 4 devices controller matcd0 at isa? port 0x230 bio device wt0 at isa? port 0x300 bio irq 5 drq 1 vector wtintr device ctx0 at isa? port 0x230 iomem 0xd0000 device spigot0 at isa? port 0xad6 irq 15 iomem 0xee000 vector spigintr device apm0 at isa? device gp0 at isa? port 0x2c0 tty device gsc0 at isa? port "IO_GSC1" tty drq 3 device joy0 at isa? port "IO_GAME" device alog0 at isa? port 0x260 tty irq 5 vector alogintr device cy0 at isa? tty irq 10 iomem 0xd4000 iosiz 0x2000 vector cyintr device dgb0 at isa? port 0x220 iomem 0xfc0000 iosiz ? tty device labpc0 at isa? port 0x260 tty irq 5 vector labpcintr device rc0 at isa? port 0x220 tty irq 12 vector rcintr device rp0 at isa? port 0x280 tty # the port and irq for tw0 are fictitious device tw0 at isa? port 0x380 tty irq 11 vector twintr device si0 at isa? iomem 0xd0000 tty irq 12 vector siintr device asc0 at isa? port IO_ASC1 tty drq 3 irq 10 vector ascintr device bqu0 at isa? port 0x150 device stl0 at isa? port 0x2a0 tty irq 10 vector stlintr device stli0 at isa? port 0x2a0 tty iomem 0xcc000 flags 23 iosiz 0x1000 device loran0 at isa? port ? tty irq 5 vector loranintr # # EISA devices: # # The EISA bus device is eisa0. It provides auto-detection and # configuration support for all devices on the EISA bus. # # The `ahb' device provides support for the Adaptec 174X adapter. # # The `ahc' device provides support for the Adaptec 274X and 284X # adapters. The 284X, although a VLB card, responds to EISA probes.
# # fea: DEC DEFEA EISA FDDI adapter # controller eisa0 controller ahb0 controller ahc0 device fea0 # enable tagged command queuing, which is a major performance win on # devices that support it (and controllers with enough SCB's) options AHC_TAGENABLE # enable SCB paging - See the ahc.4 man page options AHC_SCBPAGING_ENABLE # The aic7xxx driver will attempt to use memory mapped I/O for all PCI # controllers that have it configured only if this option is set. Unfortunately, # this doesn't work on some motherboards, which prevents it from being the # default. options AHC_ALLOW_MEMIO # By default, only 10 EISA slots are probed, since the slot numbers # above clash with the configuration address space of the PCI subsystem, # and the EISA probe is not very smart about this. This is sufficient # for most machines, but in particular the HP NetServer LC series comes # with an onboard AIC7770 dual-channel SCSI controller on EISA slot #11, # thus you need to bump this figure to 12 for them. options "EISA_SLOTS=12" # # PCI devices: # # The main PCI bus device is `pci'. It provides auto-detection and # configuration support for all devices on the PCI bus, using either # configuration mode defined in the PCI specification. # # The `ahc' device provides support for the Adaptec 29/3940(U)(W) # and motherboard based AIC7870/AIC7880 adapters. # # The `ncr' device provides support for the NCR 53C810 and 53C825 # self-contained SCSI host adapters. # # The `amd' device provides support for the Tekram DC-390 and 390T # SCSI host adapters, but is expected to work with any AMD 53c974 # PCI SCSI chip and the AMD Ethernet+SCSI Combo chip, after some # local patches were applied to the sources (that had originally # been written by Tekram and limited to work with their SCSI cards). # # The `de' device provides support for the Digital Equipment DC21040 # self-contained Ethernet adapter. # # The `fxp' device provides support for the Intel EtherExpress Pro/100B # PCI Fast Ethernet adapters. # # The `tx' device provides support for the SMC 9432TX cards. # # The `vx' device provides early support for the 3Com 3C590 and 3C595 # cards. # # The `fpa' device provides support for the Digital DEFPA PCI FDDI # adapter. pseudo-device fddi is also needed. # # The `meteor' device is a PCI video capture board. It can also have the # following options: # options METEOR_ALLOC_PAGES=xxx preallocate kernel pages for data entry # figure (ROWS*COLUMN*BYTES_PER_PIXEL*FRAME+PAGE_SIZE-1)/PAGE_SIZE # options METEOR_DEALLOC_PAGES remove all allocated pages on close(2) # options METEOR_DEALLOC_ABOVE=xxx remove all allocated pages above the # specified amount. If this value is below the allocated amount no action is # taken # option METEOR_SYSTEM_DEFAULT={METEOR_PAL|METEOR_NTSC|METEOR_SECAM}, used # for initialization of the fps routine when a signal is not present. # # The 'bktr' device is a PCI video capture board. It also has a TV tuner # on board. # controller pci0 controller ahc1 controller ncr0 controller amd0 device de0 device fxp0 device tx0 device vx0 device fpa0 device meteor0 device bktr0 # # PCCARD/PCMCIA # # card: slot controller # pcic: slots controller card0 controller pcic0 at card? controller pcic1 at card? # # Laptop/Notebook options: # # See also: # apm under `Miscellaneous hardware' # above.
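# # (Added example, not part of the original file: the apm0 `flags' bits # listed under `Notes on APM' above are simply OR'd together, so a laptop # with a broken statclock that should also be limited to APM protocol 1.0 # would use flags 0x0030: # #device apm0 at isa? flags 0x0030 # The line above is commented out and purely illustrative.)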
# For older notebooks that signal a powerfail condition (external # power supply dropped, or battery state low) by issuing an NMI: options POWERFAIL_NMI # make it beep instead of panicking # # Parallel-Port Bus # # Parallel port bus support is provided by the `ppbus' device. # Multiple devices may be attached to the parallel port; devices # are automatically probed and attached when found. # # Supported devices: # vpo Iomega Zip Drive # Requires SCSI disk support ('scbus' and 'sd'), best # performance is achieved with ports in EPP 1.9 mode. # nlpt Parallel Printer # ppi General-purpose I/O ("Geek Port") # # Supported interfaces: # ppc ISA-bus parallel port interfaces. # controller ppbus0 controller vpo0 at ppbus? device nlpt0 at ppbus? device ppi0 at ppbus? device pps0 at ppbus? controller ppc0 at isa? disable port ? irq 7 vector ppcintr # Kernel BOOTP support options BOOTP # Use BOOTP to obtain IP address/hostname options BOOTP_NFSROOT # NFS mount root filesystem using BOOTP info options "BOOTP_NFSV3" # Use NFS v3 to NFS mount root options BOOTP_COMPAT # Workaround for broken bootp daemons. # # An obsolete option to test kern_opt.c. # options GATEWAY # If you want to disable loadable kernel modules (LKM), you # might want to use this option. #options NO_LKM # # Add tie-ins for a hardware watchdog. This only enables the hooks; # the user must still supply the actual driver. # options HW_WDOG # More undocumented options for linting. options CLK_CALIBRATION_LOOP options "CLK_USE_I8254_CALIBRATION" options CLK_USE_TSC_CALIBRATION options CLUSTERDEBUG options COMPAT_LINUX options CPU_UPGRADE_HW_CACHE options DEBUG options "DEBUG_1284" options DEVFS_ROOT #options DISABLE_PSE options "EXT2FS" options "I586_PMC_GUPROF=0x70000" options "IBCS2" # broken: #options IPFILTER options KEY options KEY_DEBUG options LOCKF_DEBUG options LOUTB options KBD_MAXRETRY=4 options KBD_MAXWAIT=6 options KBD_RESETDELAY=201 options KBDIO_DEBUG=2 options MSGMNB=2049 options MSGMNI=41 options MSGSEG=2049 options MSGSSZ=16 options MSGTQL=41 options NBUF=512 options NETATALKDEBUG options NMBCLUSTERS=1024 options NPX_DEBUG options NULLFS_DIAGNOSTIC options PANIC_REBOOT_WAIT_TIME=16 options "PCVT_24LINESDEF" options PCVT_CTRL_ALT_DEL options PCVT_EMU_MOUSE options PCVT_FREEBSD=211 options PCVT_META_ESC options PCVT_NSCREENS=9 options PCVT_PRETTYSCRNS options PCVT_SCANSET=2 options PCVT_SCREENSAVER options PCVT_USEKBDSEC options "PCVT_VT220KEYB" options PSM_DEBUG=1 options "SCSI_2_DEF" options SCSI_DELAY=8 # Be pessimistic about Joe SCSI device options SCSI_NCR_DEBUG options SCSI_NCR_DFLT_TAGS=4 options SCSI_NCR_MAX_SYNC=10000 options SCSI_NCR_MAX_WIDE=1 options SCSI_NCR_MYADDR=7 options SEMMAP=31 options SEMMNI=11 options SEMMNS=61 options SEMMNU=31 options SEMMSL=61 options SEMOPM=101 options SEMUME=11 options SHOW_BUSYBUFS # List buffers that prevent root unmount options SHMALL=1025 options "SHMMAX=(SHMMAXPGS*PAGE_SIZE+1)" options SHMMAXPGS=1025 options SHMMIN=2 options SHMMNI=33 options SHMSEG=9 options SI_DEBUG options SIMPLELOCK_DEBUG options SPX_HACK # The 'dpt' driver provides hardware RAID-{0,1,5} support, multi-initiator I/O # See sys/dev/dpt for debugging and other subtle options. # DPT_VERIFY_HINTR Performs some strict hardware interrupt testing. # Only use if you suspect PCI bus corruption problems # DPT_RESTRICTED_FREELIST Normally, the freelist used by the DPT for queueing # will grow to accommodate increased use. This growth # will NOT shrink.
To restrict the number of queue # slots to exactly what the DPT can hold at one time, # enable this option. # DPT_MEASURE_PERFORMANCE Enables a set of (semi)invasive metrics. Various # instruments are enabled. Assumed to be enabled by # the /usr/sbin/dpt_* tools. # DPT_FREELIST_IS_STACK For optimal L{1,2} CPU cache utilization, enable # this option. Otherwise, the transaction queue is # a LIFO. I cannot measure the performance gain. # DPT_HANDLE_TIMEOUTS Normally device timeouts are handled by the DPT. # If you want the driver to handle timeouts, enable # this option. If your system is very busy, this # option will create more trouble than it solves. # DPT_TIMEOUT_FACTOR Used to compute the excessive amount of time to # wait when timing out with the above option. # DPT_DEBUG_xxxx These are controllable from sys/dev/dpt/dpt.h # DPT_LOST_IRQ When enabled, will try, once per second, to catch # any interrupt that got lost. Seems to help in some # DPT-firmware/Motherboard combinations. Minimal # cost, great benefit. controller dpt0 # DPT options options DPT_VERIFY_HINTR options DPT_RESTRICTED_FREELIST options DPT_MEASURE_PERFORMANCE options DPT_FREELIST_IS_STACK options DPT_HANDLE_TIMEOUTS options DPT_TIMEOUT_FACTOR=4 options DPT_INTR_DELAY=200 # Some motherboards need this options DPT_LOST_IRQ Index: head/sys/isofs/cd9660/cd9660_vfsops.c =================================================================== --- head/sys/isofs/cd9660/cd9660_vfsops.c (revision 34265) +++ head/sys/isofs/cd9660/cd9660_vfsops.c (revision 34266) @@ -1,891 +1,892 @@ /*- * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
* * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 - * $Id: cd9660_vfsops.c,v 1.33 1997/12/21 21:40:02 joerg Exp $ + * $Id: cd9660_vfsops.c,v 1.34 1998/03/01 22:46:00 msmith Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_ISOFSMNT, "ISOFS mount", "ISOFS mount structure"); MALLOC_DEFINE(M_ISOFSNODE, "ISOFS node", "ISOFS vnode private part"); static int cd9660_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int cd9660_start __P((struct mount *, int, struct proc *)); static int cd9660_unmount __P((struct mount *, int, struct proc *)); static int cd9660_root __P((struct mount *, struct vnode **)); static int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *)); static int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int cd9660_vget __P((struct mount *, ino_t, struct vnode **)); static int cd9660_vrele __P((struct mount *, struct vnode *)); static int cd9660_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int cd9660_vptofh __P((struct vnode *, struct fid *)); static struct vfsops cd9660_vfsops = { cd9660_mount, cd9660_start, cd9660_unmount, cd9660_root, cd9660_quotactl, cd9660_statfs, cd9660_sync, cd9660_vget, cd9660_vrele, cd9660_fhtovp, cd9660_vptofh, cd9660_init }; VFS_SET(cd9660_vfsops, cd9660, MOUNT_CD9660, VFCF_READONLY); /* * Called by vfs_mountroot when iso is going to be mounted as root. */ static int iso_get_ssector __P((dev_t dev, struct proc *p)); static int iso_mountfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct iso_args *argp)); static int iso_mountroot __P((struct mount *mp, struct proc *p)); /* * Try to find the start of the last data track on this CD-ROM. This * is used to mount the last session of a multi-session CD. Bail out * and return 0 if we fail, this is always a safe bet. */ static int iso_get_ssector(dev, p) dev_t dev; struct proc *p; { struct ioc_toc_header h; struct ioc_read_toc_single_entry t; int i; struct bdevsw *bd; d_ioctl_t *ioctlp; bd = bdevsw[major(dev)]; ioctlp = bd->d_ioctl; if (ioctlp == NULL) return 0; if (ioctlp(dev, CDIOREADTOCHEADER, (caddr_t)&h, FREAD, p) == -1) return 0; for (i = h.ending_track; i >= 0; i--) { t.address_format = CD_LBA_FORMAT; t.track = i; if (ioctlp(dev, CDIOREADTOCENTRY, (caddr_t)&t, FREAD, p) == -1) return 0; if ((t.entry.control & 4) != 0) /* found a data track */ break; } if (i < 0) return 0; return ntohl(t.entry.addr.lba); } static int iso_mountroot(mp, p) struct mount *mp; struct proc *p; { struct iso_args args; int error; if ((error = bdevvp(rootdev, &rootvp))) { printf("iso_mountroot: can't find rootvp"); return (error); } args.flags = ISOFSMNT_ROOT; args.ssector = iso_get_ssector(rootdev, p); if (bootverbose) printf("iso_mountroot(): using session at block %d\n", args.ssector); if (error = iso_mountfs(rootvp, mp, p, &args)) return (error); (void)cd9660_statfs(mp, &mp->mnt_stat, p); return (0); } /* * VFS Operations. 
* * mount system call */ static int cd9660_mount(mp, path, data, ndp, p) register struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; struct iso_args args; u_int size; int error; struct iso_mnt *imp = 0; if ((mp->mnt_flag & MNT_ROOTFS) != 0) { if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; return (iso_mountroot(mp, p)); } if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))) return (error); if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EROFS); /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. * Disallow clearing MNT_NOCLUSTERR flag, if block device requests. */ if (mp->mnt_flag & MNT_UPDATE) { imp = VFSTOISOFS(mp); if (bdevsw[major(imp->im_devvp->v_rdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (args.fspec == 0) return (vfs_export(mp, &imp->im_export, &args.export)); } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. */ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); if ((error = namei(ndp))) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return ENOTBLK; } if (major(devvp->v_rdev) >= nblkdev) { vrele(devvp); return ENXIO; } if ((mp->mnt_flag & MNT_UPDATE) == 0) { if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; error = iso_mountfs(devvp, mp, p, &args); } else { if (devvp != imp->im_devvp) error = EINVAL; /* needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return error; } imp = VFSTOISOFS(mp); (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) cd9660_statfs(mp, &mp->mnt_stat, p); return 0; } /* * Common code for mount and mountroot */ static int iso_mountfs(devvp, mp, p, argp) register struct vnode *devvp; struct mount *mp; struct proc *p; struct iso_args *argp; { register struct iso_mnt *isomp = (struct iso_mnt *)0; struct buf *bp = NULL; dev_t dev = devvp->v_rdev; int error = EINVAL; int needclose = 0; int high_sierra = 0; int ronly = (mp->mnt_flag & MNT_RDONLY) != 0; int iso_bsize; int iso_blknum; struct iso_volume_descriptor *vdp = 0; struct iso_primary_descriptor *pri; struct iso_sierra_primary_descriptor *pri_sierra; struct iso_directory_record *rootp; int logical_block_size; if (!ronly) return EROFS; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ if ((error = vfs_mountedon(devvp))) return error; if (vcount(devvp) > 1 && devvp != rootvp) return EBUSY; if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))) return (error); if ((error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))) return error; needclose = 1; /* This is the "logical sector size". The standard says this * should be 2048 or the physical sector size on the device, * whichever is greater. For now, we'll just use a constant. 
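* (Added note, not in the original source: ISO_DEFAULT_BLOCK_SIZE is the * 2048-byte constant; the loop below then scans logical sectors 16..99 of * the selected session looking for the primary volume descriptor.)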
*/ iso_bsize = ISO_DEFAULT_BLOCK_SIZE; for (iso_blknum = 16 + argp->ssector; iso_blknum < 100 + argp->ssector; iso_blknum++) { if (error = bread(devvp, iso_blknum * btodb(iso_bsize), iso_bsize, NOCRED, &bp)) goto out; vdp = (struct iso_volume_descriptor *)bp->b_data; if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) { if (bcmp (vdp->id_sierra, ISO_SIERRA_ID, sizeof vdp->id) != 0) { error = EINVAL; goto out; } else high_sierra = 1; } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_END) { error = EINVAL; goto out; } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_PRIMARY) break; brelse(bp); } if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) != ISO_VD_PRIMARY) { error = EINVAL; goto out; } pri = (struct iso_primary_descriptor *)vdp; pri_sierra = (struct iso_sierra_primary_descriptor *)vdp; logical_block_size = isonum_723 (high_sierra? pri_sierra->logical_block_size: pri->logical_block_size); if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE || (logical_block_size & (logical_block_size - 1)) != 0) { error = EINVAL; goto out; } rootp = (struct iso_directory_record *) (high_sierra? pri_sierra->root_directory_record: pri->root_directory_record); isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK); bzero((caddr_t)isomp, sizeof *isomp); isomp->logical_block_size = logical_block_size; isomp->volume_space_size = isonum_733 (high_sierra? pri_sierra->volume_space_size: pri->volume_space_size); /* * Since an ISO9660 multi-session CD can also access previous * sessions, we have to include them in the space * considerations. This doesn't yield a very accurate number since * parts of the old sessions might be inaccessible now, but we * can't do much better. This is also important for the NFS * filehandle validation. */ isomp->volume_space_size += argp->ssector; bcopy (rootp, isomp->root, sizeof isomp->root); isomp->root_extent = isonum_733 (rootp->extent); isomp->root_size = isonum_733 (rootp->size); isomp->im_bmask = logical_block_size - 1; isomp->im_bshift = 0; while ((1 << isomp->im_bshift) < isomp->logical_block_size) isomp->im_bshift++; bp->b_flags |= B_AGE; brelse(bp); bp = NULL; mp->mnt_data = (qaddr_t)isomp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = 0; mp->mnt_flag |= MNT_LOCAL; isomp->im_mountp = mp; isomp->im_dev = dev; isomp->im_devvp = devvp; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; /* Check the Rock Ridge Extension support */ if (!(argp->flags & ISOFSMNT_NORRIP)) { if (error = bread(isomp->im_devvp, (isomp->root_extent + isonum_711(rootp->ext_attr_length)) << (isomp->im_bshift - DEV_BSHIFT), isomp->logical_block_size, NOCRED, &bp)) goto out; rootp = (struct iso_directory_record *)bp->b_data; if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) { argp->flags |= ISOFSMNT_NORRIP; } else { argp->flags &= ~ISOFSMNT_GENS; } /* * The contents are valid, * but they will get reread as part of another vnode, so...
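* (Added note, not in the original source: the B_AGE flag set below marks * the buffer for early reuse, since this root directory block will be read * again through the root vnode.)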
*/ bp->b_flags |= B_AGE; brelse(bp); bp = NULL; } isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT); if(high_sierra) /* this effectively ignores all the mount flags */ isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA; else switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) { default: isomp->iso_ftype = ISO_FTYPE_DEFAULT; break; case ISOFSMNT_GENS|ISOFSMNT_NORRIP: isomp->iso_ftype = ISO_FTYPE_9660; break; case 0: isomp->iso_ftype = ISO_FTYPE_RRIP; break; } return 0; out: - devvp->v_specflags &= ~SI_MOUNTEDON; + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); if (needclose) (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p); if (isomp) { free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; } return error; } /* * Make a filesystem operational. * Nothing to do at the moment. */ /* ARGSUSED */ static int cd9660_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return 0; } /* * unmount system call */ static int cd9660_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct iso_mnt *isomp; int error, flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; #if 0 mntflushbuf(mp, 0); if (mntinvalbuf(mp)) return EBUSY; #endif if ((error = vflush(mp, NULLVP, flags))) return (error); isomp = VFSTOISOFS(mp); - isomp->im_devvp->v_specflags &= ~SI_MOUNTEDON; + isomp->im_devvp->v_specmountpoint = NULL; error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); vrele(isomp->im_devvp); free((caddr_t)isomp, M_ISOFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Return root of a filesystem */ static int cd9660_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct iso_mnt *imp = VFSTOISOFS(mp); struct iso_directory_record *dp = (struct iso_directory_record *)imp->root; ino_t ino = isodirino(dp, imp); /* * With RRIP we must use the `.' entry of the root directory. * Simply tell vget, that it's a relocated directory. */ return (cd9660_vget_internal(mp, ino, vpp, imp->iso_ftype == ISO_FTYPE_RRIP, dp)); } /* * Do operations associated with quotas, not supported */ /* ARGSUSED */ static int cd9660_quotactl(mp, cmd, uid, arg, p) struct mount *mp; int cmd; uid_t uid; caddr_t arg; struct proc *p; { return (EOPNOTSUPP); } /* * Get file system statistics. */ int cd9660_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct iso_mnt *isomp; isomp = VFSTOISOFS(mp); sbp->f_type = MOUNT_CD9660; sbp->f_bsize = isomp->logical_block_size; sbp->f_iosize = sbp->f_bsize; /* XXX */ sbp->f_blocks = isomp->volume_space_size; sbp->f_bfree = 0; /* total free blocks */ sbp->f_bavail = 0; /* blocks free for non superuser */ sbp->f_files = 0; /* total files */ sbp->f_ffree = 0; /* free file nodes */ if (sbp != &mp->mnt_stat) { bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } /* Use the first spare for flags: */ - sbp->f_spare[0] = isomp->im_flags; + /* Don't do this!!! 
XXX */ + /* sbp->f_spare[0] = isomp->im_flags; */ return 0; } /* ARGSUSED */ static int cd9660_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is in range * - call iget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the generation number matches */ struct ifid { ushort ifid_len; ushort ifid_pad; int ifid_ino; long ifid_start; }; /* ARGSUSED */ int cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct ifid *ifhp = (struct ifid *)fhp; register struct iso_node *ip; register struct netcred *np; register struct iso_mnt *imp = VFSTOISOFS(mp); struct vnode *nvp; int error; #ifdef ISOFS_DBG printf("fhtovp: ino %d, start %ld\n", ifhp->ifid_ino, ifhp->ifid_start); #endif /* * Get the export permission structure for this tuple. */ np = vfs_export_lookup(mp, &imp->im_export, nam); if (np == NULL) return (EACCES); if (error = VFS_VGET(mp, ifhp->ifid_ino, &nvp)) { *vpp = NULLVP; return (error); } ip = VTOI(nvp); if (ip->inode.iso_mode == 0) { vput(nvp); *vpp = NULLVP; return (ESTALE); } *vpp = nvp; *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } int cd9660_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { /* * XXXX * It would be nice if we didn't always set the `relocated' flag * and force the extra read, but I don't want to think about fixing * that right now. */ return (cd9660_vget_internal(mp, ino, vpp, #if 0 VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP, #else 0, #endif (struct iso_directory_record *)0)); } /* * Complement to all vpp returning ops. * XXX - initially only to get rid of WILLRELE. */ /* ARGSUSED */ static int cd9660_vrele(mp, vp) struct mount *mp; struct vnode *vp; { return (EOPNOTSUPP); } int cd9660_vget_internal(mp, ino, vpp, relocated, isodir) struct mount *mp; ino_t ino; struct vnode **vpp; int relocated; struct iso_directory_record *isodir; { struct iso_mnt *imp; struct iso_node *ip; struct buf *bp; struct vnode *vp, *nvp; dev_t dev; int error; imp = VFSTOISOFS(mp); dev = imp->im_dev; if ((*vpp = cd9660_ihashget(dev, ino)) != NULLVP) return (0); /* Allocate a new vnode/iso_node. */ if (error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) { *vpp = NULLVP; return (error); } MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE, M_WAITOK); bzero((caddr_t)ip, sizeof(struct iso_node)); lockinit(&ip->i_lock, PINOD, "isonode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_dev = dev; ip->i_number = ino; /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. 
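* (Added note, not in the original source: cd9660_ihashget() above returns * any vnode already hashed for this (dev, ino) pair, so inserting into the * hash before the disk read makes concurrent lookups block on this inode * rather than race the read.)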
*/ cd9660_ihashins(ip); if (isodir == 0) { int lbn, off; lbn = lblkno(imp, ino); if (lbn >= imp->volume_space_size) { vput(vp); printf("fhtovp: lbn exceed volume space %d\n", lbn); return (ESTALE); } off = blkoff(imp, ino); if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) { vput(vp); printf("fhtovp: crosses block boundary %d\n", off + ISO_DIRECTORY_RECORD_SIZE); return (ESTALE); } error = bread(imp->im_devvp, lbn << (imp->im_bshift - DEV_BSHIFT), imp->logical_block_size, NOCRED, &bp); if (error) { vput(vp); brelse(bp); printf("fhtovp: bread error %d\n",error); return (error); } isodir = (struct iso_directory_record *)(bp->b_data + off); if (off + isonum_711(isodir->length) > imp->logical_block_size) { vput(vp); if (bp != 0) brelse(bp); printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n", off +isonum_711(isodir->length), off, isonum_711(isodir->length)); return (ESTALE); } #if 0 if (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) { if (bp != 0) brelse(bp); printf("fhtovp: file start miss %d vs %d\n", isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length), ifhp->ifid_start); return (ESTALE); } #endif } else bp = 0; ip->i_mnt = imp; ip->i_devvp = imp->im_devvp; VREF(ip->i_devvp); if (relocated) { /* * On relocated directories we must * read the `.' entry out of a dir. */ ip->iso_start = ino >> imp->im_bshift; if (bp != 0) brelse(bp); if (error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) { vput(vp); return (error); } isodir = (struct iso_directory_record *)bp->b_data; } ip->iso_extent = isonum_733(isodir->extent); ip->i_size = isonum_733(isodir->size); ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent; /* * Setup time stamp, attribute */ vp->v_type = VNON; switch (imp->iso_ftype) { default: /* ISO_FTYPE_9660 */ { struct buf *bp2; int off; if ((imp->im_flags & ISOFSMNT_EXTATT) && (off = isonum_711(isodir->ext_attr_length))) cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL, &bp2); else bp2 = NULL; cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660); cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660); if (bp2) brelse(bp2); break; } case ISO_FTYPE_RRIP: cd9660_rrip_analyze(isodir, ip, imp); break; } if (bp != 0) brelse(bp); /* * Initialize the associated vnode */ switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) { case VFIFO: vp->v_op = cd9660_fifoop_p; break; case VCHR: case VBLK: /* * if device, look at device number table for translation */ vp->v_op = cd9660_specop_p; if (nvp = checkalias(vp, ip->inode.iso_rdev, mp)) { /* * Discard unneeded vnode, but save its iso_node. * Note that the lock is carried over in the iso_node * to the replacement vnode. */ nvp->v_data = vp->v_data; vp->v_data = NULL; vp->v_op = spec_vnodeop_p; vrele(vp); vgone(vp); /* * Reinitialize aliased inode. */ vp = nvp; ip->i_vnode = vp; } break; } if (ip->iso_extent == imp->root_extent) vp->v_flag |= VROOT; /* * XXX need generation number? 
*/ *vpp = vp; return (0); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int cd9660_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct iso_node *ip = VTOI(vp); register struct ifid *ifhp; ifhp = (struct ifid *)fhp; ifhp->ifid_len = sizeof(struct ifid); ifhp->ifid_ino = ip->i_number; ifhp->ifid_start = ip->iso_start; #ifdef ISOFS_DBG printf("vptofh: ino %d, start %ld\n", ifhp->ifid_ino,ifhp->ifid_start); #endif return 0; } Index: head/sys/kern/kern_malloc.c =================================================================== --- head/sys/kern/kern_malloc.c (revision 34265) +++ head/sys/kern/kern_malloc.c (revision 34266) @@ -1,454 +1,454 @@ /* * Copyright (c) 1987, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94 - * $Id: kern_malloc.c,v 1.43 1998/02/09 06:09:22 eivind Exp $ + * $Id: kern_malloc.c,v 1.44 1998/02/23 07:41:23 dyson Exp $ */ #include "opt_vm.h" #include #include #include #define MALLOC_INSTANTIATE #include #include #include #include #include #include #include #include #include #include static void kmeminit __P((void *)); static void malloc_init __P((struct malloc_type *)); SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL) static MALLOC_DEFINE(M_FREE, "free", "should be on free list"); static struct malloc_type *kmemstatistics = M_FREE; static struct kmembuckets bucket[MINBUCKET + 16]; static struct kmemusage *kmemusage; static char *kmembase; static char *kmemlimit; static int vm_kmem_size; #ifdef DIAGNOSTIC /* * This structure provides a set of masks to catch unaligned frees. 
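* (Added note, not in the original source: addrmask[i] is (1 << i) - 1, so * free() can mask an address with its bucket's entry and panic if any low * bits are set, i.e. if the address is not aligned to the bucket's * allocation size.)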
*/ static long addrmask[] = { 0, 0x00000001, 0x00000003, 0x00000007, 0x0000000f, 0x0000001f, 0x0000003f, 0x0000007f, 0x000000ff, 0x000001ff, 0x000003ff, 0x000007ff, 0x00000fff, 0x00001fff, 0x00003fff, 0x00007fff, 0x0000ffff, }; /* * The WEIRD_ADDR is used as known text to copy into free objects so * that modifications after frees can be detected. */ #define WEIRD_ADDR 0xdeadc0de #define MAX_COPY 64 /* * Normally the first word of the structure is used to hold the list * pointer for free objects. However, when running with diagnostics, * we use the third and fourth fields, so as to catch modifications * in the most commonly trashed first two words. */ struct freelist { long spare0; struct malloc_type *type; long spare1; caddr_t next; }; #else /* !DIAGNOSTIC */ struct freelist { caddr_t next; }; #endif /* DIAGNOSTIC */ /* * Allocate a block of memory */ void * malloc(size, type, flags) unsigned long size; struct malloc_type *type; int flags; { register struct kmembuckets *kbp; register struct kmemusage *kup; register struct freelist *freep; long indx, npg, allocsize; int s; caddr_t va, cp, savedlist; #ifdef DIAGNOSTIC long *end, *lp; int copysize; char *savedtype; #endif register struct malloc_type *ksp = type; if (!type->ks_next) malloc_init(type); indx = BUCKETINDX(size); kbp = &bucket[indx]; - s = splhigh(); + s = splmem(); while (ksp->ks_memuse >= ksp->ks_limit) { if (flags & M_NOWAIT) { splx(s); return ((void *) NULL); } if (ksp->ks_limblocks < 65535) ksp->ks_limblocks++; tsleep((caddr_t)ksp, PSWP+2, type->ks_shortdesc, 0); } ksp->ks_size |= 1 << indx; #ifdef DIAGNOSTIC copysize = 1 << indx < MAX_COPY ? 1 << indx : MAX_COPY; #endif if (kbp->kb_next == NULL) { kbp->kb_last = NULL; if (size > MAXALLOCSAVE) allocsize = roundup(size, PAGE_SIZE); else allocsize = 1 << indx; npg = btoc(allocsize); va = (caddr_t) kmem_malloc(kmem_map, (vm_size_t)ctob(npg), flags); if (va == NULL) { splx(s); return ((void *) NULL); } kbp->kb_total += kbp->kb_elmpercl; kup = btokup(va); kup->ku_indx = indx; if (allocsize > MAXALLOCSAVE) { if (npg > 65535) panic("malloc: allocation too large"); kup->ku_pagecnt = npg; ksp->ks_memuse += allocsize; goto out; } kup->ku_freecnt = kbp->kb_elmpercl; kbp->kb_totalfree += kbp->kb_elmpercl; /* * Just in case we blocked while allocating memory, * and someone else also allocated memory for this * bucket, don't assume the list is still empty. */ savedlist = kbp->kb_next; kbp->kb_next = cp = va + (npg * PAGE_SIZE) - allocsize; for (;;) { freep = (struct freelist *)cp; #ifdef DIAGNOSTIC /* * Copy in known text to detect modification * after freeing. 
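* (Added note, not in the original source: each word of the first copysize * bytes is filled with WEIRD_ADDR here; when the chunk is later handed out, * malloc() rescans the same region and reports any word that changed while * the chunk sat on the freelist.)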
*/ end = (long *)&cp[copysize]; for (lp = (long *)cp; lp < end; lp++) *lp = WEIRD_ADDR; freep->type = M_FREE; #endif /* DIAGNOSTIC */ if (cp <= va) break; cp -= allocsize; freep->next = cp; } freep->next = savedlist; if (kbp->kb_last == NULL) kbp->kb_last = (caddr_t)freep; } va = kbp->kb_next; kbp->kb_next = ((struct freelist *)va)->next; #ifdef DIAGNOSTIC freep = (struct freelist *)va; savedtype = (char *) type->ks_shortdesc; #if BYTE_ORDER == BIG_ENDIAN freep->type = (struct malloc_type *)WEIRD_ADDR >> 16; #endif #if BYTE_ORDER == LITTLE_ENDIAN freep->type = (struct malloc_type *)WEIRD_ADDR; #endif if (((long)(&freep->next)) & 0x2) freep->next = (caddr_t)((WEIRD_ADDR >> 16)|(WEIRD_ADDR << 16)); else freep->next = (caddr_t)WEIRD_ADDR; end = (long *)&va[copysize]; for (lp = (long *)va; lp < end; lp++) { if (*lp == WEIRD_ADDR) continue; printf("%s %d of object %p size %ld %s %s (0x%lx != 0x%x)\n", "Data modified on freelist: word", lp - (long *)va, va, size, "previous type", savedtype, *lp, WEIRD_ADDR); break; } freep->spare0 = 0; #endif /* DIAGNOSTIC */ kup = btokup(va); if (kup->ku_indx != indx) panic("malloc: wrong bucket"); if (kup->ku_freecnt == 0) panic("malloc: lost data"); kup->ku_freecnt--; kbp->kb_totalfree--; ksp->ks_memuse += 1 << indx; out: kbp->kb_calls++; ksp->ks_inuse++; ksp->ks_calls++; if (ksp->ks_memuse > ksp->ks_maxused) ksp->ks_maxused = ksp->ks_memuse; splx(s); return ((void *) va); } /* * Free a block of memory allocated by malloc. */ void free(addr, type) void *addr; struct malloc_type *type; { register struct kmembuckets *kbp; register struct kmemusage *kup; register struct freelist *freep; long size; int s; #ifdef DIAGNOSTIC struct freelist *fp; long *end, *lp, alloc, copysize; #endif register struct malloc_type *ksp = type; if (!type->ks_next) panic("freeing with unknown type (%s)", type->ks_shortdesc); #ifdef DIAGNOSTIC if ((char *)addr < kmembase || (char *)addr >= kmemlimit) { panic("free: address 0x%x out of range", addr); } #endif kup = btokup(addr); size = 1 << kup->ku_indx; kbp = &bucket[kup->ku_indx]; - s = splhigh(); + s = splmem(); #ifdef DIAGNOSTIC /* * Check for returns of data that do not point to the * beginning of the allocation. */ if (size > PAGE_SIZE) alloc = addrmask[BUCKETINDX(PAGE_SIZE)]; else alloc = addrmask[kup->ku_indx]; if (((u_long)addr & alloc) != 0) panic("free: unaligned addr 0x%x, size %d, type %s, mask %d", addr, size, type->ks_shortdesc, alloc); #endif /* DIAGNOSTIC */ if (size > MAXALLOCSAVE) { kmem_free(kmem_map, (vm_offset_t)addr, ctob(kup->ku_pagecnt)); size = kup->ku_pagecnt << PAGE_SHIFT; ksp->ks_memuse -= size; kup->ku_indx = 0; kup->ku_pagecnt = 0; if (ksp->ks_memuse + size >= ksp->ks_limit && ksp->ks_memuse < ksp->ks_limit) wakeup((caddr_t)ksp); ksp->ks_inuse--; kbp->kb_total -= 1; splx(s); return; } freep = (struct freelist *)addr; #ifdef DIAGNOSTIC /* * Check for multiple frees. Use a quick check to see if * it looks free before laboriously searching the freelist. */ if (freep->spare0 == WEIRD_ADDR) { fp = (struct freelist *)kbp->kb_next; while (fp) { if (fp->spare0 != WEIRD_ADDR) { printf("trashed free item %p\n", fp); panic("free: free item modified"); } else if (addr == (caddr_t)fp) { printf("multiple freed item %p\n", addr); panic("free: multiple free"); } fp = (struct freelist *)fp->next; } } /* * Copy in known text to detect modification after freeing * and to make it look free. Also, save the type being freed * so we can list likely culprit if modification is detected * when the object is reallocated. 
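* (Added note, not in the original source: the first word, spare0, is left * holding WEIRD_ADDR; malloc() zeroes it when it hands the chunk out, so * the quick check above can treat spare0 == WEIRD_ADDR as `this chunk * already looks free'.)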
*/ copysize = size < MAX_COPY ? size : MAX_COPY; end = (long *)&((caddr_t)addr)[copysize]; for (lp = (long *)addr; lp < end; lp++) *lp = WEIRD_ADDR; freep->type = type; #endif /* DIAGNOSTIC */ kup->ku_freecnt++; if (kup->ku_freecnt >= kbp->kb_elmpercl) if (kup->ku_freecnt > kbp->kb_elmpercl) panic("free: multiple frees"); else if (kbp->kb_totalfree > kbp->kb_highwat) kbp->kb_couldfree++; kbp->kb_totalfree++; ksp->ks_memuse -= size; if (ksp->ks_memuse + size >= ksp->ks_limit && ksp->ks_memuse < ksp->ks_limit) wakeup((caddr_t)ksp); ksp->ks_inuse--; #ifdef OLD_MALLOC_MEMORY_POLICY if (kbp->kb_next == NULL) kbp->kb_next = addr; else ((struct freelist *)kbp->kb_last)->next = addr; freep->next = NULL; kbp->kb_last = addr; #else /* * Return memory to the head of the queue for quick reuse. This * can improve performance by improving the probability of the * item being in the cache when it is reused. */ if (kbp->kb_next == NULL) { kbp->kb_next = addr; kbp->kb_last = addr; freep->next = NULL; } else { freep->next = kbp->kb_next; kbp->kb_next = addr; } #endif splx(s); } /* * Initialize the kernel memory allocator */ /* ARGSUSED*/ static void kmeminit(dummy) void *dummy; { register long indx; int npg; int mem_size; #if ((MAXALLOCSAVE & (MAXALLOCSAVE - 1)) != 0) #error "kmeminit: MAXALLOCSAVE not power of 2" #endif #if (MAXALLOCSAVE > MINALLOCSIZE * 32768) #error "kmeminit: MAXALLOCSAVE too big" #endif #if (MAXALLOCSAVE < PAGE_SIZE) #error "kmeminit: MAXALLOCSAVE too small" #endif /* * Try to auto-tune the kernel memory size, so that it is * more applicable for a wider range of machine sizes. * On an X86, a VM_KMEM_SIZE_SCALE value of 4 is good, while * a VM_KMEM_SIZE of 12MB is a fair compromise. The * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space * available, and on an X86 with a total KVA space of 256MB, * try to keep VM_KMEM_SIZE_MAX at 80MB or below. * * Note that the kmem_map is also used by the zone allocator, * so make sure that there is enough space. */ vm_kmem_size = VM_KMEM_SIZE; mem_size = cnt.v_page_count * PAGE_SIZE; #if defined(VM_KMEM_SIZE_SCALE) if ((mem_size / VM_KMEM_SIZE_SCALE) > vm_kmem_size) vm_kmem_size = mem_size / VM_KMEM_SIZE_SCALE; #endif #if defined(VM_KMEM_SIZE_MAX) if (vm_kmem_size >= VM_KMEM_SIZE_MAX) vm_kmem_size = VM_KMEM_SIZE_MAX; #endif if (vm_kmem_size > 2 * (cnt.v_page_count * PAGE_SIZE)) vm_kmem_size = 2 * (cnt.v_page_count * PAGE_SIZE); npg = (nmbufs * MSIZE + nmbclusters * MCLBYTES + vm_kmem_size) / PAGE_SIZE; kmemusage = (struct kmemusage *) kmem_alloc(kernel_map, (vm_size_t)(npg * sizeof(struct kmemusage))); kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase, (vm_offset_t *)&kmemlimit, (vm_size_t)(npg * PAGE_SIZE)); kmem_map->system_map = 1; for (indx = 0; indx < MINBUCKET + 16; indx++) { if (1 << indx >= PAGE_SIZE) bucket[indx].kb_elmpercl = 1; else bucket[indx].kb_elmpercl = PAGE_SIZE / (1 << indx); bucket[indx].kb_highwat = 5 * bucket[indx].kb_elmpercl; } } static void malloc_init(type) struct malloc_type *type; { int npg; int mem_size; if (type->ks_magic != M_MAGIC) panic("malloc type lacks magic"); if (cnt.v_page_count == 0) panic("malloc_init not allowed before vm init"); /* * The default limits for each malloc region is 1/2 of the * malloc portion of the kmem map size. 
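* (Added note, not in the original source: vm_kmem_size was computed in * kmeminit() above, so every malloc type starts with the same ks_limit; * malloc() sleeps in its ks_memuse >= ks_limit loop until free() brings the * type back under the limit and issues a wakeup.)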
*/ type->ks_limit = vm_kmem_size / 2; type->ks_next = kmemstatistics; kmemstatistics = type; } Index: head/sys/kern/kern_shutdown.c =================================================================== --- head/sys/kern/kern_shutdown.c (revision 34265) +++ head/sys/kern/kern_shutdown.c (revision 34266) @@ -1,503 +1,513 @@ /*- * Copyright (c) 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 - * $Id: kern_shutdown.c,v 1.27 1997/11/25 07:07:43 julian Exp $ + * $Id: kern_shutdown.c,v 1.28 1998/02/16 23:57:44 eivind Exp $ */ #include "opt_ddb.h" #include "opt_hw_wdog.h" #include "opt_panic.h" #include "opt_show_busybufs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include /* smp_active, cpuid */ #endif #include #ifndef PANIC_REBOOT_WAIT_TIME #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ #endif /* * Note that stdarg.h and the ANSI style va_start macro is used for both * ANSI and traditional C compilers. */ #include #ifdef DDB #ifdef DDB_UNATTENDED static int debugger_on_panic = 0; #else static int debugger_on_panic = 1; #endif SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW, &debugger_on_panic, 0, ""); #endif #ifdef HW_WDOG /* * If there is a hardware watchdog, point this at the function needed to * hold it off. * It's needed when the kernel needs to do some lengthy operations. * e.g. in wd.c when dumping core.. 
It's most annoying to have * your precious core-dump only half written because the wdog kicked in. */ watchdog_tickle_fn wdog_tickler = NULL; #endif /* HW_WDOG */ /* * Variable panicstr contains argument to first call to panic; used as flag * to indicate that the kernel has already called panic. */ const char *panicstr; /* * callout list for things to do at shutdown */ typedef struct shutdown_list_element { struct shutdown_list_element *next; bootlist_fn function; void *arg; } *sle_p; /* * there are two shutdown lists. Some things need to be shut down * earlier than others. */ static sle_p shutdown_list1; static sle_p shutdown_list2; static void boot __P((int)) __dead2; static void dumpsys __P((void)); #ifndef _SYS_SYSPROTO_H_ struct reboot_args { int opt; }; #endif /* ARGSUSED */ /* * The system call that results in a reboot */ int reboot(p, uap) struct proc *p; struct reboot_args *uap; { int error; if ((error = suser(p->p_ucred, &p->p_acflag))) return (error); boot(uap->opt); return (0); } /* * Called by events that want to shut down.. e.g. on a PC */ void shutdown_nice() { /* Send a signal to init(8) and have it shut down the world */ if (initproc != NULL) { psignal(initproc, SIGINT); } else { /* No init(8) running, so simply reboot */ boot(RB_NOSYNC); } return; } static int waittime = -1; static struct pcb dumppcb; /* * Go through the rigmarole of shutting down.. * this used to be in machdep.c but I'll be damned if I could see * anything machine dependent in it. */ static void boot(howto) int howto; { sle_p ep; #ifdef SMP int c, spins; /* The MPSPEC says that the BSP must do the shutdown */ if (smp_active) { smp_active = 0; spins = 100; printf("boot() called on cpu#%d\n", cpuid); while ((c = cpuid) != 0) { if (spins-- < 1) { printf("timeout waiting for cpu #0!\n"); break; } printf("I'm on cpu#%d, I need to be on cpu#0, sleeping..\n", c); tsleep((caddr_t)&smp_active, PZERO, "cpu0wt", 10); } } #endif /* * Do any callouts that should be done BEFORE syncing the filesystems. */ ep = shutdown_list1; while (ep) { shutdown_list1 = ep->next; (*ep->function)(howto, ep->arg); ep = ep->next; } /* * Now sync filesystems */ if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) { register struct buf *bp; int iter, nbusy; waittime = 0; printf("\nsyncing disks... "); sync(&proc0, NULL); + /* + * With soft updates, some buffers that are + * written will be remarked as dirty until other + * buffers are written. + */ for (iter = 0; iter < 20; iter++) { nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) { if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) { nbusy++; + } else if ((bp->b_flags & (B_DELWRI | B_INVAL)) + == B_DELWRI) { + /* bawrite(bp);*/ + nbusy++; } } if (nbusy == 0) break; printf("%d ", nbusy); - DELAY(40000 * iter); + sync(&proc0, NULL); + DELAY(50000 * iter); } if (nbusy) { /* * Failed to sync all blocks. Indicate this and don't * unmount filesystems (thus forcing an fsck on reboot). */ printf("giving up\n"); #ifdef SHOW_BUSYBUFS nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) { if ((bp->b_flags & (B_BUSY | B_INVAL)) == B_BUSY) { nbusy++; printf("%d: dev:%08x, flags:%08x, blkno:%d, lblkno:%d\n", nbusy, bp->b_dev, bp->b_flags, bp->b_blkno, bp->b_lblkno); } } DELAY(5000000); /* 5 seconds */ #endif } else { printf("done\n"); /* * Unmount filesystems */ if (panicstr == 0) vfs_unmountall(); } DELAY(100000); /* wait for console output to finish */ } /* * Ok, now do things that assume all filesystem activity has * been completed.
*/ ep = shutdown_list2; while (ep) { shutdown_list2 = ep->next; (*ep->function)(howto, ep->arg); ep = ep->next; } splhigh(); if (howto & RB_HALT) { cpu_power_down(); printf("\n"); printf("The operating system has halted.\n"); printf("Please press any key to reboot.\n\n"); switch (cngetc()) { case -1: /* No console, just die */ cpu_halt(); /* NOTREACHED */ default: break; } } else { if (howto & RB_DUMP) { if (!cold) { savectx(&dumppcb); dumppcb.pcb_cr3 = rcr3(); dumpsys(); } if (PANIC_REBOOT_WAIT_TIME != 0) { if (PANIC_REBOOT_WAIT_TIME != -1) { int loop; printf("Automatic reboot in %d seconds - press a key on the console to abort\n", PANIC_REBOOT_WAIT_TIME); for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) { DELAY(1000 * 100); /* 1/10th second */ /* Did user type a key? */ if (cncheckc() != -1) break; } if (!loop) goto die; } } else { /* zero time specified - reboot NOW */ goto die; } printf("--> Press a key on the console to reboot <--\n"); cngetc(); } } die: printf("Rebooting...\n"); DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ /* cpu_boot(howto); */ /* doesn't do anything at the moment */ cpu_reset(); for(;;) ; /* NOTREACHED */ } /* * Magic number for savecore * * exported (symorder) and used at least by savecore(8) * */ static u_long const dumpmag = 0x8fca0101UL; static int dumpsize = 0; /* also for savecore */ static int dodump = 1; SYSCTL_INT(_machdep, OID_AUTO, do_dump, CTLFLAG_RW, &dodump, 0, ""); /* ARGSUSED */ static void dump_conf __P((void *dummy)); static void dump_conf(dummy) void *dummy; { cpu_dumpconf(); } SYSINIT(dump_conf, SI_SUB_DUMP_CONF, SI_ORDER_FIRST, dump_conf, NULL) /* * Doadump comes here after turning off memory management and * getting on the dump stack, either when called above, or by * the auto-restart code. */ static void dumpsys(void) { if (!dodump) return; if (dumpdev == NODEV) return; if ((minor(dumpdev)&07) != 1) return; if (!(bdevsw[major(dumpdev)])) return; if (!(bdevsw[major(dumpdev)]->d_dump)) return; dumpsize = Maxmem; printf("\ndumping to dev %lx, offset %ld\n", dumpdev, dumplo); printf("dump "); switch ((*bdevsw[major(dumpdev)]->d_dump)(dumpdev)) { case ENXIO: printf("device bad\n"); break; case EFAULT: printf("device not ready\n"); break; case EINVAL: printf("area improper\n"); break; case EIO: printf("i/o error\n"); break; case EINTR: printf("aborted from console\n"); break; default: printf("succeeded\n"); break; } } /* * Panic is called on unresolvable fatal errors. It prints "panic: mesg", * and then reboots. If we are called twice, then we avoid trying to sync * the disks as this often leads to recursive panics. */ void panic(const char *fmt, ...) { int bootopt; va_list ap; bootopt = RB_AUTOBOOT | RB_DUMP; if (panicstr) bootopt |= RB_NOSYNC; else panicstr = fmt; printf("panic: "); va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("\n"); #ifdef SMP /* three separate prints in case of an unmapped page and trap */ printf("mp_lock = %08x; ", mp_lock); printf("cpuid = %d; ", cpuid); printf("lapic.id = %08x\n", lapic.id); #endif #if defined(DDB) if (debugger_on_panic) Debugger ("panic"); #endif boot(bootopt); } /* * Two routines to handle adding/deleting items on the * shutdown callout lists * * at_shutdown(): * Take the arguments given and put them onto the shutdown callout list. * However, first make sure that it's not already there. * returns 0 on success.
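 *
 * (An editor's usage sketch precedes the function below.)
 */
#if 0
/*
 * Editor's illustration, not part of this revision: how a driver
 * might hook the pre-sync callout list described above.  mydev_stop
 * and mydev_attach are hypothetical names.
 */
static void
mydev_stop(int howto, void *arg)
{
	/* quiesce the (hypothetical) device before the disks are synced */
}

static int
mydev_attach(void *softc)
{
	/* boot() will call mydev_stop(howto, softc) before syncing */
	return (at_shutdown(mydev_stop, softc, SHUTDOWN_PRE_SYNC));
}
#endif
/*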
*/ int at_shutdown(bootlist_fn function, void *arg, int position) { sle_p ep, *epp; switch(position) { case SHUTDOWN_PRE_SYNC: epp = &shutdown_list1; break; case SHUTDOWN_POST_SYNC: epp = &shutdown_list2; break; default: printf("bad exit callout list specified\n"); return (EINVAL); } if (rm_at_shutdown(function, arg)) printf("exit callout entry already present\n"); ep = malloc(sizeof(*ep), M_TEMP, M_NOWAIT); if (ep == NULL) return (ENOMEM); ep->next = *epp; ep->function = function; ep->arg = arg; *epp = ep; return (0); } /* * Scan the exit callout lists for the given items and remove them. * Returns the number of items removed. */ int rm_at_shutdown(bootlist_fn function, void *arg) { sle_p *epp, ep; int count; count = 0; epp = &shutdown_list1; ep = *epp; while (ep) { if ((ep->function == function) && (ep->arg == arg)) { *epp = ep->next; free(ep, M_TEMP); count++; } else { epp = &ep->next; } ep = *epp; } epp = &shutdown_list2; ep = *epp; while (ep) { if ((ep->function == function) && (ep->arg == arg)) { *epp = ep->next; free(ep, M_TEMP); count++; } else { epp = &ep->next; } ep = *epp; } return (count); } Index: head/sys/kern/kern_synch.c =================================================================== --- head/sys/kern/kern_synch.c (revision 34265) +++ head/sys/kern/kern_synch.c (revision 34266) @@ -1,752 +1,752 @@ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 - * $Id: kern_synch.c,v 1.47 1998/02/25 06:04:46 bde Exp $ + * $Id: kern_synch.c,v 1.48 1998/03/04 10:25:55 dufault Exp $ */ #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include /* for UCHAR_MAX = typeof(p_priority)_MAX */ static void rqinit __P((void *)); SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, rqinit, NULL) u_char curpriority; /* usrpri of curproc */ int lbolt; /* once a second sleep address */ static void endtsleep __P((void *)); static void roundrobin __P((void *arg)); static void schedcpu __P((void *arg)); static void updatepri __P((struct proc *p)); #define MAXIMUM_SCHEDULE_QUANTUM (1000000) /* arbitrary limit */ #ifndef DEFAULT_SCHEDULE_QUANTUM #define DEFAULT_SCHEDULE_QUANTUM 10 #endif static int quantum = DEFAULT_SCHEDULE_QUANTUM; /* default value */ static int sysctl_kern_quantum SYSCTL_HANDLER_ARGS { int error; int new_val = quantum; new_val = quantum; error = sysctl_handle_int(oidp, &new_val, 0, req); if (error == 0) { if ((new_val > 0) && (new_val < MAXIMUM_SCHEDULE_QUANTUM)) { quantum = new_val; } else { error = EINVAL; } } return (error); } SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW, 0, sizeof quantum, sysctl_kern_quantum, "I", ""); /* maybe_resched: Decide if you need to reschedule or not * taking the priorities and schedulers into account. */ static void maybe_resched(struct proc *chk) { struct proc *p = curproc; /* XXX */ if (p == 0 || ((chk->p_priority < curpriority) && ((RTP_PRIO_BASE(chk->p_rtprio.type) == RTP_PRIO_BASE(p->p_rtprio.type))))) need_resched(); } #define ROUNDROBIN_INTERVAL (hz / quantum) int roundrobin_interval(void) { return ROUNDROBIN_INTERVAL; } /* * Force switch among equal priority processes every 100ms. */ /* ARGSUSED */ static void roundrobin(arg) void *arg; { struct proc *p = curproc; /* XXX */ if (p == 0 || RTP_PRIO_NEED_RR(p->p_rtprio.type)) need_resched(); timeout(roundrobin, NULL, ROUNDROBIN_INTERVAL); } /* * Constants for digital decay and forget: * 90% of (p_estcpu) usage in 5 * loadav time * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) * Note that, as ps(1) mentions, this can let percentages * total over 100% (I've seen 137.9% for 3 processes). * * Note that statclock() updates p_estcpu and p_cpticks asynchronously. * * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds. * That is, the system wants to compute a value of decay such * that the following for loop: * for (i = 0; i < (5 * loadavg); i++) * p_estcpu *= decay; * will compute * p_estcpu *= 0.1; * for all values of loadavg: * * Mathematically this loop can be expressed by saying: * decay ** (5 * loadavg) ~= .1 * * The system computes decay as: * decay = (2 * loadavg) / (2 * loadavg + 1) * * We wish to prove that the system's computation of decay * will always fulfill the equation: * decay ** (5 * loadavg) ~= .1 * * If we compute b as: * b = 2 * loadavg * then * decay = b / (b + 1) * * We now need to prove two things: * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) * * Facts: * For x close to zero, exp(x) =~ 1 + x, since * exp(x) = 0! + x**1/1! + x**2/2! + ... . * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. * For x close to zero, ln(1+x) =~ x, since * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 
* ln(.1) =~ -2.30 * * Proof of (1): * Solve (factor)**(power) =~ .1 given power (5*loadav): * solving for factor, * ln(factor) =~ (-2.30/5*loadav), or * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED * * Proof of (2): * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): * solving for power, * power*ln(b/(b+1)) =~ -2.30, or * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED * * Actual power values for the implemented algorithm are as follows: * loadav: 1 2 3 4 * power: 5.68 10.32 14.94 19.55 */ /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ #define loadfactor(loadav) (2 * (loadav)) #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ /* * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). * * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). * * If you don't want to bother with the faster/more-accurate formula, you * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate * (more general) method of calculating the %age of CPU used by a process. */ #define CCPU_SHIFT 11 /* * Recompute process priorities, every hz ticks. */ /* ARGSUSED */ static void schedcpu(arg) void *arg; { register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); register struct proc *p; register int s; register unsigned int newcpu; - wakeup((caddr_t)&lbolt); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { /* * Increment time in/out of memory and sleep time * (if sleeping). We ignore overflow; with 16-bit int's * (remember them?) overflow takes 45 days. */ p->p_swtime++; if (p->p_stat == SSLEEP || p->p_stat == SSTOP) p->p_slptime++; p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; /* * If the process has slept the entire second, * stop recalculating its priority until it wakes up. */ if (p->p_slptime > 1) continue; s = splhigh(); /* prevent state changes and protect run queue */ /* * p_pctcpu is only for ps. */ #if (FSHIFT >= CCPU_SHIFT) p->p_pctcpu += (hz == 100)? ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 100 * (((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT)) / hz; #else p->p_pctcpu += ((FSCALE - ccpu) * (p->p_cpticks * FSCALE / hz)) >> FSHIFT; #endif p->p_cpticks = 0; newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice; p->p_estcpu = min(newcpu, UCHAR_MAX); resetpriority(p); if (p->p_priority >= PUSER) { #define PPQ (128 / NQS) /* priorities per queue */ if ((p != curproc) && #ifdef SMP (u_char)p->p_oncpu == 0xff && /* idle */ #endif p->p_stat == SRUN && (p->p_flag & P_INMEM) && (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) { remrq(p); p->p_priority = p->p_usrpri; setrunqueue(p); } else p->p_priority = p->p_usrpri; } splx(s); } vmmeter(); + wakeup((caddr_t)&lbolt); timeout(schedcpu, (void *)0, hz); } /* * Recalculate the priority of a process after it has slept for a while. * For all load averages >= 1 and max p_estcpu of 255, sleeping for at * least six times the loadfactor will decay p_estcpu to zero. 
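 *
 * (An editor's worked example of the decay step follows.)
 */
#if 0
/*
 * Editor's illustration, not part of this revision: one decay step as
 * used by schedcpu() and updatepri() above.  example_decay_once is a
 * hypothetical name.  With a load average of 1.0, loadfactor() yields
 * 2*FSCALE and decay_cpu() multiplies by 2/3, so an estcpu of 90
 * decays 60, 40, 26, 17, 11 -- close to the 90% decay over five
 * steps derived above.
 */
static u_int
example_decay_once(u_int estcpu, fixpt_t loadav)
{
	fixpt_t loadfac = loadfactor(loadav);

	return ((u_int)decay_cpu(loadfac, estcpu));
}
#endif
/*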
*/ static void updatepri(p) register struct proc *p; { register unsigned int newcpu = p->p_estcpu; register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); if (p->p_slptime > 5 * loadfac) p->p_estcpu = 0; else { p->p_slptime--; /* the first time was done in schedcpu */ while (newcpu && --p->p_slptime) newcpu = (int) decay_cpu(loadfac, newcpu); p->p_estcpu = min(newcpu, UCHAR_MAX); } resetpriority(p); } /* * We're only looking at 7 bits of the address; everything is * aligned to 4, lots of things are aligned to greater powers * of 2. Shift right by 8, i.e. drop the bottom 256 worth. */ #define TABLESIZE 128 static TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE]; #define LOOKUP(x) (((long)(x) >> 8) & (TABLESIZE - 1)) /* * During autoconfiguration or after a panic, a sleep will simply * lower the priority briefly to allow interrupts, then return. * The priority to be used (safepri) is machine-dependent, thus this * value is initialized and maintained in the machine-dependent layers. * This priority will typically be 0, or the lowest priority * that is safe for use on the interrupt stack; it can be made * higher to block network software interrupts after panics. */ int safepri; void sleepinit() { int i; for (i = 0; i < TABLESIZE; i++) TAILQ_INIT(&slpque[i]); } /* * General sleep call. Suspends the current process until a wakeup is * performed on the specified identifier. The process will then be made * runnable with the specified priority. Sleeps at most timo/hz seconds * (0 means no timeout). If pri includes PCATCH flag, signals are checked * before and after sleeping, else signals are not checked. Returns 0 if * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a * signal needs to be delivered, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). */ int tsleep(ident, priority, wmesg, timo) void *ident; int priority, timo; const char *wmesg; { struct proc *p = curproc; int s, sig, catch = priority & PCATCH; struct callout_handle thandle; #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 1, 0); #endif s = splhigh(); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, * just give interrupts a chance, then just return; * don't run any other procs or panic below, * in case this is the idle process and already asleep. */ splx(safepri); splx(s); return (0); } #ifdef DIAGNOSTIC if(p == NULL) panic("tsleep1"); if (ident == NULL || p->p_stat != SRUN) panic("tsleep"); /* XXX This is not exhaustive, just the most common case */ if ((p->p_procq.tqe_prev != NULL) && (*p->p_procq.tqe_prev == p)) panic("sleeping process already on another queue"); #endif p->p_wchan = ident; p->p_wmesg = wmesg; p->p_slptime = 0; p->p_priority = priority & PRIMASK; TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq); if (timo) thandle = timeout(endtsleep, (void *)p, timo); /* * We put ourselves on the sleep queue and start our timeout * before calling CURSIG, as we could stop there, and a wakeup * or a SIGCONT (or both) could occur while we were stopped. * A SIGCONT would cause us to be marked as SSLEEP * without resuming us, thus we must be ready for sleep * when CURSIG is called. If the wakeup happens while we're * stopped, p->p_wchan will be 0 upon return from CURSIG. 
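 *
 * (A minimal editor's usage sketch for the tsleep/wakeup pair
 * described above follows.)
 */
#if 0
/*
 * Editor's illustration, not part of this revision.  example_flag and
 * the function names are hypothetical; the channel passed to tsleep()
 * and wakeup() is simply the address of the flag.
 */
static int example_flag;

static int
example_wait(void)
{
	int error;

	while (example_flag == 0) {
		/* catch signals, give up the CPU for at most one second */
		error = tsleep(&example_flag, PRIBIO | PCATCH, "exwait", hz);
		if (error == EWOULDBLOCK)
			continue;		/* timed out; test again */
		if (error)
			return (error);		/* EINTR or ERESTART */
	}
	return (0);
}

static void
example_post(void)
{
	example_flag = 1;
	wakeup(&example_flag);	/* make all sleepers on the channel runnable */
}
#endif
/*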
*/ if (catch) { p->p_flag |= P_SINTR; if ((sig = CURSIG(p))) { if (p->p_wchan) unsleep(p); p->p_stat = SRUN; goto resume; } if (p->p_wchan == 0) { catch = 0; goto resume; } } else sig = 0; p->p_stat = SSLEEP; p->p_stats->p_ru.ru_nvcsw++; mi_switch(); resume: curpriority = p->p_usrpri; splx(s); p->p_flag &= ~P_SINTR; if (p->p_flag & P_TIMEOUT) { p->p_flag &= ~P_TIMEOUT; if (sig == 0) { #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif return (EWOULDBLOCK); } } else if (timo) untimeout(endtsleep, (void *)p, thandle); if (catch && (sig != 0 || (sig = CURSIG(p)))) { #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif if (p->p_sigacts->ps_sigintr & sigmask(sig)) return (EINTR); return (ERESTART); } #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif return (0); } /* * Implement timeout for tsleep. * If process hasn't been awakened (wchan non-zero), * set timeout flag and undo the sleep. If proc * is stopped, just unsleep so it will remain stopped. */ static void endtsleep(arg) void *arg; { register struct proc *p; int s; p = (struct proc *)arg; s = splhigh(); if (p->p_wchan) { if (p->p_stat == SSLEEP) setrunnable(p); else unsleep(p); p->p_flag |= P_TIMEOUT; } splx(s); } /* * Remove a process from its wait queue */ void unsleep(p) register struct proc *p; { int s; s = splhigh(); if (p->p_wchan) { TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_procq); p->p_wchan = 0; } splx(s); } /* * Make all processes sleeping on the specified identifier runnable. */ void wakeup(ident) register void *ident; { register struct slpquehead *qp; register struct proc *p; int s; s = splhigh(); qp = &slpque[LOOKUP(ident)]; restart: for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) { #ifdef DIAGNOSTIC if (p->p_stat != SSLEEP && p->p_stat != SSTOP) panic("wakeup"); #endif if (p->p_wchan == ident) { TAILQ_REMOVE(qp, p, p_procq); p->p_wchan = 0; if (p->p_stat == SSLEEP) { /* OPTIMIZED EXPANSION OF setrunnable(p); */ if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; p->p_stat = SRUN; if (p->p_flag & P_INMEM) { setrunqueue(p); maybe_resched(p); } else { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } /* END INLINE EXPANSION */ goto restart; } } } splx(s); } /* * Make a process sleeping on the specified identifier runnable. * May wake more than one process if a target process is currently * swapped out. */ void wakeup_one(ident) register void *ident; { register struct slpquehead *qp; register struct proc *p; int s; s = splhigh(); qp = &slpque[LOOKUP(ident)]; for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) { #ifdef DIAGNOSTIC if (p->p_stat != SSLEEP && p->p_stat != SSTOP) panic("wakeup_one"); #endif if (p->p_wchan == ident) { TAILQ_REMOVE(qp, p, p_procq); p->p_wchan = 0; if (p->p_stat == SSLEEP) { /* OPTIMIZED EXPANSION OF setrunnable(p); */ if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; p->p_stat = SRUN; if (p->p_flag & P_INMEM) { setrunqueue(p); maybe_resched(p); break; } else { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } /* END INLINE EXPANSION */ } } } splx(s); } /* * The machine independent parts of mi_switch(). * Must be called at splstatclock() or higher. */ void mi_switch() { register struct proc *p = curproc; /* XXX */ register struct rlimit *rlim; register long s, u; int x; struct timeval tv; /* * XXX this spl is almost unnecessary. It is partly to allow for * sloppy callers that don't do it (issignal() via CURSIG() is the * main offender).
It is partly to work around a bug in the i386 * cpu_switch() (the ipl is not preserved). We ran for years * without it. I think there was only an interrupt latency problem. * The main caller, tsleep(), does an splx() a couple of instructions * after calling here. The buggy caller, issignal(), usually calls * here at spl0() and sometimes returns at splhigh(). The process * then runs for a little too long at splhigh(). The ipl gets fixed * when the process returns to user mode (or earlier). * * It would probably be better to always call here at spl0(). Callers * are prepared to give up control to another process, so they must * be prepared to be interrupted. The clock stuff here may not * actually need splstatclock(). */ x = splstatclock(); #ifdef SIMPLELOCK_DEBUG if (p->p_simple_locks) printf("sleep: holding simple lock\n"); #endif /* * Compute the amount of time during which the current * process was running, and add that to its total so far. */ microtime(&tv); u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec); s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec); if (u < 0) { u += 1000000; s--; } else if (u >= 1000000) { u -= 1000000; s++; } #ifdef SMP if (s < 0) s = u = 0; #endif p->p_rtime.tv_usec = u; p->p_rtime.tv_sec = s; /* * Check if the process exceeds its cpu resource allocation. * If over max, kill it. */ if (p->p_stat != SZOMB) { rlim = &p->p_rlimit[RLIMIT_CPU]; if (s >= rlim->rlim_cur) { if (s >= rlim->rlim_max) killproc(p, "exceeded maximum CPU limit"); else { psignal(p, SIGXCPU); if (rlim->rlim_cur < rlim->rlim_max) rlim->rlim_cur += 5; } } } /* * Pick a new current process and record its start time. */ cnt.v_swtch++; cpu_switch(p); microtime(&runtime); splx(x); } /* * Initialize the (doubly-linked) run queues * to be empty. */ /* ARGSUSED*/ static void rqinit(dummy) void *dummy; { register int i; for (i = 0; i < NQS; i++) { qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; rtqs[i].ph_link = rtqs[i].ph_rlink = (struct proc *)&rtqs[i]; idqs[i].ph_link = idqs[i].ph_rlink = (struct proc *)&idqs[i]; } } /* * Change process state to be runnable, * placing it on the run queue if it is in memory, * and awakening the swapper if it isn't in memory. */ void setrunnable(p) register struct proc *p; { register int s; s = splhigh(); switch (p->p_stat) { case 0: case SRUN: case SZOMB: default: panic("setrunnable"); case SSTOP: case SSLEEP: unsleep(p); /* e.g. when sending signals */ break; case SIDL: break; } p->p_stat = SRUN; if (p->p_flag & P_INMEM) setrunqueue(p); splx(s); if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; if ((p->p_flag & P_INMEM) == 0) { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } else maybe_resched(p); } /* * Compute the priority of a process when running in user mode. * Arrange to reschedule if the resulting priority is better * than that of the current process. */ void resetpriority(p) register struct proc *p; { register unsigned int newpriority; if (p->p_rtprio.type == RTP_PRIO_NORMAL) { newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; newpriority = min(newpriority, MAXPRI); p->p_usrpri = newpriority; } maybe_resched(p); } /* ARGSUSED */ static void sched_setup __P((void *dummy)); static void sched_setup(dummy) void *dummy; { /* Kick off timeout driven events by calling them for the first time.
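 * (The self-rearming pattern both callbacks use is sketched below.)
 */
#if 0
/*
 * Editor's illustration, not part of this revision: the self-rearming
 * timeout pattern used by roundrobin() and schedcpu() above.
 * example_tick is a hypothetical name.
 */
static void
example_tick(void *arg)
{
	/* ... periodic work goes here ... */
	timeout(example_tick, arg, hz);		/* re-arm, one second out */
}
/* primed once at boot, e.g. example_tick(NULL); self-arming thereafter */
#endif
/*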
*/ roundrobin(NULL); schedcpu(NULL); } SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) Index: head/sys/kern/vfs_bio.c =================================================================== --- head/sys/kern/vfs_bio.c (revision 34265) +++ head/sys/kern/vfs_bio.c (revision 34266) @@ -1,2314 +1,2390 @@ /* * Copyright (c) 1994,1997 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * - * $Id: vfs_bio.c,v 1.153 1998/03/04 03:17:30 dyson Exp $ + * $Id: vfs_bio.c,v 1.154 1998/03/07 21:35:24 dyson Exp $ */ /* * this file contains a new buffer I/O scheme implementing a coherent * VM object and buffer cache scheme. Pains have been taken to make * sure that the performance degradation associated with schemes such * as this is not realized. * * Author: John S. Dyson * Significant help during the development and debugging phases * had been provided by David Greenman, also of the FreeBSD core team. */ #include "opt_bounce.h" #define VMIO #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_BIOBUF, "BIO buffer", "BIO buffer"); +struct bio_ops bioops; /* I/O operation notification */ + +#if 0 /* replaced by sched_sync */ static void vfs_update __P((void)); static struct proc *updateproc; static struct kproc_desc up_kp = { "update", vfs_update, &updateproc }; SYSINIT_KT(update, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) +#endif struct buf *buf; /* buffer header pool */ struct swqueue bswlist; static int count_lock_queue __P((void)); static void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); static void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to); static void vfs_buf_set_valid(struct buf *bp, vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, vm_page_t m); static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m); static void vfs_clean_pages(struct buf * bp); static void vfs_setdirty(struct buf *bp); static void vfs_vmio_release(struct buf *bp); static void flushdirtybuffers(int slpflag, int slptimeo); int needsbuffer; /* * Internal update daemon, process 3 * The variable vfs_update_wakeup allows for internal syncs. */ int vfs_update_wakeup; /* * buffers base kva */ /* * bogus page -- for I/O to/from partially complete buffers * this is a temporary solution to the problem, but it is not * really that bad. it would be better to split the buffer * for input in the case of buffers partially already in memory, * but the code is intricate enough already.
*/ vm_page_t bogus_page; static vm_offset_t bogus_offset; static int bufspace, maxbufspace, vmiospace, maxvmiobufspace, bufmallocspace, maxbufmallocspace; int numdirtybuffers; static int lodirtybuffers, hidirtybuffers; static int numfreebuffers, lofreebuffers, hifreebuffers; static int kvafreespace; SYSCTL_INT(_vfs, OID_AUTO, numdirtybuffers, CTLFLAG_RD, &numdirtybuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, lodirtybuffers, CTLFLAG_RW, &lodirtybuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, hidirtybuffers, CTLFLAG_RW, &hidirtybuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, numfreebuffers, CTLFLAG_RD, &numfreebuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, lofreebuffers, CTLFLAG_RW, &lofreebuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, hifreebuffers, CTLFLAG_RW, &hifreebuffers, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, maxbufspace, CTLFLAG_RW, &maxbufspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, bufspace, CTLFLAG_RD, &bufspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, maxvmiobufspace, CTLFLAG_RW, &maxvmiobufspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, vmiospace, CTLFLAG_RD, &vmiospace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, maxmallocbufspace, CTLFLAG_RW, &maxbufmallocspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, bufmallocspace, CTLFLAG_RD, &bufmallocspace, 0, ""); SYSCTL_INT(_vfs, OID_AUTO, kvafreespace, CTLFLAG_RD, &kvafreespace, 0, ""); static LIST_HEAD(bufhashhdr, buf) bufhashtbl[BUFHSZ], invalhash; struct bqueues bufqueues[BUFFER_QUEUES] = {0}; extern int vm_swap_size; #define BUF_MAXUSE 24 #define VFS_BIO_NEED_ANY 1 #define VFS_BIO_NEED_LOWLIMIT 2 #define VFS_BIO_NEED_FREE 4 /* * Initialize buffer headers and related structures. */ void bufinit() { struct buf *bp; int i; TAILQ_INIT(&bswlist); LIST_INIT(&invalhash); /* first, make a null hash table */ for (i = 0; i < BUFHSZ; i++) LIST_INIT(&bufhashtbl[i]); /* next, make a null set of free lists */ for (i = 0; i < BUFFER_QUEUES; i++) TAILQ_INIT(&bufqueues[i]); /* finally, initialize each buffer header and stick on empty q */ for (i = 0; i < nbuf; i++) { bp = &buf[i]; bzero(bp, sizeof *bp); bp->b_flags = B_INVAL; /* we're just an empty header */ bp->b_dev = NODEV; bp->b_rcred = NOCRED; bp->b_wcred = NOCRED; bp->b_qindex = QUEUE_EMPTY; bp->b_vnbufs.le_next = NOLIST; bp->b_generation = 0; + LIST_INIT(&bp->b_dep); TAILQ_INSERT_TAIL(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_INSERT_HEAD(&invalhash, bp, b_hash); } /* * maxbufspace is currently calculated to support all filesystem blocks * to be 8K. If you happen to use a 16K filesystem, the size of the buffer * cache is still the same as it would be for 8K filesystems. This * keeps the size of the buffer cache "in check" for big block filesystems. */ maxbufspace = (nbuf + 8) * DFLTBSIZE; /* * reserve 1/3 of the buffers for metadata (VDIR) which might not be VMIO'ed */ maxvmiobufspace = 2 * maxbufspace / 3; /* * Limit the amount of malloc memory since it is wired permanently into * the kernel space. Even though this is accounted for in the buffer * allocation, we don't want the malloced region to grow uncontrolled. * The malloc scheme improves memory utilization significantly on average * (small) directories. */ maxbufmallocspace = maxbufspace / 20; /* * Remove the probability of deadlock conditions by limiting the * number of dirty buffers. 
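 *
 * Editor's worked example (illustrative): with nbuf = 1024, the
 * defaults just below come out to hidirtybuffers = 148 and
 * lodirtybuffers = 74 -- flushing starts once roughly 14% of the
 * headers are dirty -- and lofreebuffers = 61, hifreebuffers = 122,
 * so allocators begin to wait when under about 6% remain free.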
*/ hidirtybuffers = nbuf / 8 + 20; lodirtybuffers = nbuf / 16 + 10; numdirtybuffers = 0; lofreebuffers = nbuf / 18 + 5; hifreebuffers = 2 * lofreebuffers; numfreebuffers = nbuf; kvafreespace = 0; bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE); bogus_page = vm_page_alloc(kernel_object, ((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT), VM_ALLOC_NORMAL); } /* * Free the kva allocation for a buffer * Must be called only at splbio or higher, * as this is the only locking for buffer_map. */ static void bfreekva(struct buf * bp) { if (bp->b_kvasize == 0) return; vm_map_delete(buffer_map, (vm_offset_t) bp->b_kvabase, (vm_offset_t) bp->b_kvabase + bp->b_kvasize); bp->b_kvasize = 0; } /* * remove the buffer from the appropriate free list */ void bremfree(struct buf * bp) { int s = splbio(); if (bp->b_qindex != QUEUE_NONE) { if (bp->b_qindex == QUEUE_EMPTY) { kvafreespace -= bp->b_kvasize; } TAILQ_REMOVE(&bufqueues[bp->b_qindex], bp, b_freelist); bp->b_qindex = QUEUE_NONE; } else { #if !defined(MAX_PERF) panic("bremfree: removing a buffer when not on a queue"); #endif } if ((bp->b_flags & B_INVAL) || (bp->b_flags & (B_DELWRI|B_LOCKED)) == 0) --numfreebuffers; splx(s); } /* * Get a buffer with the specified data. Look in the cache first. */ int bread(struct vnode * vp, daddr_t blkno, int size, struct ucred * cred, struct buf ** bpp) { struct buf *bp; bp = getblk(vp, blkno, size, 0, 0); *bpp = bp; /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); if (bp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); bp->b_rcred = cred; } vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); return (biowait(bp)); } return (0); } /* * Operates like bread, but also starts asynchronous I/O on * read-ahead blocks. */ int breadn(struct vnode * vp, daddr_t blkno, int size, daddr_t * rablkno, int *rabsize, int cnt, struct ucred * cred, struct buf ** bpp) { struct buf *bp, *rabp; int i; int rv = 0, readwait = 0; *bpp = bp = getblk(vp, blkno, size, 0, 0); /* if not found in cache, do some I/O */ if ((bp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); if (bp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); bp->b_rcred = cred; } vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); ++readwait; } for (i = 0; i < cnt; i++, rablkno++, rabsize++) { if (inmem(vp, *rablkno)) continue; rabp = getblk(vp, *rablkno, *rabsize, 0, 0); if ((rabp->b_flags & B_CACHE) == 0) { if (curproc != NULL) curproc->p_stats->p_ru.ru_inblock++; rabp->b_flags |= B_READ | B_ASYNC; rabp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); if (rabp->b_rcred == NOCRED) { if (cred != NOCRED) crhold(cred); rabp->b_rcred = cred; } vfs_busy_pages(rabp, 0); VOP_STRATEGY(rabp); } else { brelse(rabp); } } if (readwait) { rv = biowait(bp); } return (rv); } /* * Write, release buffer on completion. (Done by iodone * if async.) 
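 *
 * (Before the write path, an editor's usage sketch for the read
 * routines defined above is interposed here.)
 */
#if 0
/*
 * Editor's illustration, not part of this revision: the conventional
 * bread()/brelse() pattern for the routines above.  example_get is a
 * hypothetical name.
 */
static int
example_get(struct vnode *vp, daddr_t blkno, int size, struct ucred *cred)
{
	struct buf *bp;
	int error;

	error = bread(vp, blkno, size, cred, &bp);
	if (error) {
		brelse(bp);	/* a buffer is returned even on error */
		return (error);
	}
	/* ... consume bp->b_data ... */
	brelse(bp);		/* done; let the buffer be recycled */
	return (0);
}
#endif
/*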
*/ int bwrite(struct buf * bp) { int oldflags = bp->b_flags; + struct vnode *vp; + struct mount *mp; + if (bp->b_flags & B_INVAL) { brelse(bp); return (0); } #if !defined(MAX_PERF) if (!(bp->b_flags & B_BUSY)) panic("bwrite: buffer is not busy???"); #endif bp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); bp->b_flags |= B_WRITEINPROG; if ((oldflags & B_DELWRI) == B_DELWRI) { --numdirtybuffers; reassignbuf(bp, bp->b_vp); } bp->b_vp->v_numoutput++; vfs_busy_pages(bp, 1); if (curproc != NULL) curproc->p_stats->p_ru.ru_oublock++; VOP_STRATEGY(bp); + /* + * Collect statistics on synchronous and asynchronous writes. + * Writes to block devices are charged to their associated + * filesystem (if any). + */ + if ((vp = bp->b_vp) != NULL) { + if (vp->v_type == VBLK) + mp = vp->v_specmountpoint; + else + mp = vp->v_mount; + if (mp != NULL) + if ((oldflags & B_ASYNC) == 0) + mp->mnt_stat.f_syncwrites++; + else + mp->mnt_stat.f_asyncwrites++; + } + if ((oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); if (oldflags & B_DELWRI) { reassignbuf(bp, bp->b_vp); } brelse(bp); return (rtval); } return (0); } inline void vfs_bio_need_satisfy(void) { ++numfreebuffers; if (!needsbuffer) return; if (numdirtybuffers < lodirtybuffers) { needsbuffer &= ~(VFS_BIO_NEED_ANY | VFS_BIO_NEED_LOWLIMIT); } else { needsbuffer &= ~VFS_BIO_NEED_ANY; } if (numfreebuffers >= hifreebuffers) { needsbuffer &= ~VFS_BIO_NEED_FREE; } wakeup(&needsbuffer); } /* * Delayed write. (Buffer is marked dirty). */ void bdwrite(struct buf * bp) { + int s; + struct vnode *vp; #if !defined(MAX_PERF) if ((bp->b_flags & B_BUSY) == 0) { panic("bdwrite: buffer is not busy"); } #endif if (bp->b_flags & B_INVAL) { brelse(bp); return; } if (bp->b_flags & B_TAPE) { bawrite(bp); return; } bp->b_flags &= ~(B_READ|B_RELBUF); if ((bp->b_flags & B_DELWRI) == 0) { bp->b_flags |= B_DONE | B_DELWRI; + s = splbio(); reassignbuf(bp, bp->b_vp); + splx(s); ++numdirtybuffers; } /* * This bmap keeps the system from needing to do the bmap later, * perhaps when the system is attempting to do a sync. Since it * is likely that the indirect block -- or whatever other datastructure * that the filesystem needs is still in memory now, it is a good * thing to do this. Note also, that if the pageout daemon is * requesting a sync -- there might not be enough memory to do * the bmap then... So, this is important to do. */ if (bp->b_lblkno == bp->b_blkno) { VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); } /* * Set the *dirty* buffer range based upon the VM system dirty pages. */ vfs_setdirty(bp); /* * We need to do this here to satisfy the vnode_pager and the * pageout daemon, so that it thinks that the pages have been * "cleaned". Note that since the pages are in a delayed write * buffer -- the VFS layer "will" see that the pages get written * out on the next sync, or perhaps the cluster will be completed. */ vfs_clean_pages(bp); bqrelse(bp); + /* + * XXX The soft dependency code is not prepared to + * have I/O done when a bdwrite is requested. For + * now we just let the write be delayed if it is + * requested by the soft dependency code. + */ + if ((vp = bp->b_vp) && + (vp->v_type == VBLK && vp->v_specmountpoint && + (vp->v_specmountpoint->mnt_flag & MNT_SOFTDEP)) || + (vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))) + return; + if (numdirtybuffers >= hidirtybuffers) flushdirtybuffers(0, 0); return; } + /* + * Same as first half of bdwrite, mark buffer dirty, but do not release it. 
+ * Check how this compares with vfs_setdirty(); XXX [JRE] + */ +void +bdirty(bp) + struct buf *bp; +{ + int s; + + bp->b_flags &= ~(B_READ|B_RELBUF); /* XXX ??? check this */ + if ((bp->b_flags & B_DELWRI) == 0) { + bp->b_flags |= B_DONE | B_DELWRI; /* why done? XXX JRE */ + s = splbio(); + reassignbuf(bp, bp->b_vp); + splx(s); + ++numdirtybuffers; + } +} + +/* * Asynchronous write. * Start output on a buffer, but do not wait for it to complete. * The buffer is released when the output completes. */ void bawrite(struct buf * bp) { bp->b_flags |= B_ASYNC; (void) VOP_BWRITE(bp); } /* * Ordered write. * Start output on a buffer, but only wait for it to complete if the * output device cannot guarantee ordering in some other way. Devices * that can perform asynchronous ordered writes will set the B_ASYNC * flag in their strategy routine. * The buffer is released when the output completes. */ int bowrite(struct buf * bp) { /* * XXX Add in B_ASYNC once the SCSI * layer can deal with ordered * writes properly. */ bp->b_flags |= B_ORDERED; return (VOP_BWRITE(bp)); } /* * Release a buffer. */ void brelse(struct buf * bp) { int s; if (bp->b_flags & B_CLUSTER) { relpbuf(bp); return; } s = splbio(); /* anyone need this block? */ if (bp->b_flags & B_WANTED) { bp->b_flags &= ~(B_WANTED | B_AGE); wakeup(bp); } if (bp->b_flags & B_LOCKED) bp->b_flags &= ~B_ERROR; if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) || (bp->b_bufsize <= 0)) { bp->b_flags |= B_INVAL; + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_deallocate) + (*bioops.io_deallocate)(bp); if (bp->b_flags & B_DELWRI) --numdirtybuffers; bp->b_flags &= ~(B_DELWRI | B_CACHE); if ((bp->b_flags & B_VMIO) == 0) { if (bp->b_bufsize) allocbuf(bp, 0); if (bp->b_vp) brelvp(bp); } } /* * VMIO buffer rundown. It is not very necessary to keep a VMIO buffer * constituted, so the B_INVAL flag is used to *invalidate* the buffer, * but the VM object is kept around. The B_NOCACHE flag is used to * invalidate the pages in the VM object. * * If the buffer is a partially filled NFS buffer, keep it * since invalidating it now will lose information. The valid * flags in the vm_pages have only DEV_BSIZE resolution but * the b_validoff, b_validend fields have byte resolution. * This can avoid unnecessary re-reads of the buffer. * XXX this seems to cause performance problems.
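 *
 * (An editor's sketch contrasting the write disciplines defined above
 * -- bwrite, bdwrite, bawrite -- is interposed below.)
 */
#if 0
	/*
	 * Editor's illustration, not part of this revision.  bp is a
	 * busy, dirty buffer owned by the caller and error is a local
	 * int; exactly one of the three disciplines would be chosen.
	 */
	error = bwrite(bp);	/* synchronous: start I/O, wait, release */
	bdwrite(bp);		/* delayed: mark B_DELWRI and keep it
				 * cached; written out later, e.g. at sync */
	bawrite(bp);		/* asynchronous: start I/O now; released
				 * by iodone when the write completes */
#endif
/*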
*/ if ((bp->b_flags & B_VMIO) && !(bp->b_vp->v_tag == VT_NFS && bp->b_vp->v_type != VBLK && (bp->b_flags & B_DELWRI) != 0) #ifdef notdef && (bp->b_vp->v_tag != VT_NFS || bp->b_vp->v_type == VBLK || (bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR)) || bp->b_validend == 0 || (bp->b_validoff == 0 && bp->b_validend == bp->b_bufsize)) #endif ) { int i, j, resid; vm_page_t m; off_t foff; vm_pindex_t poff; vm_object_t obj; struct vnode *vp; int blksize; vp = bp->b_vp; if (vp->v_type == VBLK) blksize = DEV_BSIZE; else blksize = vp->v_mount->mnt_stat.f_iosize; resid = bp->b_bufsize; foff = -1LL; for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; if (m == bogus_page) { obj = (vm_object_t) vp->v_object; foff = (off_t) bp->b_lblkno * blksize; poff = OFF_TO_IDX(foff); for (j = i; j < bp->b_npages; j++) { m = bp->b_pages[j]; if (m == bogus_page) { m = vm_page_lookup(obj, poff + j); #if !defined(MAX_PERF) if (!m) { panic("brelse: page missing\n"); } #endif bp->b_pages[j] = m; } } if ((bp->b_flags & B_INVAL) == 0) { pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } break; } if (bp->b_flags & (B_NOCACHE|B_ERROR)) { if ((blksize & PAGE_MASK) == 0) { vm_page_set_invalid(m, 0, resid); } else { if (foff == -1LL) foff = (off_t) bp->b_lblkno * blksize; vm_page_set_invalid(m, (vm_offset_t) foff, resid); } } resid -= PAGE_SIZE; } if (bp->b_flags & (B_INVAL | B_RELBUF)) vfs_vmio_release(bp); } else if (bp->b_flags & B_VMIO) { if (bp->b_flags & (B_INVAL | B_RELBUF)) vfs_vmio_release(bp); } #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_NONE) panic("brelse: free buffer onto another queue???"); #endif /* enqueue */ /* buffers with no memory */ if (bp->b_bufsize == 0) { bp->b_flags |= B_INVAL; bp->b_qindex = QUEUE_EMPTY; TAILQ_INSERT_HEAD(&bufqueues[QUEUE_EMPTY], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; kvafreespace += bp->b_kvasize; bp->b_generation++; /* buffers with junk contents */ } else if (bp->b_flags & (B_ERROR | B_INVAL | B_NOCACHE | B_RELBUF)) { bp->b_flags |= B_INVAL; bp->b_qindex = QUEUE_AGE; TAILQ_INSERT_HEAD(&bufqueues[QUEUE_AGE], bp, b_freelist); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); bp->b_dev = NODEV; bp->b_generation++; /* buffers that are locked */ } else if (bp->b_flags & B_LOCKED) { bp->b_qindex = QUEUE_LOCKED; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); /* buffers with stale but valid contents */ } else if (bp->b_flags & B_AGE) { bp->b_qindex = QUEUE_AGE; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_AGE], bp, b_freelist); /* buffers with valid and quite potentially reusable contents */ } else { bp->b_qindex = QUEUE_LRU; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); } if ((bp->b_flags & B_INVAL) || (bp->b_flags & (B_LOCKED|B_DELWRI)) == 0) { if (bp->b_flags & B_DELWRI) { --numdirtybuffers; bp->b_flags &= ~B_DELWRI; } vfs_bio_need_satisfy(); } /* unlock */ bp->b_flags &= ~(B_ORDERED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); splx(s); } /* * Release a buffer. */ void bqrelse(struct buf * bp) { int s; s = splbio(); /* anyone need this block?
*/ if (bp->b_flags & B_WANTED) { bp->b_flags &= ~(B_WANTED | B_AGE); wakeup(bp); } #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_NONE) panic("bqrelse: free buffer onto another queue???"); #endif if (bp->b_flags & B_LOCKED) { bp->b_flags &= ~B_ERROR; bp->b_qindex = QUEUE_LOCKED; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LOCKED], bp, b_freelist); /* buffers with stale but valid contents */ } else { bp->b_qindex = QUEUE_LRU; TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); } if ((bp->b_flags & (B_LOCKED|B_DELWRI)) == 0) { vfs_bio_need_satisfy(); } /* unlock */ bp->b_flags &= ~(B_ORDERED | B_WANTED | B_BUSY | B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF); splx(s); } static void vfs_vmio_release(bp) struct buf *bp; { int i; vm_page_t m; for (i = 0; i < bp->b_npages; i++) { m = bp->b_pages[i]; bp->b_pages[i] = NULL; vm_page_unwire(m); /* * We don't mess with busy pages, it is * the responsibility of the process that * busied the pages to deal with them. */ if ((m->flags & PG_BUSY) || (m->busy != 0)) continue; if (m->wire_count == 0) { /* * If this is an async free -- we cannot place * pages onto the cache queue. If it is an * async free, then we don't modify any queues. * This is probably in error (for perf reasons), * and we will eventually need to build * a more complete infrastructure to support I/O * rundown. */ if ((bp->b_flags & B_ASYNC) == 0) { /* * In the case of sync buffer frees, we can do pretty much * anything to any of the memory queues. Specifically, * the cache queue is okay to be modified. */ if (m->valid) { if(m->dirty == 0) vm_page_test_dirty(m); /* * this keeps pressure off of the process memory */ if (m->dirty == 0 && m->hold_count == 0) vm_page_cache(m); else vm_page_deactivate(m); } else if (m->hold_count == 0) { m->flags |= PG_BUSY; vm_page_protect(m, VM_PROT_NONE); vm_page_free(m); } } else { /* * If async, then at least we clear the * act_count. */ m->act_count = 0; } } } bufspace -= bp->b_bufsize; vmiospace -= bp->b_bufsize; pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); bp->b_npages = 0; bp->b_bufsize = 0; bp->b_flags &= ~B_VMIO; if (bp->b_vp) brelvp(bp); } /* * Check to see if a block is currently memory resident. */ struct buf * gbincore(struct vnode * vp, daddr_t blkno) { struct buf *bp; struct bufhashhdr *bh; bh = BUFHASH(vp, blkno); bp = bh->lh_first; /* Search hash chain */ while (bp != NULL) { /* hit */ if (bp->b_vp == vp && bp->b_lblkno == blkno && (bp->b_flags & B_INVAL) == 0) { break; } bp = bp->b_hash.le_next; } return (bp); } /* * this routine implements clustered async writes for * clearing out B_DELWRI buffers... This is much better * than the old way of writing only one buffer at a time. 
*/ int vfs_bio_awrite(struct buf * bp) { int i; daddr_t lblkno = bp->b_lblkno; struct vnode *vp = bp->b_vp; int s; int ncl; struct buf *bpa; int nwritten; int size; int maxcl; s = splbio(); /* * right now we support clustered writing only to regular files */ if ((vp->v_type == VREG) && (vp->v_mount != 0) && /* Only on nodes that have the size info */ (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) { size = vp->v_mount->mnt_stat.f_iosize; maxcl = MAXPHYS / size; for (i = 1; i < maxcl; i++) { if ((bpa = gbincore(vp, lblkno + i)) && ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) == (B_DELWRI | B_CLUSTEROK)) && (bpa->b_bufsize == size)) { if ((bpa->b_blkno == bpa->b_lblkno) || (bpa->b_blkno != bp->b_blkno + ((i * size) >> DEV_BSHIFT))) break; } else { break; } } ncl = i; /* * this is a possible cluster write */ if (ncl != 1) { nwritten = cluster_wbuild(vp, size, lblkno, ncl); splx(s); return nwritten; } } bremfree(bp); splx(s); /* * default (old) behavior, writing out only one block */ bp->b_flags |= B_BUSY | B_ASYNC; nwritten = bp->b_bufsize; (void) VOP_BWRITE(bp); return nwritten; } /* * Find a buffer header which is available for use. */ static struct buf * getnewbuf(struct vnode *vp, daddr_t blkno, int slpflag, int slptimeo, int size, int maxsize) { struct buf *bp, *bp1; int nbyteswritten = 0; vm_offset_t addr; static int writerecursion = 0; start: if (bufspace >= maxbufspace) goto trytofreespace; /* can we constitute a new buffer? */ if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]))) { #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_EMPTY) panic("getnewbuf: inconsistent EMPTY queue, qindex=%d", bp->b_qindex); #endif bp->b_flags |= B_BUSY; bremfree(bp); goto fillbuf; } trytofreespace: /* * We keep the file I/O from hogging metadata I/O * This is desirable because file data is cached in the * VM/Buffer cache even if a buffer is freed. */ if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]))) { #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_AGE) panic("getnewbuf: inconsistent AGE queue, qindex=%d", bp->b_qindex); #endif } else if ((bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]))) { #if !defined(MAX_PERF) if (bp->b_qindex != QUEUE_LRU) panic("getnewbuf: inconsistent LRU queue, qindex=%d", bp->b_qindex); #endif } if (!bp) { /* wait for a free buffer of any kind */ needsbuffer |= VFS_BIO_NEED_ANY; do tsleep(&needsbuffer, (PRIBIO + 1) | slpflag, "newbuf", slptimeo); while (needsbuffer & VFS_BIO_NEED_ANY); return (0); } #if defined(DIAGNOSTIC) if (bp->b_flags & B_BUSY) { panic("getnewbuf: busy buffer on free list\n"); } #endif /* * We are fairly aggressive about freeing VMIO buffers, but since * the buffering is intact without buffer headers, there is not * much loss. We gain by maintaining non-VMIOed metadata in buffers. */ if ((bp->b_qindex == QUEUE_LRU) && (bp->b_usecount > 0)) { if ((bp->b_flags & B_VMIO) == 0 || (vmiospace < maxvmiobufspace)) { --bp->b_usecount; TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist); if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) { TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); goto start; } TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); } } /* if we are a delayed write, convert to an async write */ if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) { /* * If our delayed write is likely to be used soon, then * recycle back onto the LRU queue. 
*/ if (vp && (bp->b_vp == vp) && (bp->b_qindex == QUEUE_LRU) && (bp->b_lblkno >= blkno) && (maxsize > 0)) { if (bp->b_usecount > 0) { if (bp->b_lblkno < blkno + (MAXPHYS / maxsize)) { TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist); if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) { TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); bp->b_usecount--; goto start; } TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); } } } /* * Certain layered filesystems can recursively re-enter the vfs_bio * code, due to delayed writes. This helps keep the system from * deadlocking. */ if (writerecursion > 0) { bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]); while (bp) { if ((bp->b_flags & B_DELWRI) == 0) break; bp = TAILQ_NEXT(bp, b_freelist); } if (bp == NULL) { bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]); while (bp) { if ((bp->b_flags & B_DELWRI) == 0) break; bp = TAILQ_NEXT(bp, b_freelist); } } if (bp == NULL) panic("getnewbuf: cannot get buffer, infinite recursion failure"); } else { ++writerecursion; nbyteswritten += vfs_bio_awrite(bp); --writerecursion; if (!slpflag && !slptimeo) { return (0); } goto start; } } if (bp->b_flags & B_WANTED) { bp->b_flags &= ~B_WANTED; wakeup(bp); } bremfree(bp); bp->b_flags |= B_BUSY; if (bp->b_flags & B_VMIO) { bp->b_flags &= ~B_ASYNC; vfs_vmio_release(bp); } if (bp->b_vp) brelvp(bp); fillbuf: bp->b_generation++; /* we are not free, nor do we contain interesting data */ if (bp->b_rcred != NOCRED) { crfree(bp->b_rcred); bp->b_rcred = NOCRED; } if (bp->b_wcred != NOCRED) { crfree(bp->b_wcred); bp->b_wcred = NOCRED; } + if (LIST_FIRST(&bp->b_dep) != NULL && + bioops.io_deallocate) + (*bioops.io_deallocate)(bp); LIST_REMOVE(bp, b_hash); LIST_INSERT_HEAD(&invalhash, bp, b_hash); if (bp->b_bufsize) { allocbuf(bp, 0); } bp->b_flags = B_BUSY; bp->b_dev = NODEV; bp->b_vp = NULL; bp->b_blkno = bp->b_lblkno = 0; bp->b_iodone = 0; bp->b_error = 0; bp->b_resid = 0; bp->b_bcount = 0; bp->b_npages = 0; bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_validoff = bp->b_validend = 0; bp->b_usecount = 5; + /* Here, not kern_physio.c, is where this should be done*/ + LIST_INIT(&bp->b_dep); maxsize = (maxsize + PAGE_MASK) & ~PAGE_MASK; /* * we assume that buffer_map is not at address 0 */ addr = 0; if (maxsize != bp->b_kvasize) { bfreekva(bp); findkvaspace: /* * See if we have buffer kva space */ if (vm_map_findspace(buffer_map, vm_map_min(buffer_map), maxsize, &addr)) { if (kvafreespace > 0) { int totfree = 0, freed; do { freed = 0; for (bp1 = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]); bp1 != NULL; bp1 = TAILQ_NEXT(bp1, b_freelist)) { if (bp1->b_kvasize != 0) { totfree += bp1->b_kvasize; freed = bp1->b_kvasize; bremfree(bp1); bfreekva(bp1); brelse(bp1); break; } } } while (freed); /* * if we found free space, then retry with the same buffer. */ if (totfree) goto findkvaspace; } bp->b_flags |= B_INVAL; brelse(bp); goto trytofreespace; } } /* * See if we are below our allocated minimum */ if (bufspace >= (maxbufspace + nbyteswritten)) { bp->b_flags |= B_INVAL; brelse(bp); goto trytofreespace; } /* * create a map entry for the buffer -- in essence * reserving the kva space.
*/ if (addr) { vm_map_insert(buffer_map, NULL, 0, addr, addr + maxsize, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); bp->b_kvabase = (caddr_t) addr; bp->b_kvasize = maxsize; } bp->b_data = bp->b_kvabase; return (bp); } static void waitfreebuffers(int slpflag, int slptimeo) { while (numfreebuffers < hifreebuffers) { flushdirtybuffers(slpflag, slptimeo); if (numfreebuffers < hifreebuffers) break; needsbuffer |= VFS_BIO_NEED_FREE; if (tsleep(&needsbuffer, PRIBIO|slpflag, "biofre", slptimeo)) break; } } static void flushdirtybuffers(int slpflag, int slptimeo) { int s; static pid_t flushing = 0; s = splbio(); if (flushing) { if (flushing == curproc->p_pid) { splx(s); return; } while (flushing) { if (tsleep(&flushing, PRIBIO|slpflag, "biofls", slptimeo)) { splx(s); return; } } } flushing = curproc->p_pid; while (numdirtybuffers > lodirtybuffers) { struct buf *bp; needsbuffer |= VFS_BIO_NEED_LOWLIMIT; bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]); if (bp == NULL) bp = TAILQ_FIRST(&bufqueues[QUEUE_LRU]); while (bp && ((bp->b_flags & B_DELWRI) == 0)) { bp = TAILQ_NEXT(bp, b_freelist); } if (bp) { vfs_bio_awrite(bp); continue; } break; } flushing = 0; wakeup(&flushing); splx(s); } /* * Check to see if a block is currently memory resident. */ struct buf * incore(struct vnode * vp, daddr_t blkno) { struct buf *bp; int s = splbio(); bp = gbincore(vp, blkno); splx(s); return (bp); } /* * Returns true if no I/O is needed to access the * associated VM object. This is like incore except * it also hunts around in the VM system for the data. */ int inmem(struct vnode * vp, daddr_t blkno) { vm_object_t obj; vm_offset_t toff, tinc; vm_page_t m; vm_ooffset_t off; if (incore(vp, blkno)) return 1; if (vp->v_mount == NULL) return 0; if ((vp->v_object == NULL) || (vp->v_flag & VOBJBUF) == 0) return 0; obj = vp->v_object; tinc = PAGE_SIZE; if (tinc > vp->v_mount->mnt_stat.f_iosize) tinc = vp->v_mount->mnt_stat.f_iosize; off = blkno * vp->v_mount->mnt_stat.f_iosize; for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) { m = vm_page_lookup(obj, OFF_TO_IDX(off + toff)); if (!m) return 0; if (vm_page_is_valid(m, (vm_offset_t) (toff + off), tinc) == 0) return 0; } return 1; } /* * now we set the dirty range for the buffer -- * for NFS -- if the file is mapped and pages have * been written to, let it know. We want the * entire range of the buffer to be marked dirty if * any of the pages have been written to for consistency * with the b_validoff, b_validend set in the nfs write * code, and used by the nfs read code. */ static void vfs_setdirty(struct buf *bp) { int i; vm_object_t object; vm_offset_t boffset, offset; /* * We qualify the scan for modified pages on whether the * object has been flushed yet. The OBJ_WRITEABLE flag * is not cleared simply by protecting pages off. */ if ((bp->b_flags & B_VMIO) && ((object = bp->b_pages[0]->object)->flags & (OBJ_WRITEABLE|OBJ_CLEANING))) { /* * test the pages to see if they have been modified directly * by users through the VM system.
*/ for (i = 0; i < bp->b_npages; i++) vm_page_test_dirty(bp->b_pages[i]); /* * scan forwards for the first page modified */ for (i = 0; i < bp->b_npages; i++) { if (bp->b_pages[i]->dirty) { break; } } boffset = (i << PAGE_SHIFT); if (boffset < bp->b_dirtyoff) { bp->b_dirtyoff = boffset; } /* * scan backwards for the last page modified */ for (i = bp->b_npages - 1; i >= 0; --i) { if (bp->b_pages[i]->dirty) { break; } } boffset = (i + 1); offset = boffset + bp->b_pages[0]->pindex; if (offset >= object->size) boffset = object->size - bp->b_pages[0]->pindex; if (bp->b_dirtyend < (boffset << PAGE_SHIFT)) bp->b_dirtyend = (boffset << PAGE_SHIFT); } } /* * Get a block given a specified block and offset into a file/device. */ struct buf * getblk(struct vnode * vp, daddr_t blkno, int size, int slpflag, int slptimeo) { struct buf *bp; int i, s; struct bufhashhdr *bh; int maxsize; int generation; if (vp->v_mount) { maxsize = vp->v_mount->mnt_stat.f_iosize; /* * This happens on mount points. */ if (maxsize < size) maxsize = size; } else { maxsize = size; } #if !defined(MAX_PERF) if (size > MAXBSIZE) panic("getblk: size(%d) > MAXBSIZE(%d)\n", size, MAXBSIZE); #endif s = splbio(); loop: if (numfreebuffers < lofreebuffers) { waitfreebuffers(slpflag, slptimeo); } if ((bp = gbincore(vp, blkno))) { loop1: generation = bp->b_generation; if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; if (bp->b_usecount < BUF_MAXUSE) ++bp->b_usecount; if (!tsleep(bp, (PRIBIO + 1) | slpflag, "getblk", slptimeo)) { if (bp->b_generation != generation) goto loop; goto loop1; } else { splx(s); return (struct buf *) NULL; } } bp->b_flags |= B_BUSY | B_CACHE; bremfree(bp); /* * check for size inconsistencies (note that they shouldn't * happen but do when filesystems don't handle the size changes * correctly.) We are conservative on metadata and don't just * extend the buffer but write and re-constitute it. */ if (bp->b_bcount != size) { bp->b_generation++; if ((bp->b_flags & B_VMIO) && (size <= bp->b_kvasize)) { allocbuf(bp, size); } else { bp->b_flags |= B_NOCACHE; VOP_BWRITE(bp); goto loop; } } if (bp->b_usecount < BUF_MAXUSE) ++bp->b_usecount; splx(s); return (bp); } else { vm_object_t obj; if ((bp = getnewbuf(vp, blkno, slpflag, slptimeo, size, maxsize)) == 0) { if (slpflag || slptimeo) { splx(s); return NULL; } goto loop; } /* * This code is used to make sure that a buffer is not * created while the getnewbuf routine is blocked. * Normally the vnode is locked so this isn't a problem. * VBLK type I/O requests, however, don't lock the vnode. */ if (!VOP_ISLOCKED(vp) && gbincore(vp, blkno)) { bp->b_flags |= B_INVAL; brelse(bp); goto loop; } /* * Insert the buffer into the hash, so that it can * be found by incore. */ bp->b_blkno = bp->b_lblkno = blkno; bgetvp(vp, bp); LIST_REMOVE(bp, b_hash); bh = BUFHASH(vp, blkno); LIST_INSERT_HEAD(bh, bp, b_hash); if ((obj = vp->v_object) && (vp->v_flag & VOBJBUF)) { bp->b_flags |= (B_VMIO | B_CACHE); #if defined(VFS_BIO_DEBUG) if (vp->v_type != VREG && vp->v_type != VBLK) printf("getblk: vmioing file type %d???\n", vp->v_type); #endif } else { bp->b_flags &= ~B_VMIO; } allocbuf(bp, size); splx(s); #ifdef PC98 /* * 1024byte/sector support */ #define B_XXX2 0x8000000 if (vp->v_flag & 0x10000) bp->b_flags |= B_XXX2; #endif return (bp); } } /* * Get an empty, disassociated buffer of given size.
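 * The buffer comes back marked B_INVAL, so a later brelse() discards * it rather than caching its (meaningless) contents.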
*/ struct buf * geteblk(int size) { struct buf *bp; int s; s = splbio(); while ((bp = getnewbuf(0, (daddr_t) 0, 0, 0, size, MAXBSIZE)) == 0); splx(s); allocbuf(bp, size); bp->b_flags |= B_INVAL; return (bp); } /* * This code constitutes the buffer memory from either anonymous system * memory (in the case of non-VMIO operations) or from an associated * VM object (in the case of VMIO operations). * * Note that this code is tricky, and has many complications to resolve * deadlock or inconsistent data situations. Tread lightly!!! * * Modify the length of a buffer's underlying buffer storage without * destroying information (unless, of course the buffer is shrinking). */ int allocbuf(struct buf * bp, int size) { int s; int newbsize, mbsize; int i; #if !defined(MAX_PERF) if (!(bp->b_flags & B_BUSY)) panic("allocbuf: buffer not busy"); if (bp->b_kvasize < size) panic("allocbuf: buffer too small"); #endif if ((bp->b_flags & B_VMIO) == 0) { caddr_t origbuf; int origbufsize; /* * Just get anonymous memory from the kernel */ mbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); #if !defined(NO_B_MALLOC) if (bp->b_flags & B_MALLOC) newbsize = mbsize; else #endif newbsize = round_page(size); if (newbsize < bp->b_bufsize) { #if !defined(NO_B_MALLOC) /* * malloced buffers are not shrunk */ if (bp->b_flags & B_MALLOC) { if (newbsize) { bp->b_bcount = size; } else { free(bp->b_data, M_BIOBUF); bufspace -= bp->b_bufsize; bufmallocspace -= bp->b_bufsize; bp->b_data = bp->b_kvabase; bp->b_bufsize = 0; bp->b_bcount = 0; bp->b_flags &= ~B_MALLOC; } return 1; } #endif vm_hold_free_pages( bp, (vm_offset_t) bp->b_data + newbsize, (vm_offset_t) bp->b_data + bp->b_bufsize); } else if (newbsize > bp->b_bufsize) { #if !defined(NO_B_MALLOC) /* * We only use malloced memory on the first allocation, * and revert to page-allocated memory when the buffer grows. */ if ( (bufmallocspace < maxbufmallocspace) && (bp->b_bufsize == 0) && (mbsize <= PAGE_SIZE/2)) { bp->b_data = malloc(mbsize, M_BIOBUF, M_WAITOK); bp->b_bufsize = mbsize; bp->b_bcount = size; bp->b_flags |= B_MALLOC; bufspace += mbsize; bufmallocspace += mbsize; return 1; } #endif origbuf = NULL; origbufsize = 0; #if !defined(NO_B_MALLOC) /* * If the buffer is growing on its other-than-first allocation, * then we revert to the page-allocation scheme.
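 * For example (assuming 4K pages): a buffer first allocated at 1K lives * in malloced memory, since 1024 <= PAGE_SIZE/2 and malloc space permits; * growing it to 8K takes this path, which switches to wired pages and * bcopy()s the old 1K of data over.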
*/ if (bp->b_flags & B_MALLOC) { origbuf = bp->b_data; origbufsize = bp->b_bufsize; bp->b_data = bp->b_kvabase; bufspace -= bp->b_bufsize; bufmallocspace -= bp->b_bufsize; bp->b_bufsize = 0; bp->b_flags &= ~B_MALLOC; newbsize = round_page(newbsize); } #endif vm_hold_load_pages( bp, (vm_offset_t) bp->b_data + bp->b_bufsize, (vm_offset_t) bp->b_data + newbsize); #if !defined(NO_B_MALLOC) if (origbuf) { bcopy(origbuf, bp->b_data, origbufsize); free(origbuf, M_BIOBUF); } #endif } } else { vm_page_t m; int desiredpages; newbsize = (size + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); desiredpages = (round_page(newbsize) >> PAGE_SHIFT); #if !defined(NO_B_MALLOC) if (bp->b_flags & B_MALLOC) panic("allocbuf: VMIO buffer can't be malloced"); #endif if (newbsize < bp->b_bufsize) { if (desiredpages < bp->b_npages) { for (i = desiredpages; i < bp->b_npages; i++) { /* * the page is not freed here -- it * is the responsibility of vnode_pager_setsize */ m = bp->b_pages[i]; #if defined(DIAGNOSTIC) if (m == bogus_page) panic("allocbuf: bogus page found"); #endif vm_page_sleep(m, "biodep", &m->busy); bp->b_pages[i] = NULL; vm_page_unwire(m); } pmap_qremove((vm_offset_t) trunc_page(bp->b_data) + (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages)); bp->b_npages = desiredpages; } } else if (newbsize > bp->b_bufsize) { vm_object_t obj; vm_offset_t tinc, toff; vm_ooffset_t off; vm_pindex_t objoff; int pageindex, curbpnpages; struct vnode *vp; int bsize; int orig_validoff = bp->b_validoff; int orig_validend = bp->b_validend; vp = bp->b_vp; if (vp->v_type == VBLK) bsize = DEV_BSIZE; else bsize = vp->v_mount->mnt_stat.f_iosize; if (bp->b_npages < desiredpages) { obj = vp->v_object; tinc = PAGE_SIZE; if (tinc > bsize) tinc = bsize; off = (vm_ooffset_t) bp->b_lblkno * bsize; curbpnpages = bp->b_npages; doretry: bp->b_validoff = orig_validoff; bp->b_validend = orig_validend; bp->b_flags |= B_CACHE; for (toff = 0; toff < newbsize; toff += tinc) { int bytesinpage; pageindex = toff >> PAGE_SHIFT; objoff = OFF_TO_IDX(off + toff); if (pageindex < curbpnpages) { m = bp->b_pages[pageindex]; #ifdef VFS_BIO_DIAG if (m->pindex != objoff) panic("allocbuf: page changed offset??!!!?"); #endif bytesinpage = tinc; if (tinc > (newbsize - toff)) bytesinpage = newbsize - toff; if (bp->b_flags & B_CACHE) vfs_buf_set_valid(bp, off, toff, bytesinpage, m); continue; } m = vm_page_lookup(obj, objoff); if (!m) { m = vm_page_alloc(obj, objoff, VM_ALLOC_NORMAL); if (!m) { VM_WAIT; vm_pageout_deficit += (desiredpages - bp->b_npages); goto doretry; } vm_page_wire(m); m->flags &= ~PG_BUSY; bp->b_flags &= ~B_CACHE; } else if (m->flags & PG_BUSY) { s = splvm(); if (m->flags & PG_BUSY) { m->flags |= PG_WANTED; tsleep(m, PVM, "pgtblk", 0); } splx(s); goto doretry; } else { if ((curproc != pageproc) && ((m->queue - m->pc) == PQ_CACHE) && ((cnt.v_free_count + cnt.v_cache_count) < (cnt.v_free_min + cnt.v_cache_min))) { pagedaemon_wakeup(); } bytesinpage = tinc; if (tinc > (newbsize - toff)) bytesinpage = newbsize - toff; if (bp->b_flags & B_CACHE) vfs_buf_set_valid(bp, off, toff, bytesinpage, m); vm_page_wire(m); } bp->b_pages[pageindex] = m; curbpnpages = pageindex + 1; } if (vp->v_tag == VT_NFS && vp->v_type != VBLK) { if (bp->b_dirtyend > 0) { bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); bp->b_validend = max(bp->b_validend, bp->b_dirtyend); } if (bp->b_validend == 0) bp->b_flags &= ~B_CACHE; } bp->b_data = (caddr_t) trunc_page(bp->b_data); bp->b_npages = curbpnpages; pmap_qenter((vm_offset_t) bp->b_data, bp->b_pages, bp->b_npages); 
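/* * The wired pages gathered above are now entered into the buffer's kva * mapping, so b_data addresses them directly; the sub-page offset bits * are folded back into b_data immediately below. */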
((vm_offset_t) bp->b_data) |= off & PAGE_MASK; } } } if (bp->b_flags & B_VMIO) vmiospace += (newbsize - bp->b_bufsize); bufspace += (newbsize - bp->b_bufsize); bp->b_bufsize = newbsize; bp->b_bcount = size; return 1; } /* * Wait for buffer I/O completion, returning error status. */ int biowait(register struct buf * bp) { int s; s = splbio(); while ((bp->b_flags & B_DONE) == 0) #if defined(NO_SCHEDULE_MODS) tsleep(bp, PRIBIO, "biowait", 0); #else if (bp->b_flags & B_READ) tsleep(bp, PRIBIO, "biord", 0); else tsleep(bp, curproc->p_usrpri, "biowr", 0); #endif splx(s); if (bp->b_flags & B_EINTR) { bp->b_flags &= ~B_EINTR; return (EINTR); } if (bp->b_flags & B_ERROR) { return (bp->b_error ? bp->b_error : EIO); } else { return (0); } } /* * Finish I/O on a buffer, calling an optional function. * This is usually called from interrupt level, so process blocking * is not *a good idea*. */ void biodone(register struct buf * bp) { int s; s = splbio(); #if !defined(MAX_PERF) if (!(bp->b_flags & B_BUSY)) panic("biodone: buffer not busy"); #endif if (bp->b_flags & B_DONE) { splx(s); #if !defined(MAX_PERF) printf("biodone: buffer already done\n"); #endif return; } bp->b_flags |= B_DONE; if ((bp->b_flags & B_READ) == 0) { vwakeup(bp); } #ifdef BOUNCE_BUFFERS if (bp->b_flags & B_BOUNCE) vm_bounce_free(bp); #endif /* call optional completion function if requested */ if (bp->b_flags & B_CALL) { bp->b_flags &= ~B_CALL; (*bp->b_iodone) (bp); splx(s); return; } + if (LIST_FIRST(&bp->b_dep) != NULL && bioops.io_complete) + (*bioops.io_complete)(bp); + if (bp->b_flags & B_VMIO) { int i, resid; vm_ooffset_t foff; vm_page_t m; vm_object_t obj; int iosize; struct vnode *vp = bp->b_vp; obj = vp->v_object; #if defined(VFS_BIO_DEBUG) if (vp->v_usecount == 0) { panic("biodone: zero vnode ref count"); } if (vp->v_object == NULL) { panic("biodone: missing VM object"); } if ((vp->v_flag & VOBJBUF) == 0) { panic("biodone: vnode is not setup for merged cache"); } #endif if (vp->v_type == VBLK) foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; else foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; #if !defined(MAX_PERF) if (!obj) { panic("biodone: no object"); } #endif #if defined(VFS_BIO_DEBUG) if (obj->paging_in_progress < bp->b_npages) { printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n", obj->paging_in_progress, bp->b_npages); } #endif iosize = bp->b_bufsize; for (i = 0; i < bp->b_npages; i++) { int bogusflag = 0; m = bp->b_pages[i]; if (m == bogus_page) { bogusflag = 1; m = vm_page_lookup(obj, OFF_TO_IDX(foff)); if (!m) { #if defined(VFS_BIO_DEBUG) printf("biodone: page disappeared\n"); #endif --obj->paging_in_progress; continue; } bp->b_pages[i] = m; pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } #if defined(VFS_BIO_DEBUG) if (OFF_TO_IDX(foff) != m->pindex) { printf("biodone: foff(%d)/m->pindex(%d) mismatch\n", foff, m->pindex); } #endif resid = IDX_TO_OFF(m->pindex + 1) - foff; if (resid > iosize) resid = iosize; /* * In the write case, the valid and clean bits are * already changed correctly, so we only need to do this * here in the read case. */ if ((bp->b_flags & B_READ) && !bogusflag && resid > 0) { vfs_page_set_valid(bp, foff, i, m); } /* * when debugging new filesystems or buffer I/O methods, this * is the most common error that pops up. if you see this, you * have not set the page busy flag correctly!!! 
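 * (The busy count was raised when the I/O was started -- by * vfs_busy_pages, or by the clustering code for B_CLUSTER buffers -- * and PAGE_BWAKEUP below drops it; a zero count here means the start * path never busied the page.)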
*/ if (m->busy == 0) { #if !defined(MAX_PERF) printf("biodone: page busy < 0, " "pindex: %d, foff: 0x(%x,%x), " "resid: %d, index: %d\n", (int) m->pindex, (int)(foff >> 32), (int) foff & 0xffffffff, resid, i); #endif if (vp->v_type != VBLK) #if !defined(MAX_PERF) printf(" iosize: %ld, lblkno: %d, flags: 0x%lx, npages: %d\n", bp->b_vp->v_mount->mnt_stat.f_iosize, (int) bp->b_lblkno, bp->b_flags, bp->b_npages); else printf(" VDEV, lblkno: %d, flags: 0x%lx, npages: %d\n", (int) bp->b_lblkno, bp->b_flags, bp->b_npages); printf(" valid: 0x%x, dirty: 0x%x, wired: %d\n", m->valid, m->dirty, m->wire_count); #endif panic("biodone: page busy < 0\n"); } PAGE_BWAKEUP(m); --obj->paging_in_progress; foff += resid; iosize -= resid; } if (obj && (obj->paging_in_progress == 0) && (obj->flags & OBJ_PIPWNT)) { obj->flags &= ~OBJ_PIPWNT; wakeup(obj); } } /* * For asynchronous completions, release the buffer now. The brelse * checks for B_WANTED and will do the wakeup there if necessary - so * no need to do a wakeup here in the async case. */ if (bp->b_flags & B_ASYNC) { if ((bp->b_flags & (B_NOCACHE | B_INVAL | B_ERROR | B_RELBUF)) != 0) brelse(bp); else bqrelse(bp); } else { bp->b_flags &= ~B_WANTED; wakeup(bp); } splx(s); } static int count_lock_queue() { int count; struct buf *bp; count = 0; for (bp = TAILQ_FIRST(&bufqueues[QUEUE_LOCKED]); bp != NULL; bp = TAILQ_NEXT(bp, b_freelist)) count++; return (count); } +#if 0 /* not with kirks code */ static int vfs_update_interval = 30; static void vfs_update() { while (1) { tsleep(&vfs_update_wakeup, PUSER, "update", hz * vfs_update_interval); vfs_update_wakeup = 0; sync(curproc, NULL); } } static int sysctl_kern_updateinterval SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error) wakeup(&vfs_update_wakeup); return error; } SYSCTL_PROC(_kern, KERN_UPDATEINTERVAL, update, CTLTYPE_INT|CTLFLAG_RW, &vfs_update_interval, 0, sysctl_kern_updateinterval, "I", ""); + +#endif /* * This routine is called in lieu of iodone in the case of * incomplete I/O. This keeps the busy status for pages * consistent. */ void vfs_unbusy_pages(struct buf * bp) { int i; if (bp->b_flags & B_VMIO) { struct vnode *vp = bp->b_vp; vm_object_t obj = vp->v_object; vm_ooffset_t foff; foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; if (m == bogus_page) { m = vm_page_lookup(obj, OFF_TO_IDX(foff) + i); #if !defined(MAX_PERF) if (!m) { panic("vfs_unbusy_pages: page missing\n"); } #endif bp->b_pages[i] = m; pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } --obj->paging_in_progress; PAGE_BWAKEUP(m); } if (obj->paging_in_progress == 0 && (obj->flags & OBJ_PIPWNT)) { obj->flags &= ~OBJ_PIPWNT; wakeup(obj); } } } /* * Set NFS' b_validoff and b_validend fields from the valid bits * of a page. If the consumer is not NFS, and the page is not * valid for the entire range, clear the B_CACHE flag to force * the consumer to re-read the page. */ static void vfs_buf_set_valid(struct buf *bp, vm_ooffset_t foff, vm_offset_t off, vm_offset_t size, vm_page_t m) { if (bp->b_vp->v_tag == VT_NFS && bp->b_vp->v_type != VBLK) { vm_offset_t svalid, evalid; int validbits = m->valid; /* * This only bothers with the first valid range in the * page.
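 * A worked example, assuming DEV_BSIZE is 512: with m->valid == 0x3c * the two clear low bits are skipped, giving svalid = off + 1024 and * evalid = off + 3072; a second valid range, if any, is ignored.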
*/ svalid = off; while (validbits && !(validbits & 1)) { svalid += DEV_BSIZE; validbits >>= 1; } evalid = svalid; while (validbits & 1) { evalid += DEV_BSIZE; validbits >>= 1; } /* * Make sure this range is contiguous with the range * built up from previous pages. If not, then we will * just use the range from the previous pages. */ if (svalid == bp->b_validend) { bp->b_validoff = min(bp->b_validoff, svalid); bp->b_validend = max(bp->b_validend, evalid); } } else if (!vm_page_is_valid(m, (vm_offset_t) ((foff + off) & PAGE_MASK), size)) { bp->b_flags &= ~B_CACHE; } } /* * Set the valid bits in a page, taking care of the b_validoff, * b_validend fields which NFS uses to optimise small reads. Off is * the offset within the file and pageno is the page index within the buf. */ static void vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m) { struct vnode *vp = bp->b_vp; vm_ooffset_t soff, eoff; soff = off; eoff = off + min(PAGE_SIZE, bp->b_bufsize); vm_page_set_invalid(m, (vm_offset_t) (soff & PAGE_MASK), (vm_offset_t) (eoff - soff)); if (vp->v_tag == VT_NFS && vp->v_type != VBLK) { vm_ooffset_t sv, ev; off = off - pageno * PAGE_SIZE; sv = off + ((bp->b_validoff + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1)); ev = off + ((bp->b_validend + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1)); soff = max(sv, soff); eoff = min(ev, eoff); } if (eoff > soff) vm_page_set_validclean(m, (vm_offset_t) (soff & PAGE_MASK), (vm_offset_t) (eoff - soff)); } /* * This routine is called before a device strategy routine. * It is used to tell the VM system that paging I/O is in * progress, and treat the pages associated with the buffer * almost as being PG_BUSY. Also the object paging_in_progress * flag is handled to make sure that the object doesn't become * inconsistent. */ void vfs_busy_pages(struct buf * bp, int clear_modify) { int i,s; if (bp->b_flags & B_VMIO) { struct vnode *vp = bp->b_vp; vm_object_t obj = vp->v_object; vm_ooffset_t foff; if (vp->v_type == VBLK) foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; else foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; vfs_setdirty(bp); retry: for (i = 0; i < bp->b_npages; i++) { vm_page_t m = bp->b_pages[i]; if (vm_page_sleep(m, "vbpage", NULL)) goto retry; } for (i = 0; i < bp->b_npages; i++, foff += PAGE_SIZE) { vm_page_t m = bp->b_pages[i]; if ((bp->b_flags & B_CLUSTER) == 0) { obj->paging_in_progress++; m->busy++; } vm_page_protect(m, VM_PROT_NONE); if (clear_modify) vfs_page_set_valid(bp, foff, i, m); else if (bp->b_bcount >= PAGE_SIZE) { if (m->valid && (bp->b_flags & B_CACHE) == 0) { bp->b_pages[i] = bogus_page; pmap_qenter(trunc_page(bp->b_data), bp->b_pages, bp->b_npages); } } } } } /* * Tell the VM system that the pages associated with this buffer * are clean. This is used for delayed writes where the data is * going to go to disk eventually without additional VM intervention.
*/ void vfs_clean_pages(struct buf * bp) { int i; if (bp->b_flags & B_VMIO) { struct vnode *vp = bp->b_vp; vm_ooffset_t foff; if (vp->v_type == VBLK) foff = (vm_ooffset_t) DEV_BSIZE * bp->b_lblkno; else foff = (vm_ooffset_t) vp->v_mount->mnt_stat.f_iosize * bp->b_lblkno; for (i = 0; i < bp->b_npages; i++, foff += PAGE_SIZE) { vm_page_t m = bp->b_pages[i]; vfs_page_set_valid(bp, foff, i, m); } } } void vfs_bio_clrbuf(struct buf *bp) { int i; if( bp->b_flags & B_VMIO) { if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE)) { int mask; mask = 0; for(i=0;i<bp->b_bufsize;i+=DEV_BSIZE) mask |= (1 << (i/DEV_BSIZE)); if( bp->b_pages[0]->valid != mask) { bzero(bp->b_data, bp->b_bufsize); } bp->b_pages[0]->valid = mask; bp->b_resid = 0; return; } for(i=0;i<bp->b_npages;i++) { if( bp->b_pages[i]->valid == VM_PAGE_BITS_ALL) continue; if( bp->b_pages[i]->valid == 0) { if ((bp->b_pages[i]->flags & PG_ZERO) == 0) { bzero(bp->b_data + (i << PAGE_SHIFT), PAGE_SIZE); } } else { int j; for(j=0;j<PAGE_SIZE/DEV_BSIZE;j++) { if( (bp->b_pages[i]->valid & (1<<j)) == 0) bzero(bp->b_data + (i << PAGE_SHIFT) + j * DEV_BSIZE, DEV_BSIZE); } } /* bp->b_pages[i]->valid = VM_PAGE_BITS_ALL; */ } bp->b_resid = 0; } else { clrbuf(bp); } } /* * vm_hold_load_pages and vm_hold_free_pages get pages into * a buffer's address space. The pages are anonymous and are * not associated with a file object. */ void vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index; to = round_page(to); from = round_page(from); index = (from - trunc_page(bp->b_data)) >> PAGE_SHIFT; for (pg = from; pg < to; pg += PAGE_SIZE, index++) { tryagain: p = vm_page_alloc(kernel_object, ((pg - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT), VM_ALLOC_NORMAL); if (!p) { vm_pageout_deficit += (to - from) >> PAGE_SHIFT; VM_WAIT; goto tryagain; } vm_page_wire(p); p->valid = VM_PAGE_BITS_ALL; pmap_kenter(pg, VM_PAGE_TO_PHYS(p)); bp->b_pages[index] = p; PAGE_WAKEUP(p); } bp->b_npages = index; } void vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to) { vm_offset_t pg; vm_page_t p; int index, newnpages; from = round_page(from); to = round_page(to); newnpages = index = (from - trunc_page(bp->b_data)) >> PAGE_SHIFT; for (pg = from; pg < to; pg += PAGE_SIZE, index++) { p = bp->b_pages[index]; if (p && (index < bp->b_npages)) { #if !defined(MAX_PERF) if (p->busy) { printf("vm_hold_free_pages: blkno: %d, lblkno: %d\n", bp->b_blkno, bp->b_lblkno); } #endif bp->b_pages[index] = NULL; pmap_kremove(pg); p->flags |= PG_BUSY; vm_page_unwire(p); vm_page_free(p); } } bp->b_npages = newnpages; } #include "opt_ddb.h" #ifdef DDB #include <ddb/ddb.h> DB_SHOW_COMMAND(buffer, db_show_buffer) { /* get args */ struct buf *bp = (struct buf *)addr; if (!have_addr) { db_printf("usage: show buffer <addr>\n"); return; } db_printf("b_proc = %p,\nb_flags = 0x%b\n", (void *)bp->b_proc, bp->b_flags, "\20\40bounce\37cluster\36vmio\35ram\34ordered" "\33paging\32xxx\31writeinprog\30wanted\27relbuf\26tape" "\25read\24raw\23phys\22clusterok\21malloc\20nocache" "\17locked\16inval\15gathered\14error\13eintr\12done\11dirty" "\10delwri\7call\6cache\5busy\4bad\3async\2needcommit\1age"); db_printf("b_error = %d, b_bufsize = %ld, b_bcount = %ld, " "b_resid = %ld\nb_dev = 0x%x, b_data = %p, " "b_blkno = %d, b_pblkno = %d\n", bp->b_error, bp->b_bufsize, bp->b_bcount, bp->b_resid, bp->b_dev, bp->b_data, bp->b_blkno, bp->b_pblkno); if (bp->b_npages) { int i; db_printf("b_npages = %d, pages(OBJ, IDX, PA): ", bp->b_npages); for (i = 0; i < bp->b_npages; i++) { vm_page_t m; m = bp->b_pages[i]; db_printf("(0x%x, 0x%x, 0x%x)", 
m->object, m->pindex, VM_PAGE_TO_PHYS(m)); if ((i + 1) < bp->b_npages) db_printf(","); } db_printf("\n"); } } #endif /* DDB */ Index: head/sys/kern/vfs_cluster.c =================================================================== --- head/sys/kern/vfs_cluster.c (revision 34265) +++ head/sys/kern/vfs_cluster.c (revision 34266) @@ -1,796 +1,801 @@ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * Modifications/enhancements: * Copyright (c) 1995 John S. Dyson. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94 - * $Id: vfs_cluster.c,v 1.55 1998/02/06 12:13:30 eivind Exp $ + * $Id: vfs_cluster.c,v 1.56 1998/03/07 21:35:28 dyson Exp $ */ #include "opt_debug_cluster.h" #include #include #include #include #include #include #include #include #include #include #include #if defined(CLUSTERDEBUG) #include #include static int rcluster= 0; SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0, ""); #endif #ifdef notyet_block_reallocation_enabled static struct cluster_save * cluster_collectbufs __P((struct vnode *vp, struct buf *last_bp)); #endif static struct buf * cluster_rbuild __P((struct vnode *vp, u_quad_t filesize, daddr_t lbn, daddr_t blkno, long size, int run, struct buf *fbp)); extern vm_page_t bogus_page; /* * Maximum number of blocks for read-ahead. */ #define MAXRA 32 /* * This replaces bread. */ int cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp) struct vnode *vp; u_quad_t filesize; daddr_t lblkno; long size; struct ucred *cred; long totread; int seqcount; struct buf **bpp; { struct buf *bp, *rbp, *reqbp; daddr_t blkno, origblkno; int error, num_ra; int i; int maxra, racluster; long origtotread; error = 0; if (vp->v_maxio == 0) vp->v_maxio = DFLTPHYS; /* * Try to limit the amount of read-ahead by a few * ad-hoc parameters. This needs work!!! 
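 * For example, assuming vp->v_maxio is DFLTPHYS (64K) and 8K blocks: * racluster is 8, so a 64K read yields maxra = 2 * 8 + 8 = 24, further * clipped by MAXRA (32) and by nbuf/8.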
*/ racluster = vp->v_maxio/size; maxra = 2 * racluster + (totread / size); if (maxra > MAXRA) maxra = MAXRA; if (maxra > nbuf/8) maxra = nbuf/8; /* * get the requested block */ *bpp = reqbp = bp = getblk(vp, lblkno, size, 0, 0); origblkno = lblkno; origtotread = totread; /* * if it is in the cache, then check to see if the reads have been * sequential. If they have, then try some read-ahead, otherwise * back-off on prospective read-aheads. */ if (bp->b_flags & B_CACHE) { if (!seqcount) { return 0; } else if ((bp->b_flags & B_RAM) == 0) { return 0; } else { int s; struct buf *tbp; bp->b_flags &= ~B_RAM; /* * We do the spl here so that there is no window * between the incore and the b_usecount increment * below. We opt to keep the spl out of the loop * for efficiency. */ s = splbio(); for(i=1;i<maxra;i++) { if (!(tbp = incore(vp, lblkno+i))) { break; } /* * Set another read-ahead mark so we know to check * again. */ if (((i % racluster) == (racluster - 1)) || (i == (maxra - 1))) tbp->b_flags |= B_RAM; if ((tbp->b_usecount < 5) && ((tbp->b_flags & B_BUSY) == 0) && (tbp->b_qindex == QUEUE_LRU)) { TAILQ_REMOVE(&bufqueues[QUEUE_LRU], tbp, b_freelist); TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], tbp, b_freelist); } } splx(s); if (i >= maxra) { return 0; } lblkno += i; } reqbp = bp = NULL; } else { u_quad_t firstread; firstread = (u_quad_t) lblkno * size; if (firstread + totread > filesize) totread = filesize - firstread; if (totread > size) { int nblks = 0; int ncontigafter; while (totread > 0) { nblks++; totread -= size; } if (nblks == 1) goto single_block_read; if (nblks > racluster) nblks = racluster; error = VOP_BMAP(vp, lblkno, NULL, &blkno, &ncontigafter, NULL); if (error) goto single_block_read; if (blkno == -1) goto single_block_read; if (ncontigafter == 0) goto single_block_read; if (ncontigafter + 1 < nblks) nblks = ncontigafter + 1; bp = cluster_rbuild(vp, filesize, lblkno, blkno, size, nblks, bp); lblkno += nblks; } else { single_block_read: /* * if it isn't in the cache, then get a chunk from * disk if sequential, otherwise just get the block. */ bp->b_flags |= B_READ | B_RAM; lblkno += 1; } } /* * if we have been doing sequential I/O, then do some read-ahead */ rbp = NULL; if (seqcount && (lblkno < (origblkno + seqcount))) { /* * we now build the read-ahead buffer if it is desirable.
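 * The amount is bounded by what VOP_BMAP reports as contiguous (num_ra) * and by the observed sequentiality: e.g. a 20K original request with 8K * blocks gives nblksread = 3, so seqcount is raised to at least 3 and * ntoread is clipped to seqcount.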
*/ if (((u_quad_t)(lblkno + 1) * size) <= filesize && !(error = VOP_BMAP(vp, lblkno, NULL, &blkno, &num_ra, NULL)) && blkno != -1) { int nblksread; int ntoread = num_ra + 1; nblksread = (origtotread + size - 1) / size; if (seqcount < nblksread) seqcount = nblksread; if (seqcount < ntoread) ntoread = seqcount; if (num_ra) { rbp = cluster_rbuild(vp, filesize, lblkno, blkno, size, ntoread, NULL); } else { rbp = getblk(vp, lblkno, size, 0, 0); rbp->b_flags |= B_READ | B_ASYNC | B_RAM; rbp->b_blkno = blkno; } } } /* * handle the synchronous read */ if (bp) { if (bp->b_flags & (B_DONE | B_DELWRI)) { panic("cluster_read: DONE bp"); } else { #if defined(CLUSTERDEBUG) if (rcluster) printf("S(%d,%d,%d) ", bp->b_lblkno, bp->b_bcount, seqcount); #endif if ((bp->b_flags & B_CLUSTER) == 0) vfs_busy_pages(bp, 0); error = VOP_STRATEGY(bp); curproc->p_stats->p_ru.ru_inblock++; } } /* * and if we have read-aheads, do them too */ if (rbp) { if (error) { rbp->b_flags &= ~(B_ASYNC | B_READ); brelse(rbp); } else if (rbp->b_flags & B_CACHE) { rbp->b_flags &= ~(B_ASYNC | B_READ); bqrelse(rbp); } else { #if defined(CLUSTERDEBUG) if (rcluster) { if (bp) printf("A+(%d,%d,%d,%d) ", rbp->b_lblkno, rbp->b_bcount, rbp->b_lblkno - origblkno, seqcount); else printf("A(%d,%d,%d,%d) ", rbp->b_lblkno, rbp->b_bcount, rbp->b_lblkno - origblkno, seqcount); } #endif if ((rbp->b_flags & B_CLUSTER) == 0) vfs_busy_pages(rbp, 0); (void) VOP_STRATEGY(rbp); curproc->p_stats->p_ru.ru_inblock++; } } if (reqbp) return (biowait(reqbp)); else return (error); } /* * If blocks are contiguous on disk, use this to provide clustered * read ahead. We will read as many blocks as possible sequentially * and then parcel them up into logical blocks in the buffer hash table. */ static struct buf * cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp) struct vnode *vp; u_quad_t filesize; daddr_t lbn; daddr_t blkno; long size; int run; struct buf *fbp; { struct buf *bp, *tbp; daddr_t bn; int i, inc, j; #ifdef DIAGNOSTIC if (size != vp->v_mount->mnt_stat.f_iosize) panic("cluster_rbuild: size %d != filesize %d\n", size, vp->v_mount->mnt_stat.f_iosize); #endif /* * avoid a division */ while ((u_quad_t) size * (lbn + run) > filesize) { --run; } if (fbp) { tbp = fbp; tbp->b_flags |= B_READ; } else { tbp = getblk(vp, lbn, size, 0, 0); if (tbp->b_flags & B_CACHE) return tbp; tbp->b_flags |= B_ASYNC | B_READ | B_RAM; } tbp->b_blkno = blkno; if( (tbp->b_flags & B_MALLOC) || ((tbp->b_flags & B_VMIO) == 0) || (run <= 1) ) return tbp; bp = trypbuf(); if (bp == 0) return tbp; (vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK; bp->b_flags = B_ASYNC | B_READ | B_CALL | B_BUSY | B_CLUSTER | B_VMIO; bp->b_iodone = cluster_callback; bp->b_blkno = blkno; bp->b_lblkno = lbn; pbgetvp(vp, bp); TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_npages = 0; if (vp->v_maxio == 0) vp->v_maxio = DFLTPHYS; inc = btodb(size); for (bn = blkno, i = 0; i < run; ++i, bn += inc) { if (i != 0) { if ((bp->b_npages * PAGE_SIZE) + round_page(size) > vp->v_maxio) break; if (incore(vp, lbn + i)) break; tbp = getblk(vp, lbn + i, size, 0, 0); if ((tbp->b_flags & B_CACHE) || (tbp->b_flags & B_VMIO) == 0) { bqrelse(tbp); break; } for (j=0;j<tbp->b_npages;j++) { if (tbp->b_pages[j]->valid) { break; } } if (j != tbp->b_npages) { /* * force buffer to be re-constituted later */ tbp->b_flags |= B_RELBUF; brelse(tbp); break; } if ((fbp && (i == 1)) || (i == (run - 1))) tbp->b_flags |= B_RAM; tbp->b_flags |= B_READ | B_ASYNC; if (tbp->b_blkno == 
tbp->b_lblkno) { tbp->b_blkno = bn; } else if (tbp->b_blkno != bn) { brelse(tbp); break; } } + /* check for latent dependencies to be handled */ + if ((LIST_FIRST(&tbp->b_dep)) != NULL && bioops.io_start) + (*bioops.io_start)(tbp); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); for (j = 0; j < tbp->b_npages; j += 1) { vm_page_t m; m = tbp->b_pages[j]; ++m->busy; ++m->object->paging_in_progress; if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages-1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) tbp->b_pages[j] = bogus_page; } bp->b_bcount += tbp->b_bcount; bp->b_bufsize += tbp->b_bufsize; } for(j=0;j<bp->b_npages;j++) { if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) bp->b_pages[j] = bogus_page; } if (bp->b_bufsize > bp->b_kvasize) panic("cluster_rbuild: b_bufsize(%d) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); bp->b_kvasize = bp->b_bufsize; pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *)bp->b_pages, bp->b_npages); return (bp); } /* * Cleanup after a clustered read or write. * This is complicated by the fact that any of the buffers might have * extra memory (if there were no empty buffer headers at allocbuf time) * that we will need to shift around. */ void cluster_callback(bp) struct buf *bp; { struct buf *nbp, *tbp; int error = 0; /* * Must propagate errors to all the components. */ if (bp->b_flags & B_ERROR) error = bp->b_error; pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages); /* * Move memory from the large cluster buffer into the component * buffers and mark IO as done on these. */ for (tbp = TAILQ_FIRST(&bp->b_cluster.cluster_head); tbp; tbp = nbp) { nbp = TAILQ_NEXT(&tbp->b_cluster, cluster_entry); if (error) { tbp->b_flags |= B_ERROR; tbp->b_error = error; } else tbp->b_dirtyoff = tbp->b_dirtyend = 0; biodone(tbp); } relpbuf(bp); } /* * Do clustered write for FFS. * * Three cases: * 1. Write is not sequential (write asynchronously) * Write is sequential: * 2. beginning of cluster - begin cluster * 3. middle of a cluster - add to cluster * 4. end of a cluster - asynchronously write cluster */ void cluster_write(bp, filesize) struct buf *bp; u_quad_t filesize; { struct vnode *vp; daddr_t lbn; int maxclen, cursize; int lblocksize; int async; vp = bp->b_vp; if (vp->v_maxio == 0) vp->v_maxio = DFLTPHYS; if (vp->v_type == VREG) { async = vp->v_mount->mnt_flag & MNT_ASYNC; lblocksize = vp->v_mount->mnt_stat.f_iosize; } else { async = 0; lblocksize = bp->b_bufsize; } lbn = bp->b_lblkno; /* Initialize vnode to beginning of file. */ if (lbn == 0) vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 || (bp->b_blkno != vp->v_lasta + btodb(lblocksize))) { maxclen = vp->v_maxio / lblocksize - 1; if (vp->v_clen != 0) { /* * Next block is not sequential. * * If we are not writing at end of file, the process * sought to another point in the file since its last * write, or we have reached our maximum cluster size, * then push the previous cluster. Otherwise try * reallocating to make it sequential.
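 * Sequentiality is judged both logically and physically: the block must * be v_lastw + 1 and its disk address must follow the previous one by * btodb(lblocksize) sectors -- 16 for an 8K block with 512 byte sectors.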
*/ cursize = vp->v_lastw - vp->v_cstart + 1; #ifndef notyet_block_reallocation_enabled if (((u_quad_t)(lbn + 1) * lblocksize) != filesize || lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { if (!async) cluster_wbuild(vp, lblocksize, vp->v_cstart, cursize); } #else if ((lbn + 1) * lblocksize != filesize || lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { if (!async) cluster_wbuild(vp, lblocksize, vp->v_cstart, cursize); } else { struct buf **bpp, **endbp; struct cluster_save *buflist; buflist = cluster_collectbufs(vp, bp); endbp = &buflist->bs_children [buflist->bs_nchildren - 1]; if (VOP_REALLOCBLKS(vp, buflist)) { /* * Failed, push the previous cluster. */ for (bpp = buflist->bs_children; bpp < endbp; bpp++) brelse(*bpp); free(buflist, M_SEGMENT); cluster_wbuild(vp, lblocksize, vp->v_cstart, cursize); } else { /* * Succeeded, keep building cluster. */ for (bpp = buflist->bs_children; bpp <= endbp; bpp++) bdwrite(*bpp); free(buflist, M_SEGMENT); vp->v_lastw = lbn; vp->v_lasta = bp->b_blkno; return; } } #endif /* notyet_block_reallocation_enabled */ } /* * Consider beginning a cluster. If at end of file, make * cluster as large as possible, otherwise find size of * existing cluster. */ if ((vp->v_type == VREG) && ((u_quad_t) (lbn + 1) * lblocksize) != filesize && (bp->b_blkno == bp->b_lblkno) && (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) || bp->b_blkno == -1)) { bawrite(bp); vp->v_clen = 0; vp->v_lasta = bp->b_blkno; vp->v_cstart = lbn + 1; vp->v_lastw = lbn; return; } vp->v_clen = maxclen; if (!async && maxclen == 0) { /* I/O not contiguous */ vp->v_cstart = lbn + 1; bawrite(bp); } else { /* Wait for rest of cluster */ vp->v_cstart = lbn; bdwrite(bp); } } else if (lbn == vp->v_cstart + vp->v_clen) { /* * At end of cluster, write it out. */ bdwrite(bp); cluster_wbuild(vp, lblocksize, vp->v_cstart, vp->v_clen + 1); vp->v_clen = 0; vp->v_cstart = lbn + 1; } else /* * In the middle of a cluster, so just delay the I/O for now. */ bdwrite(bp); vp->v_lastw = lbn; vp->v_lasta = bp->b_blkno; } /* * This is an awful lot like cluster_rbuild...wish they could be combined. * The last lbn argument is the current block on which I/O is being * performed. Check to see that it doesn't fall in the middle of * the current block (if last_bp == NULL). */ int cluster_wbuild(vp, size, start_lbn, len) struct vnode *vp; long size; daddr_t start_lbn; int len; { struct buf *bp, *tbp; int i, j, s; int totalwritten = 0; int dbsize = btodb(size); while (len > 0) { s = splbio(); if (((tbp = gbincore(vp, start_lbn)) == NULL) || ((tbp->b_flags & (B_INVAL|B_BUSY|B_DELWRI)) != B_DELWRI)) { ++start_lbn; --len; splx(s); continue; } bremfree(tbp); tbp->b_flags |= B_BUSY; tbp->b_flags &= ~B_DONE; splx(s); /* * Extra memory in the buffer, punt on this buffer. XXX we could * handle this in most cases, but we would have to push the extra * memory down to after our max possible cluster size and then * potentially pull it back up if the cluster was terminated * prematurely--too much hassle. 
*/ if (((tbp->b_flags & (B_CLUSTEROK|B_MALLOC)) != B_CLUSTEROK) || (tbp->b_bcount != tbp->b_bufsize) || (tbp->b_bcount != size) || len == 1) { totalwritten += tbp->b_bufsize; bawrite(tbp); ++start_lbn; --len; continue; } bp = trypbuf(); if (bp == NULL) { totalwritten += tbp->b_bufsize; bawrite(tbp); ++start_lbn; --len; continue; } TAILQ_INIT(&bp->b_cluster.cluster_head); bp->b_bcount = 0; bp->b_bufsize = 0; bp->b_npages = 0; if (tbp->b_wcred != NOCRED) { bp->b_wcred = tbp->b_wcred; crhold(bp->b_wcred); } bp->b_blkno = tbp->b_blkno; bp->b_lblkno = tbp->b_lblkno; (vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK; bp->b_flags |= B_CALL | B_BUSY | B_CLUSTER | (tbp->b_flags & (B_VMIO|B_NEEDCOMMIT)); bp->b_iodone = cluster_callback; pbgetvp(vp, bp); - for (i = 0; i < len; ++i, ++start_lbn) { if (i != 0) { s = splbio(); if ((tbp = gbincore(vp, start_lbn)) == NULL) { splx(s); break; } if ((tbp->b_flags & (B_VMIO|B_CLUSTEROK|B_INVAL|B_BUSY|B_DELWRI|B_NEEDCOMMIT)) != (B_DELWRI|B_CLUSTEROK|(bp->b_flags & (B_VMIO|B_NEEDCOMMIT)))) { splx(s); break; } if (tbp->b_wcred != bp->b_wcred) { splx(s); break; } if ((tbp->b_bcount != size) || ((bp->b_blkno + dbsize * i) != tbp->b_blkno) || ((tbp->b_npages + bp->b_npages) > (vp->v_maxio / PAGE_SIZE))) { splx(s); break; } bremfree(tbp); tbp->b_flags |= B_BUSY; tbp->b_flags &= ~B_DONE; splx(s); } - + /* check for latent dependencies to be handled */ + if ((LIST_FIRST(&tbp->b_dep)) != NULL && + bioops.io_start) + (*bioops.io_start)(tbp); if (tbp->b_flags & B_VMIO) { vm_page_t m; if (i != 0) { for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; if (m->flags & PG_BUSY) goto finishcluster; } } for (j = 0; j < tbp->b_npages; j += 1) { m = tbp->b_pages[j]; ++m->busy; ++m->object->paging_in_progress; if ((bp->b_npages == 0) || (bp->b_pages[bp->b_npages - 1] != m)) { bp->b_pages[bp->b_npages] = m; bp->b_npages++; } } } bp->b_bcount += size; bp->b_bufsize += size; --numdirtybuffers; tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); tbp->b_flags |= B_ASYNC; s = splbio(); reassignbuf(tbp, tbp->b_vp); /* put on clean list */ ++tbp->b_vp->v_numoutput; splx(s); TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head, tbp, b_cluster.cluster_entry); } finishcluster: pmap_qenter(trunc_page((vm_offset_t) bp->b_data), (vm_page_t *) bp->b_pages, bp->b_npages); if (bp->b_bufsize > bp->b_kvasize) panic("cluster_wbuild: b_bufsize(%d) > b_kvasize(%d)\n", bp->b_bufsize, bp->b_kvasize); bp->b_kvasize = bp->b_bufsize; totalwritten += bp->b_bufsize; bp->b_dirtyoff = 0; bp->b_dirtyend = bp->b_bufsize; bawrite(bp); len -= i; } return totalwritten; } #ifdef notyet_block_reallocation_enabled /* * Collect together all the buffers in a cluster. * Plus add one additional buffer. 
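 * The resulting list has len + 1 entries: the len delayed-write buffers * from v_cstart through v_lastw, plus last_bp itself at the end.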
*/ static struct cluster_save * cluster_collectbufs(vp, last_bp) struct vnode *vp; struct buf *last_bp; { struct cluster_save *buflist; daddr_t lbn; int i, len; len = vp->v_lastw - vp->v_cstart + 1; buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), M_SEGMENT, M_WAITOK); buflist->bs_nchildren = 0; buflist->bs_children = (struct buf **) (buflist + 1); for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) (void) bread(vp, lbn, last_bp->b_bcount, NOCRED, &buflist->bs_children[i]); buflist->bs_children[i] = last_bp; buflist->bs_nchildren = i + 1; return (buflist); } #endif /* notyet_block_reallocation_enabled */ Index: head/sys/kern/vfs_export.c =================================================================== --- head/sys/kern/vfs_export.c (revision 34265) +++ head/sys/kern/vfs_export.c (revision 34266) @@ -1,2328 +1,2674 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 - * $Id: vfs_subr.c,v 1.136 1998/03/01 23:07:45 dyson Exp $ + * $Id: vfs_subr.c,v 1.137 1998/03/07 21:35:35 dyson Exp $ */ /* * External virtual filesystem routines */ #include "opt_ddb.h" #include "opt_devfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); static void insmntque __P((struct vnode *vp, struct mount *mp)); #ifdef DDB static void printlockedvnodes __P((void)); #endif static void vclean __P((struct vnode *vp, int flags, struct proc *p)); static void vfree __P((struct vnode *)); static void vgonel __P((struct vnode *vp, struct proc *p)); static unsigned long numvnodes; SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; /* * Insq/Remq for the vnode usage lists. */ #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) #define bufremvn(bp) { \ LIST_REMOVE(bp, b_vnbufs); \ (bp)->b_vnbufs.le_next = NOLIST; \ } static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ struct tobefreelist vnode_tobefree_list; /* vnode free list */ static u_long wantfreevnodes = 25; SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); static u_long freevnodes = 0; SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); int vfs_ioopt = 0; SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); struct mntlist mountlist; /* mounted filesystem list */ struct simplelock mountlist_slock; static struct simplelock mntid_slock; struct simplelock mntvnode_slock; static struct simplelock vnode_free_list_slock; static struct simplelock spechash_slock; struct nfs_public nfs_pub; /* publicly exported FS */ static vm_zone_t vnode_zone; +/* + * The workitem queue. + */ +#define SYNCER_MAXDELAY 32 +int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ +time_t syncdelay = 30; +int rushjob; /* number of slots to run ASAP */ + +static int syncer_delayno = 0; +static long syncer_mask; +LIST_HEAD(synclist, vnode); +static struct synclist *syncer_workitem_pending; + int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); static void vfs_free_addrlist __P((struct netexport *nep)); static int vfs_free_netcred __P((struct radix_node *rn, void *w)); static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, struct export_args *argp)); /* * Initialize the vnode management data structures. */ void vntblinit() { desiredvnodes = maxproc + cnt.v_page_count / 4; simple_lock_init(&mntvnode_slock); simple_lock_init(&mntid_slock); simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); TAILQ_INIT(&vnode_tobefree_list); simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); + /* + * Initialize the filesystem syncer. + */ + syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, + &syncer_mask); + syncer_maxdelay = syncer_mask + 1; } /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Interlock is not released on failure. 
*/ int vfs_busy(mp, flags, interlkp, p) struct mount *mp; int flags; struct simplelock *interlkp; struct proc *p; { int lkflags; if (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & LK_NOWAIT) return (ENOENT); mp->mnt_kern_flag |= MNTK_MWAIT; if (interlkp) { simple_unlock(interlkp); } /* * Since all busy locks are shared except the exclusive * lock granted when unmounting, the only place that a * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. */ tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); if (interlkp) { simple_lock(interlkp); } return (ENOENT); } lkflags = LK_SHARED | LK_NOPAUSE; if (interlkp) lkflags |= LK_INTERLOCK; if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) panic("vfs_busy: unexpected lock failure"); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(mp, p) struct mount *mp; struct proc *p; { lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* * Lookup a filesystem type, and if found allocate and initialize * a mount structure for it. * * Devname is usually updated by mount(8) after booting. */ int vfs_rootmountalloc(fstypename, devname, mpp) char *fstypename; char *devname; struct mount **mpp; { struct proc *p = curproc; /* XXX */ struct vfsconf *vfsp; struct mount *mp; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) return (ENODEV); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); LIST_INIT(&mp->mnt_vnodelist); mp->mnt_vfc = vfsp; mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_flag = MNT_RDONLY; mp->mnt_vnodecovered = NULLVP; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); mp->mnt_stat.f_mntonname[0] = '/'; mp->mnt_stat.f_mntonname[1] = 0; (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); *mpp = mp; return (0); } /* * Find an appropriate filesystem to use for the root. If a filesystem * has not been preselected, walk through the list of known filesystems * trying those that have mountroot routines, and try them until one * works or we have tried them all. */ #ifdef notdef /* XXX JH */ int lite2_vfs_mountroot() { struct vfsconf *vfsp; extern int (*lite2_mountroot) __P((void)); int error; if (lite2_mountroot != NULL) return ((*lite2_mountroot)()); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { if (vfsp->vfc_mountroot == NULL) continue; if ((error = (*vfsp->vfc_mountroot)()) == 0) return (0); printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); } return (ENODEV); } #endif /* * Lookup a mount point by filesystem identifier. 
*/ struct mount * vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { simple_unlock(&mountlist_slock); return (mp); } } simple_unlock(&mountlist_slock); return ((struct mount *) 0); } /* * Get a new unique fsid */ void vfs_getnewfsid(mp) struct mount *mp; { static u_short xxxfs_mntid; fsid_t tfsid; int mtype; simple_lock(&mntid_slock); mtype = mp->mnt_vfc->vfc_typenum; mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) ++xxxfs_mntid; tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { while (vfs_getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; simple_unlock(&mntid_slock); } /* * Set vnode attributes to VNOVAL */ void vattr_null(vap) register struct vattr *vap; { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = vap->va_fsid = vap->va_fileid = vap->va_blocksize = vap->va_rdev = vap->va_atime.tv_sec = vap->va_atime.tv_nsec = vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec = vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec = vap->va_flags = vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. */ extern vop_t **dead_vnodeop_p; /* * Return the next vnode from the free list. */ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; vop_t **vops; struct vnode **vpp; { int s; struct proc *p = curproc; /* XXX */ struct vnode *vp, *tvp, *nvp; vm_object_t object; TAILQ_HEAD(freelst, vnode) vnode_tmp_list; /* * We take the least recently used vnode from the freelist * if we can get it and it has no cached pages, and no * namecache entries are relative to it. 
* Otherwise we allocate a new vnode */ s = splbio(); simple_lock(&vnode_free_list_slock); TAILQ_INIT(&vnode_tmp_list); for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } vp->v_flag &= ~(VTBFREE|VAGE); vp->v_flag |= VFREE; if (vp->v_usecount) panic("tobe free vnode isn't"); freevnodes++; } if (wantfreevnodes && freevnodes < wantfreevnodes) { vp = NULL; } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { /* * XXX: this is only here to be backwards compatible */ vp = NULL; } else { for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); if (!simple_lock_try(&vp->v_interlock)) continue; if (vp->v_usecount) panic("free vnode isn't"); object = vp->v_object; if (object && (object->resident_page_count || object->ref_count)) { printf("object inconsistant state: RPC: %d, RC: %d\n", object->resident_page_count, object->ref_count); /* Don't recycle if it's caching some pages */ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); continue; } else if (LIST_FIRST(&vp->v_cache_src)) { /* Don't recycle if active in the namecache */ simple_unlock(&vp->v_interlock); continue; } else { break; } } } for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { nvp = TAILQ_NEXT(tvp, v_freelist); TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); simple_unlock(&tvp->v_interlock); } if (vp) { vp->v_flag |= VDOOMED; TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; simple_unlock(&vnode_free_list_slock); cache_purge(vp); vp->v_lease = NULL; if (vp->v_type != VBAD) { vgonel(vp, p); } else { simple_unlock(&vp->v_interlock); } #ifdef DIAGNOSTIC { int s; if (vp->v_data) panic("cleaned vnode isn't"); s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); } #endif vp->v_flag = 0; vp->v_lastr = 0; vp->v_lastw = 0; vp->v_lasta = 0; vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ vp->v_maxio = 0; } else { simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) zalloc(vnode_zone); bzero((char *) vp, sizeof *vp); simple_lock_init(&vp->v_interlock); vp->v_dd = vp; cache_purge(vp); LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); numvnodes++; } vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; splx(s); vfs_object_create(vp, p, p->p_ucred, TRUE); return (0); } /* * Move a vnode from one mount queue to another. */ static void insmntque(vp, mp) register struct vnode *vp; register struct mount *mp; { simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) LIST_REMOVE(vp, v_mntvnodes); /* * Insert into list of vnodes for the new mount point, if available. */ if ((vp->v_mount = mp) == NULL) { simple_unlock(&mntvnode_slock); return; } LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); simple_unlock(&mntvnode_slock); } /* * Update outstanding I/O count and do wakeup if requested. 
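 * Sleepers such as vinvalbuf set VBWAIT and tsleep on &vp->v_numoutput; * the wakeup below releases them once the last write completes.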
*/ void vwakeup(bp) register struct buf *bp; { register struct vnode *vp; bp->b_flags &= ~B_WRITEINPROG; if ((vp = bp->b_vp)) { vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { vp->v_flag &= ~VBWAIT; wakeup((caddr_t) &vp->v_numoutput); } } } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int slpflag, slptimeo; { register struct buf *bp; struct buf *nbp, *blist; int s, error; vm_object_t object; - if (flags & V_SAVE) { + if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) { if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p))) return (error); if (vp->v_dirtyblkhd.lh_first != NULL) panic("vinvalbuf: dirty bufs"); } s = splbio(); for (;;) { if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist) break; for (bp = blist; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) continue; if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; error = tsleep((caddr_t) bp, slpflag | (PRIBIO + 1), "vinvalbuf", slptimeo); if (error) { splx(s); return (error); } break; } bremfree(bp); bp->b_flags |= B_BUSY; /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { if (bp->b_vp == vp) { if (bp->b_flags & B_CLUSTEROK) { vfs_bio_awrite(bp); } else { bp->b_flags |= B_ASYNC; VOP_BWRITE(bp); } } else { (void) VOP_BWRITE(bp); } break; } bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF); brelse(bp); } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); } splx(s); /* * Destroy the copy in the VM cache, too. */ simple_lock(&vp->v_interlock); object = vp->v_object; if (object != NULL) { if (flags & V_SAVEMETA) vm_object_page_remove(object, 0, object->size, (flags & V_SAVE) ? TRUE : FALSE); else vm_object_page_remove(object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); } simple_unlock(&vp->v_interlock); if (!(flags & V_SAVEMETA) && (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) panic("vinvalbuf: flush failed"); return (0); } /* * Associate a buffer with a vnode. */ void bgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { int s; #if defined(DIAGNOSTIC) if (bp->b_vp) panic("bgetvp: not free"); #endif vhold(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; /* * Insert onto list for new vnode. */ s = splbio(); bufinsvn(bp, &vp->v_cleanblkhd); splx(s); } /* * Disassociate a buffer from a vnode. */ void brelvp(bp) register struct buf *bp; { struct vnode *vp; int s; #if defined(DIAGNOSTIC) if (bp->b_vp == (struct vnode *) 0) panic("brelvp: NULL"); #endif /* * Delete from old vnode list, if on one. 
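 * This is also where a vnode leaves the syncer worklist: once its last * dirty buffer is gone, VONWORKLST is cleared and the vnode is removed * from its v_synclist slot.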
*/ + vp = bp->b_vp; s = splbio(); if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); + if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) { + vp->v_flag &= ~VONWORKLST; + LIST_REMOVE(vp, v_synclist); + } splx(s); - - vp = bp->b_vp; bp->b_vp = (struct vnode *) 0; vdrop(vp); } /* + * The workitem queue. + * + * It is useful to delay writes of file data and filesystem metadata + * for tens of seconds so that quickly created and deleted files need + * not waste disk bandwidth being created and removed. To realize this, + * we append vnodes to a "workitem" queue. When running with a soft + * updates implementation, most pending metadata dependencies should + * not wait for more than a few seconds. Thus, filesystem metadata + * written through block device vnodes is delayed only about half the + * time that file data is delayed. + * Similarly, directory updates are more critical, so they are only delayed + * about a third of the time that file data is delayed. Thus, there are + * SYNCER_MAXDELAY queues that are processed round-robin at a rate of + * one each second (driven off the filesystem syncer process). The + * syncer_delayno variable indicates the next queue that is to be processed. + * Items that need to be processed soon are placed in this queue: + * + * syncer_workitem_pending[syncer_delayno] + * + * A delay of fifteen seconds is done by placing the request fifteen + * entries later in the queue: + * + * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] + * + */ + +/* + * Add an item to the syncer work queue. + */ +void +vn_syncer_add_to_worklist(vp, delay) + struct vnode *vp; + int delay; +{ + int s, slot; + + s = splbio(); + + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + } + + if (delay > syncer_maxdelay - 2) + delay = syncer_maxdelay - 2; + slot = (syncer_delayno + delay) & syncer_mask; + + LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); + vp->v_flag |= VONWORKLST; + splx(s); +} + +static void sched_sync __P((void)); +static struct proc *updateproc; +static struct kproc_desc up_kp = { + "syncer", + sched_sync, + &updateproc +}; +SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) + +/* + * System filesystem synchronizer daemon. + */ +void +sched_sync(void) +{ + struct synclist *slp; + struct vnode *vp; + long starttime; + int s; + struct proc *p = updateproc; + + for (;;) { + starttime = time.tv_sec; + + /* + * Push files whose dirty time has expired. + */ + s = splbio(); + slp = &syncer_workitem_pending[syncer_delayno]; + syncer_delayno += 1; + if (syncer_delayno == syncer_maxdelay) + syncer_delayno = 0; + splx(s); + + while ((vp = LIST_FIRST(slp)) != NULL) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); + VOP_UNLOCK(vp, 0, p); + if (LIST_FIRST(slp) == vp) { + if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && + vp->v_type != VBLK) + panic("sched_sync: fsync failed"); + /* + * Move ourselves to the back of the sync list. + */ + LIST_REMOVE(vp, v_synclist); + vn_syncer_add_to_worklist(vp, syncdelay); + } + } + + /* + * Do soft update processing. + */ + if (bioops.io_sync) + (*bioops.io_sync)(NULL); + + /* + * The variable rushjob allows the kernel to speed up the + * processing of the filesystem syncer process. A rushjob + * value of N tells the filesystem syncer to process the next + * N seconds worth of work on its queue ASAP.
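 */

/*
 * Illustration (not part of this change): the slot computation in
 * vn_syncer_add_to_worklist() above hashes a delay into a circular
 * array of queues, which only works because the queue count is a
 * power of two.  A minimal model with hypothetical names; 32 queues
 * are assumed here.
 */
#include <stdio.h>

#define WHEELSIZE	32		/* must be a power of two for the mask */

static int delayno;			/* model of syncer_delayno */

static int
worklist_slot(int delay)
{
	/* Clamp, then wrap around the circular queue array. */
	if (delay > WHEELSIZE - 2)
		delay = WHEELSIZE - 2;
	return ((delayno + delay) & (WHEELSIZE - 1));
}

int
main(void)
{
	delayno = 30;
	printf("%d\n", worklist_slot(15));	/* (30 + 15) & 31 == 13 */
	return (0);
}

/*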
Currently rushjob + * is used by the soft update code to speed up the filesystem + * syncer process when the incore state is getting so far + * ahead of the disk that the kernel memory pool is being + * threatened with exhaustion. + */ + if (rushjob > 0) { + rushjob -= 1; + continue; + } + /* + * If it has taken us less than a second to process the + * current work, then wait. Otherwise start right over + * again. We can still lose time if any single round + * takes more than two seconds, but it does not really + * matter as we are just trying to generally pace the + * filesystem activity. + */ + if (time.tv_sec == starttime) + tsleep(&lbolt, PPAUSE, "syncer", 0); + } +} + +/* * Associate a p-buffer with a vnode. */ void pbgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { #if defined(DIAGNOSTIC) if (bp->b_vp) panic("pbgetvp: not free"); #endif bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; } /* * Disassociate a p-buffer from a vnode. */ void pbrelvp(bp) register struct buf *bp; { #if defined(DIAGNOSTIC) if (bp->b_vp == (struct vnode *) 0) panic("pbrelvp: NULL"); #endif bp->b_vp = (struct vnode *) 0; } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { + struct buflists *listheadp; + int delay; int s; if (newvp == NULL) { printf("reassignbuf: NULL"); return; } s = splbio(); /* * Delete from old vnode list, if on one. */ if (bp->b_vnbufs.le_next != NOLIST) { bufremvn(bp); vdrop(bp->b_vp); } /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { struct buf *tbp; - tbp = newvp->v_dirtyblkhd.lh_first; + listheadp = &newvp->v_dirtyblkhd; + if ((newvp->v_flag & VONWORKLST) == 0) { + switch (newvp->v_type) { + case VDIR: + delay = syncdelay / 3; + break; + case VBLK: + if (newvp->v_specmountpoint != NULL) { + delay = syncdelay / 2; + break; + } + /* fall through */ + default: + delay = syncdelay; + } + vn_syncer_add_to_worklist(newvp, delay); + } + tbp = listheadp->lh_first; if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { - bufinsvn(bp, &newvp->v_dirtyblkhd); + bufinsvn(bp, listheadp); } else { while (tbp->b_vnbufs.le_next && - (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { + (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { tbp = tbp->b_vnbufs.le_next; } LIST_INSERT_AFTER(tbp, bp, b_vnbufs); } } else { bufinsvn(bp, &newvp->v_cleanblkhd); + if ((newvp->v_flag & VONWORKLST) && + LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { + newvp->v_flag &= ~VONWORKLST; + LIST_REMOVE(newvp, v_synclist); + } } bp->b_vp = newvp; vhold(bp->b_vp); splx(s); } #ifndef DEVFS_ROOT /* * Create a vnode for a block device. * Used for mounting the root file system. */ int bdevvp(dev, vpp) dev_t dev; struct vnode **vpp; { register struct vnode *vp; struct vnode *nvp; int error; if (dev == NODEV) return (0); error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp); if (error) { *vpp = 0; return (error); } vp = nvp; vp->v_type = VBLK; if ((nvp = checkalias(vp, dev, (struct mount *) 0))) { vput(vp); vp = nvp; } *vpp = vp; return (0); } #endif /* !DEVFS_ROOT */ /* * Check to see if the new vnode represents a special device * for which we already have a vnode (either because of * bdevvp() or because of a different vnode representing * the same block device). 
If such an alias exists, deallocate * the existing contents and return the aliased vnode. The * caller is responsible for filling it with its new contents. */ struct vnode * checkalias(nvp, nvp_rdev, mp) register struct vnode *nvp; dev_t nvp_rdev; struct mount *mp; { struct proc *p = curproc; /* XXX */ struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) return (NULLVP); vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { simple_unlock(&spechash_slock); vgonel(vp, p); goto loop; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { simple_unlock(&spechash_slock); goto loop; } break; } if (vp == NULL || vp->v_tag != VT_NON) { MALLOC(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo), M_VNODE, M_WAITOK); nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; - nvp->v_specflags = 0; + nvp->v_specmountpoint = NULL; simple_unlock(&spechash_slock); *vpp = nvp; if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } simple_unlock(&spechash_slock); VOP_UNLOCK(vp, 0, p); simple_lock(&vp->v_interlock); vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; insmntque(vp, mp); return (vp); } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set if the * vnode is being eliminated in vgone. The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new file system type). */ int vget(vp, flags, p) register struct vnode *vp; int flags; struct proc *p; { int error; /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then * return failure. Cleaning is determined by checking that * the VXLOCK flag is set. */ if ((flags & LK_INTERLOCK) == 0) { simple_lock(&vp->v_interlock); } if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); return (ENOENT); } vp->v_usecount++; if (VSHOULDBUSY(vp)) vbusy(vp); - if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { /* * must expand vrele here because we do not want * to call VOP_INACTIVE if the reference count * drops back to zero since it was never really * active. We must remove it from the free list * before sleeping so that multiple processes do * not try to recycle it. */ simple_lock(&vp->v_interlock); vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } return (error); } simple_unlock(&vp->v_interlock); return (0); } void vref(struct vnode *vp) { simple_lock(&vp->v_interlock); vp->v_usecount++; simple_unlock(&vp->v_interlock); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist.
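 */

/*
 * Illustration (not part of this change): the locking contract described
 * above, modeled in userland with hypothetical names.  m_get/m_put/m_rele
 * mirror vget()/vput()/vrele(): vput() expects the caller to hold the
 * lock and drops lock and reference together, while vrele() drops only
 * the reference of an unlocked vnode.
 */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

struct mnode {
	pthread_mutex_t lock;
	int usecount;
};

static void
m_get(struct mnode *np, int dolock)
{
	np->usecount++;			/* always take a reference */
	if (dolock)
		pthread_mutex_lock(&np->lock);
}

static void
m_put(struct mnode *np)
{
	/* Caller holds the lock: release reference and lock together. */
	np->usecount--;
	pthread_mutex_unlock(&np->lock);
}

static void
m_rele(struct mnode *np)
{
	/* Caller does not hold the lock: drop the reference only. */
	np->usecount--;
}

int
main(void)
{
	struct mnode n = { PTHREAD_MUTEX_INITIALIZER, 0 };

	m_get(&n, 1);
	m_put(&n);
	m_get(&n, 0);
	m_rele(&n);
	assert(n.usecount == 0);
	printf("balanced\n");
	return (0);
}

/*
 * The routines below add the VOP_INACTIVE step that this model omits.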
*/ void vrele(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vrele: null vp"); #endif simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; simple_unlock(&vp->v_interlock); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { VOP_INACTIVE(vp, p); } } else { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); simple_unlock(&vp->v_interlock); #endif panic("vrele: negative ref cnt"); } } void vput(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vput: null vp"); #endif simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; VOP_UNLOCK(vp, LK_INTERLOCK, p); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); } else { #ifdef DIAGNOSTIC vprint("vput: negative ref count", vp); #endif panic("vput: negative ref cnt"); } } /* * Somebody doesn't want the vnode recycled. */ void vhold(vp) register struct vnode *vp; { simple_lock(&vp->v_interlock); vp->v_holdcnt++; if (VSHOULDBUSY(vp)) vbusy(vp); simple_unlock(&vp->v_interlock); } /* * One less who cares about this vnode. */ void vdrop(vp) register struct vnode *vp; { simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) - panic("holdrele: holdcnt"); + panic("vdrop: holdcnt"); vp->v_holdcnt--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If MNT_FORCE is specified, detach any active vnodes * that are found. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush(mp, skipvp, flags) struct mount *mp; struct vnode *skipvp; int flags; { struct proc *p = curproc; /* XXX */ struct vnode *vp, *nvp; int busy = 0; simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ if (vp->v_mount != mp) goto loop; nvp = vp->v_mntvnodes.le_next; /* * Skip over a selected vnode. */ if (vp == skipvp) continue; simple_lock(&vp->v_interlock); /* * Skip over vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { simple_unlock(&vp->v_interlock); continue; } /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) { simple_unlock(&vp->v_interlock); continue; } /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. */ if (vp->v_usecount == 0) { simple_unlock(&mntvnode_slock); vgonel(vp, p); simple_lock(&mntvnode_slock); continue; } /* * If FORCECLOSE is set, forcibly close the vnode.
For block * or character devices, revert to an anonymous device. For * all other files, just kill them. */ if (flags & FORCECLOSE) { simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { vgonel(vp, p); } else { vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } simple_lock(&mntvnode_slock); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif simple_unlock(&vp->v_interlock); busy++; } simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); } /* * Disassociate the underlying file system from a vnode. */ static void vclean(vp, flags, p) struct vnode *vp; int flags; struct proc *p; { int active; vm_object_t obj; /* * Check to see if the vnode is in use. If so we have to reference it * before we clean it out so that its count cannot fall to zero and * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) vp->v_usecount++; /* * Prevent the vnode from being recycled or brought into use while we * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. The VOP_LOCK * ensures that the VOP_INACTIVE routine is done with its work. * For active vnodes, it ensures that no other activity can * occur while the underlying object is being cleaned out. */ VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* * Clean out any buffers associated with the vnode. */ vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); if (obj = vp->v_object) { if (obj->ref_count == 0) { /* * This is a normal way of shutting down the object/vnode * association. */ vm_object_terminate(obj); } else { /* * Woe to the process that tries to page now :-). */ vm_pager_deallocate(obj); } } /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. Note that the * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); VOP_INACTIVE(vp, p); } else { /* * Any other processes trying to obtain this lock must first * wait for VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp, 0, p); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) vrele(vp); cache_purge(vp); if (vp->v_vnlock) { #if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */ #ifdef DIAGNOSTIC if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) vprint("vclean: lock not drained", vp); #endif #endif FREE(vp->v_vnlock, M_VNODE); vp->v_vnlock = NULL; } if (VSHOULDFREE(vp)) vfree(vp); /* * Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vn_pollgone(vp); vp->v_tag = VT_NON; vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup((caddr_t) vp); } } /* * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ int vop_revoke(ap) struct vop_revoke_args /* { struct vnode *a_vp; int a_flags; } */ *ap; { struct vnode *vp, *vq; struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if ((ap->a_flags & REVOKEALL) == 0) panic("vop_revoke"); #endif vp = ap->a_vp; simple_lock(&vp->v_interlock); if (vp->v_flag & VALIASED) { /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. 
*/ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); return (0); } /* * Ensure that vp will not be vgone'd while we * are eliminating its aliases. */ vp->v_flag |= VXLOCK; simple_unlock(&vp->v_interlock); while (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) continue; simple_unlock(&spechash_slock); vgone(vq); break; } if (vq == NULLVP) { simple_unlock(&spechash_slock); } } /* * Remove the lock so that vgone below will * really eliminate the vnode after which time * vgone will awaken any sleepers. */ simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup(vp); } } vgonel(vp, p); return (0); } /* * Recycle an unused vnode to the front of the free list. * Release the passed interlock if the vnode will be recycled. */ int vrecycle(vp, inter_lkp, p) struct vnode *vp; struct simplelock *inter_lkp; struct proc *p; { simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { if (inter_lkp) { simple_unlock(inter_lkp); } vgonel(vp, p); return (1); } simple_unlock(&vp->v_interlock); return (0); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(vp) register struct vnode *vp; { struct proc *p = curproc; /* XXX */ simple_lock(&vp->v_interlock); vgonel(vp, p); } /* * vgone, with the vp interlock held. */ static void vgonel(vp, p) struct vnode *vp; struct proc *p; { int s; struct vnode *vq; struct vnode *vx; /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. */ vclean(vp, DOCLOSE, p); simple_lock(&vp->v_interlock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) insmntque(vp, (struct mount *)0); /* * If special device, remove it from special device alias list * if it is on one. */ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { simple_lock(&spechash_slock); if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_specnext != vp) continue; vq->v_specnext = vp->v_specnext; break; } if (vq == NULL) panic("missing bdev"); } if (vp->v_flag & VALIASED) { vx = NULL; for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; if (vx) break; vx = vq; } if (vx == NULL) panic("missing alias"); if (vq == NULL) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } simple_unlock(&spechash_slock); FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } /* * If it is on the freelist and not already at the head, * move it to the head of the list. The test of the back * pointer and the reference count of zero is because * it will be removed from the free list by getnewvnode, * but will not have its reference count incremented until * after calling vgone. If the reference count were * incremented first, vgone would (incorrectly) try to * close the previous instance of the underlying object. 
*/ if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VFREE) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); } else if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; freevnodes++; } else freevnodes++; vp->v_flag |= VFREE; TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); splx(s); } vp->v_type = VBAD; simple_unlock(&vp->v_interlock); } /* * Lookup a vnode by device number. */ int vfinddev(dev, type, vpp) dev_t dev; enum vtype type; struct vnode **vpp; { register struct vnode *vp; int rc = 0; simple_lock(&spechash_slock); for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; *vpp = vp; rc = 1; break; } simple_unlock(&spechash_slock); return (rc); } /* * Calculate the total number of references to a special device. */ int vcount(vp) register struct vnode *vp; { struct vnode *vq, *vnext; int count; loop: if ((vp->v_flag & VALIASED) == 0) return (vp->v_usecount); simple_lock(&spechash_slock); for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ if (vq->v_usecount == 0 && vq != vp) { simple_unlock(&spechash_slock); vgone(vq); goto loop; } count += vq->v_usecount; } simple_unlock(&spechash_slock); return (count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) char *label; register struct vnode *vp; { char buf[64]; if (label != NULL) printf("%s: %x: ", label, vp); else printf("%x: ", vp); printf("type %s, usecount %d, writecount %d, refcount %ld,", typename[vp->v_type], vp->v_usecount, vp->v_writecount, vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); if (vp->v_flag & VTEXT) strcat(buf, "|VTEXT"); if (vp->v_flag & VSYSTEM) strcat(buf, "|VSYSTEM"); if (vp->v_flag & VXLOCK) strcat(buf, "|VXLOCK"); if (vp->v_flag & VXWANT) strcat(buf, "|VXWANT"); if (vp->v_flag & VBWAIT) strcat(buf, "|VBWAIT"); if (vp->v_flag & VALIASED) strcat(buf, "|VALIASED"); if (vp->v_flag & VDOOMED) strcat(buf, "|VDOOMED"); if (vp->v_flag & VFREE) strcat(buf, "|VFREE"); if (vp->v_flag & VOBJBUF) strcat(buf, "|VOBJBUF"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { printf("\n"); } else { printf("\n\t"); VOP_PRINT(vp); } } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ static void printlockedvnodes() { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *vp; printf("Locked vnodes\n"); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) { if (VOP_ISLOCKED(vp)) vprint((char *)0, vp); } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); } #endif /* * Top level filesystem related information gathering. 
*/ static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); static int vfs_sysctl SYSCTL_HANDLER_ARGS { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; #ifndef NO_COMPAT_PRELITE2 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif #ifdef notyet /* all sysctl names at this level are at least name and field */ if (namelen < 2) return (ENOTDIR); /* overloaded */ if (name[0] != VFS_GENERIC) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[0]) break; if (vfsp == NULL) return (EOPNOTSUPP); return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, oldp, oldlenp, newp, newlen, p)); } #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) return (EOPNOTSUPP); return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); } return (EOPNOTSUPP); } SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, "Generic filesystem"); #ifndef NO_COMPAT_PRELITE2 static int sysctl_ovfs_conf SYSCTL_HANDLER_ARGS { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error) return error; } return 0; } #endif /* !NO_COMPAT_PRELITE2 */ static volatile int kinfo_vdebug = 1; #if 0 #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). * Copyout address of vnode followed by vnode. */ /* ARGSUSED */ static int sysctl_vnode SYSCTL_HANDLER_ARGS { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *nvp, *vp; int error; #define VPTRSZ sizeof (struct vnode *) #define VNODESZ sizeof (struct vnode) req->lock = 0; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } again: simple_lock(&mntvnode_slock); for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. */ if (vp->v_mount != mp) { simple_unlock(&mntvnode_slock); if (kinfo_vdebug) printf("kinfo: vp changed\n"); goto again; } nvp = vp->v_mntvnodes.le_next; simple_unlock(&mntvnode_slock); if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) return (error); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); return (0); } #endif /* * XXX * Exporting the vnode list on large systems causes them to crash. * Exporting the vnode list on medium systems causes sysctl to coredump. */ #if 0 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,vnode", ""); #endif /* * Check to see if a filesystem is mounted on a block device. 
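 */

/*
 * Illustration (not part of this change): the vfs_sysctl() handler above
 * serves the vfs.generic MIB.  A userland probe of the VFS_MAXTYPENUM
 * leaf, assuming a 4.4BSD-derived <sys/mount.h> and <sys/sysctl.h>.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int mib[3], maxtype;
	size_t len;

	mib[0] = CTL_VFS;
	mib[1] = VFS_GENERIC;
	mib[2] = VFS_MAXTYPENUM;	/* handled by the switch above */
	len = sizeof(maxtype);
	if (sysctl(mib, 3, &maxtype, &len, NULL, 0) == -1) {
		perror("sysctl");
		return (1);
	}
	printf("max filesystem type number: %d\n", maxtype);
	return (0);
}

/*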
*/ int vfs_mountedon(vp) struct vnode *vp; { struct vnode *vq; int error = 0; - if (vp->v_specflags & SI_MOUNTEDON) + if (vp->v_specmountpoint != NULL) return (EBUSY); if (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) { + if (vq->v_specmountpoint != NULL) { error = EBUSY; break; } } simple_unlock(&spechash_slock); } return (error); } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall() { struct mount *mp, *nmp; struct proc *p = initproc; /* XXX XXX should this be proc0? */ int error; /* * Since this only runs when rebooting, it is not interlocked. */ for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_prev; error = dounmount(mp, MNT_FORCE, p); if (error) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. */ static int vfs_hang_addrlist(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { register struct netcred *np; register struct radix_node_head *rnh; register int i; struct radix_node *rn; struct sockaddr *saddr, *smask = 0; struct domain *dom; int error; if (argp->ex_addrlen == 0) { if (mp->mnt_flag & MNT_DEFEXPORTED) return (EPERM); np = &nep->ne_defexported; np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; mp->mnt_flag |= MNT_DEFEXPORTED; return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); bzero((caddr_t) np, i); saddr = (struct sockaddr *) (np + 1); if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) smask->sa_len = argp->ex_masklen; } i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* * Seems silly to initialize every AF when most are not used, * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { dom->dom_rtattach((void **) &nep->ne_rtable[i], dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { error = ENOBUFS; goto out; } } rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, np->netc_rnodes); if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; return (0); out: free(np, M_NETADDR); return (error); } /* ARGSUSED */ static int vfs_free_netcred(rn, w) struct radix_node *rn; void *w; { register struct radix_node_head *rnh = (struct radix_node_head *) w; (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); free((caddr_t) rn, M_NETADDR); return (0); } /* * Free the net address hash lists that are hanging off the mount points. 
*/ static void vfs_free_addrlist(nep) struct netexport *nep; { register int i; register struct radix_node_head *rnh; for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { (*rnh->rnh_walktree) (rnh, vfs_free_netcred, (caddr_t) rnh); free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } int vfs_export(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; if (argp->ex_flags & MNT_DELEXPORT) { if (mp->mnt_flag & MNT_EXPUBLIC) { vfs_setpublicfs(NULL, NULL, NULL); mp->mnt_flag &= ~MNT_EXPUBLIC; } vfs_free_addrlist(nep); mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); } if (argp->ex_flags & MNT_EXPORTED) { if (argp->ex_flags & MNT_EXPUBLIC) { if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) return (error); mp->mnt_flag |= MNT_EXPUBLIC; } if ((error = vfs_hang_addrlist(mp, nep, argp))) return (error); mp->mnt_flag |= MNT_EXPORTED; } return (0); } /* * Set the publicly exported filesystem (WebNFS). Currently, only * one public filesystem is possible in the spec (RFC 2054 and 2055) */ int vfs_setpublicfs(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; struct vnode *rvp; char *cp; /* * mp == NULL -> invalidate the current info, the FS is * no longer exported. May be called from either vfs_export * or unmount, so check if it hasn't already been done. */ if (mp == NULL) { if (nfs_pub.np_valid) { nfs_pub.np_valid = 0; if (nfs_pub.np_index != NULL) { FREE(nfs_pub.np_index, M_TEMP); nfs_pub.np_index = NULL; } } return (0); } /* * Only one allowed at a time. */ if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) return (EBUSY); /* * Get real filehandle for root of exported FS. */ bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; if ((error = VFS_ROOT(mp, &rvp))) return (error); if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) return (error); vput(rvp); /* * If an indexfile was specified, pull it in. */ if (argp->ex_indexfile != NULL) { MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, M_WAITOK); error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, MAXNAMLEN, (size_t *)0); if (!error) { /* * Check for illegal filenames. */ for (cp = nfs_pub.np_index; *cp; cp++) { if (*cp == '/') { error = EINVAL; break; } } } if (error) { FREE(nfs_pub.np_index, M_TEMP); return (error); } } nfs_pub.np_mount = mp; nfs_pub.np_valid = 1; return (0); } struct netcred * vfs_export_lookup(mp, nep, nam) register struct mount *mp; struct netexport *nep; struct sockaddr *nam; { register struct netcred *np; register struct radix_node_head *rnh; struct sockaddr *saddr; np = NULL; if (mp->mnt_flag & MNT_EXPORTED) { /* * Lookup in the export list first. */ if (nam != NULL) { saddr = nam; rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } } /* * If no address match, use the default if it exists. */ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) np = &nep->ne_defexported; } return (np); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. 
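 */

/*
 * Illustration (not part of this change): vfs_export_lookup() above tries
 * a per-address radix-tree match first and falls back to the default
 * export.  A minimal model of that lookup order, with a flat table and
 * hypothetical names standing in for the radix tree.
 */
#include <stdio.h>
#include <string.h>

struct cred { const char *addr; int flags; };

static struct cred table[] = {
	{ "10.0.0.5", 1 },
	{ "10.0.0.9", 2 },
};
static struct cred defexport = { "default", 0 };

static struct cred *
export_lookup(const char *addr)
{
	size_t i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++)
		if (strcmp(table[i].addr, addr) == 0)
			return (&table[i]);	/* specific match wins */
	return (&defexport);			/* like ne_defexported */
}

int
main(void)
{
	printf("%d\n", export_lookup("10.0.0.9")->flags);	/* 2 */
	printf("%d\n", export_lookup("10.0.0.7")->flags);	/* 0: default */
	return (0);
}

/*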
*/ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; int anyio, tries; tries = 5; loop: anyio = 0; for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { nvp = vp->v_mntvnodes.le_next; if (vp->v_mount != mp) { goto loop; } if ((vp->v_flag & VXLOCK) || (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))) { continue; } simple_lock(&vp->v_interlock); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { if (!vget(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, TRUE); anyio = 1; } vput(vp); } } else { simple_unlock(&vp->v_interlock); } } if (anyio && (--tries > 0)) goto loop; } /* * Create the VM object needed for VMIO and mmap support. This * is done for all VREG files in the system. Some filesystems might * afford the additional metadata buffering capability of the * VMIO code by making the device node be VMIO mode also. * * If !waslocked, must be called with interlock. */ int vfs_object_create(vp, p, cred, waslocked) struct vnode *vp; struct proc *p; struct ucred *cred; int waslocked; { struct vattr vat; vm_object_t object; int error = 0; if ((vp->v_type != VREG) && (vp->v_type != VBLK)) { if (!waslocked) simple_unlock(&vp->v_interlock); return 0; } if (!waslocked) vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p); retry: if ((object = vp->v_object) == NULL) { if (vp->v_type == VREG) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) goto retn; object = vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); } else if (major(vp->v_rdev) < nblkdev) { /* * This simply allocates the biggest object possible * for a VBLK vnode. This should be fixed, but doesn't * cause any problems (yet). */ object = vnode_pager_alloc(vp, INT_MAX, 0, 0); } object->ref_count--; vp->v_usecount--; } else { if (object->flags & OBJ_DEAD) { VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } } if (vp->v_object) { vp->v_flag |= VOBJBUF; } retn: if (!waslocked) { simple_lock(&vp->v_interlock); VOP_UNLOCK(vp, LK_INTERLOCK, p); } return error; } static void vfree(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~VAGE; vp->v_flag |= VFREE; splx(s); } void vbusy(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } else { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; } simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~(VFREE|VAGE); splx(s); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) 
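 */

/*
 * Illustration (not part of this change): the check-or-record pattern
 * described above, modeled in userland.  The single lock makes "is an
 * event already pending?" and "record my interest" one atomic step, so
 * no event can slip in between them.  All names here are hypothetical.
 */
#include <pthread.h>
#include <stdio.h>

struct pollst {
	pthread_mutex_t lock;
	short events;			/* interest recorded for later wakeups */
	short revents;			/* events that have already fired */
};

static short
poll_record(struct pollst *ps, short want)
{
	short got;

	pthread_mutex_lock(&ps->lock);
	got = ps->revents & want;
	if (got != 0)
		ps->revents &= ~got;	/* consume only what was asked for */
	else
		ps->events |= want;	/* nothing pending: register interest */
	pthread_mutex_unlock(&ps->lock);
	return (got);
}

int
main(void)
{
	struct pollst ps = { PTHREAD_MUTEX_INITIALIZER, 0, 0 };

	printf("%d\n", poll_record(&ps, 1));	/* 0: interest recorded */
	ps.revents = 1;				/* an event fires */
	printf("%d\n", poll_record(&ps, 1));	/* 1: pending event consumed */
	return (0);
}

/*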
*/ int vn_pollrecord(vp, p, events) struct vnode *vp; struct proc *p; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo.vpi_revents; vp->v_pollinfo.vpi_revents &= ~events; simple_unlock(&vp->v_pollinfo.vpi_lock); return events; } vp->v_pollinfo.vpi_events |= events; selrecord(p, &vp->v_pollinfo.vpi_selinfo); simple_unlock(&vp->v_pollinfo.vpi_lock); return 0; } /* * Note the occurrence of an event. If the VN_POLLEVENT macro is used, * it is possible for us to miss an event due to race conditions, but * that condition is expected to be rare, so for the moment it is the * preferred interface. */ void vn_pollevent(vp, events) struct vnode *vp; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events & events) { /* * We clear vpi_events so that we don't * call selwakeup() twice if two events are * posted before the polling process(es) is * awakened. This also ensures that we take at * most one selwakeup() if the polling process * is no longer interested. However, it does * mean that only one event can be noticed at * a time. (Perhaps we should only clear those * event bits which we note?) XXX */ vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ vp->v_pollinfo.vpi_revents |= events; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); } /* * Wake up anyone polling on vp because it is being revoked. * This depends on dead_poll() returning POLLHUP for correct * behavior. */ void vn_pollgone(vp) struct vnode *vp; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events) { vp->v_pollinfo.vpi_events = 0; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); +} + + + +/* + * Routine to create and manage a filesystem syncer vnode. + */ +#define sync_close ((int (*) __P((struct vop_close_args *)))nullop) +int sync_fsync __P((struct vop_fsync_args *)); +int sync_inactive __P((struct vop_inactive_args *)); +int sync_reclaim __P((struct vop_reclaim_args *)); +#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) +#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) +int sync_print __P((struct vop_print_args *)); +#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked) + +vop_t **sync_vnodeop_p; +struct vnodeopv_entry_desc sync_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_eopnotsupp }, + { &vop_close_desc, (vop_t *) sync_close }, /* close */ + { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ + { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ + { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ + { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ + { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ + { &vop_print_desc, (vop_t *) sync_print }, /* print */ + { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ + { NULL, NULL } +}; +struct vnodeopv_desc sync_vnodeop_opv_desc = + { &sync_vnodeop_p, sync_vnodeop_entries }; + +VNODEOP_SET(sync_vnodeop_opv_desc); + +/* + * Create a new filesystem syncer vnode for the specified mount point.
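 */

/*
 * Illustration (not part of this change): the sync_vnodeop_entries table
 * above pairs operation descriptors with handlers and lets a default
 * entry cover everything unimplemented.  A minimal model of that
 * default-plus-overrides dispatch; the names here are hypothetical, and
 * the real table is resolved at boot by the VNODEOP_SET machinery.
 */
#include <stdio.h>

typedef int (*vop_fn)(void);

static int op_default(void) { return (-1); }	/* like vop_eopnotsupp */
static int op_fsync(void)   { return (0); }

enum { OP_FSYNC, OP_RECLAIM, OP_MAX };

int
main(void)
{
	vop_fn ops[OP_MAX];
	int i;

	/* Start every slot at the default, then override what we implement. */
	for (i = 0; i < OP_MAX; i++)
		ops[i] = op_default;
	ops[OP_FSYNC] = op_fsync;

	printf("fsync=%d reclaim=%d\n",
	    ops[OP_FSYNC](), ops[OP_RECLAIM]());	/* 0 and -1 */
	return (0);
}

/*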
+ */ +int +vfs_allocate_syncvnode(mp) + struct mount *mp; +{ + struct vnode *vp; + static long start, incr, next; + int error; + + /* Allocate a new vnode */ + if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { + mp->mnt_syncer = NULL; + return (error); + } + vp->v_type = VNON; + /* + * Place the vnode onto the syncer worklist. We attempt to + * scatter them about on the list so that they will go off + * at evenly distributed times even if all the filesystems + * are mounted at once. + */ + next += incr; + if (next == 0 || next > syncer_maxdelay) { + start /= 2; + incr /= 2; + if (start == 0) { + start = syncer_maxdelay / 2; + incr = syncer_maxdelay; + } + next = start; + } + vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); + mp->mnt_syncer = vp; + return (0); +} + +/* + * Do a lazy sync of the filesystem. + */ +int +sync_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + struct vnode *syncvp = ap->a_vp; + struct mount *mp = syncvp->v_mount; + struct proc *p = ap->a_p; + int asyncflag; + + /* + * We only need to do something if this is a lazy evaluation. + */ + if (ap->a_waitfor != MNT_LAZY) + return (0); + + /* + * Move ourselves to the back of the sync list. + */ + vn_syncer_add_to_worklist(syncvp, syncdelay); + + /* + * Walk the list of vnodes pushing all that are dirty and + * not already on the sync list. + */ + simple_lock(&mountlist_slock); + if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) + return (0); + asyncflag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; + VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); + if (asyncflag) + mp->mnt_flag |= MNT_ASYNC; + vfs_unbusy(mp, p); + return (0); +} + +/* + * The syncer vnode is no longer referenced. + */ +int +sync_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + + vgone(ap->a_vp); + return (0); +} + +/* + * The syncer vnode is no longer needed and is being decommissioned. + */ +int +sync_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + vp->v_mount->mnt_syncer = NULL; + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + vp->v_flag &= ~VONWORKLST; + } + + return (0); +} + +/* + * Print out a syncer vnode. + */ +int +sync_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + printf("syncer vnode"); + if (vp->v_vnlock != NULL) + lockmgr_printinfo(vp->v_vnlock); + printf("\n"); + return (0); } Index: head/sys/kern/vfs_extattr.c =================================================================== --- head/sys/kern/vfs_extattr.c (revision 34265) +++ head/sys/kern/vfs_extattr.c (revision 34266) @@ -1,2826 +1,2841 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 - * $Id: vfs_syscalls.c,v 1.93 1998/02/15 04:17:09 dyson Exp $ + * $Id: vfs_syscalls.c,v 1.94 1998/03/07 21:35:39 dyson Exp $ */ /* For 4.3 integer FS ID compatibility */ #include "opt_compat.h" /* * XXX - The following is required because of some magic done * in getdirentries() below which is only done if the translucent * filesystem `UNION' is compiled into the kernel. This is broken, * but I don't have time to study the code deeply enough to understand * what's going on and determine an appropriate fix. -GAW */ #include "opt_union.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef UNION #include #endif #include #include #include #include #include static int change_dir __P((struct nameidata *ndp, struct proc *p)); static void checkdirs __P((struct vnode *olddp)); static int usermount = 0; /* if 1, non-root can mount fs. */ SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); /* * Virtual File System System Calls */ /* * Mount a file system. */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { char *type; char *path; int flags; caddr_t data; }; #endif /* ARGSUSED */ int mount(p, uap) struct proc *p; register struct mount_args /* { syscallarg(char *) type; syscallarg(char *) path; syscallarg(int) flags; syscallarg(caddr_t) data; } */ *uap; { struct vnode *vp; struct mount *mp; struct vfsconf *vfsp; int error, flag = 0, flag2 = 0; struct vattr va; u_long fstypenum; struct nameidata nd; char fstypename[MFSNAMELEN]; if (usermount == 0 && (error = suser(p->p_ucred, &p->p_acflag))) return (error); /* * Get vnode to be covered */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (SCARG(uap, flags) & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } mp = vp->v_mount; flag = mp->mnt_flag; flag2 = mp->mnt_kern_flag; /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. 
*/ if ((SCARG(uap, flags) & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } mp->mnt_flag |= SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); /* * Only root, or the user that did the original mount is * permitted to update it. */ if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (vfs_busy(mp, LK_NOWAIT, 0, p)) { vput(vp); return (EBUSY); } VOP_UNLOCK(vp, 0, p); goto update; } /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || (va.va_uid != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag)))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) return (error); if (vp->v_type != VDIR) { vput(vp); return (ENOTDIR); } #ifdef COMPAT_43 /* * Historically filesystem types were identified by number. If we * get an integer for the filesystem type instead of a string, we * check to see if it matches one of the historic filesystem types. */ fstypenum = (u_long)SCARG(uap, type); if (fstypenum < maxvfsconf) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == fstypenum) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); } else #endif /* COMPAT_43 */ if (error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) { vput(vp); return (error); } for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } if (vp->v_mountedhere != NULL) { vput(vp); return (EBUSY); } /* * Allocate and initialize the filesystem. */ mp = (struct mount *)malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; mp->mnt_stat.f_owner = p->p_ucred->cr_uid; update: /* * Set the mount level flags. */ if (SCARG(uap, flags) & MNT_RDONLY) mp->mnt_flag |= MNT_RDONLY; else if (mp->mnt_flag & MNT_RDONLY) mp->mnt_kern_flag |= MNTK_WANTRDWR; mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); /* * Mount the filesystem. 
*/ error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p); if (mp->mnt_flag & MNT_UPDATE) { vrele(vp); if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) { mp->mnt_flag = flag; mp->mnt_kern_flag = flag2; } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } vfs_unbusy(mp, p); return (error); } /* * Put the new filesystem on the mount list after root. */ cache_purge(vp); if (!error) { simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); simple_unlock(&mountlist_slock); checkdirs(vp); VOP_UNLOCK(vp, 0, p); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); vfs_unbusy(mp, p); if (error = VFS_START(mp, 0, p)) vrele(vp); } else { mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp, p); free((caddr_t)mp, M_MOUNT); vput(vp); } return (error); } /* * Scan all active processes to see if any of them have a current * or root directory onto which the new filesystem has just been * mounted. If so, replace them with the new mount point. */ static void checkdirs(olddp) struct vnode *olddp; { struct filedesc *fdp; struct vnode *newdp; struct proc *p; if (olddp->v_usecount == 1) return; if (VFS_ROOT(olddp->v_mountedhere, &newdp)) panic("mount: lost mount"); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { fdp = p->p_fd; if (fdp->fd_cdir == olddp) { vrele(fdp->fd_cdir); VREF(newdp); fdp->fd_cdir = newdp; } if (fdp->fd_rdir == olddp) { vrele(fdp->fd_rdir); VREF(newdp); fdp->fd_rdir = newdp; } } if (rootvnode == olddp) { vrele(rootvnode); VREF(newdp); rootvnode = newdp; } vput(newdp); } /* * Unmount a file system. * * Note: unmount takes a path to the vnode mounted on as argument, * not special file (as before). */ #ifndef _SYS_SYSPROTO_H_ struct unmount_args { char *path; int flags; }; #endif /* ARGSUSED */ int unmount(p, uap) struct proc *p; register struct unmount_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; mp = vp->v_mount; /* * Only root, or the user that did the original mount is * permitted to unmount this filesystem. */ if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Don't allow unmounting the root file system. */ if (mp->mnt_flag & MNT_ROOTFS) { vput(vp); return (EINVAL); } /* * Must be the root of the filesystem */ if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } vput(vp); return (dounmount(mp, SCARG(uap, flags), p)); } /* * Do the actual file system unmount. 
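 */

/*
 * Illustration (not part of this change): the userland view of the
 * unmount path handled below.  MNT_FORCE makes dounmount() detach even
 * active vnodes, as described in vflush(); "/mnt" is just an example
 * path.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>

int
main(void)
{
	if (unmount("/mnt", MNT_FORCE) == -1) {
		perror("unmount");
		return (1);
	}
	return (0);
}

/*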
*/ int dounmount(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { struct vnode *coveredvp; int error; simple_lock(&mountlist_slock); mp->mnt_kern_flag |= MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); vfs_msync(mp, MNT_WAIT); mp->mnt_flag &=~ MNT_ASYNC; cache_purgevfs(mp); /* remove cache entries for this file sys */ + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, p); simple_lock(&mountlist_slock); if (error) { + if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) + (void) vfs_allocate_syncvnode(mp); mp->mnt_kern_flag &= ~MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, &mountlist_slock, p); return (error); } CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { coveredvp->v_mountedhere = (struct mount *)0; vrele(coveredvp); } mp->mnt_vfc->vfc_refcount--; if (mp->mnt_vnodelist.lh_first != NULL) panic("unmount: dangling vnode"); lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); free((caddr_t)mp, M_MOUNT); return (0); } /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif #ifdef DEBUG static int syncprt = 0; SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); #endif /* ARGSUSED */ int sync(p, uap) struct proc *p; struct sync_args *uap; { register struct mount *mp, *nmp; int asyncflag; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); - VFS_SYNC(mp, MNT_NOWAIT, p != NULL ? p->p_ucred : NOCRED, p); - if (asyncflag) - mp->mnt_flag |= MNT_ASYNC; + VFS_SYNC(mp, MNT_NOWAIT, + ((p != NULL) ? p->p_ucred : NOCRED), p); + mp->mnt_flag |= asyncflag; } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine * was not imported in the Lite2 merge. */ #ifdef DIAGNOSTIC if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ #endif return (0); } /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif /* ARGSUSED */ int quotactl(p, uap) struct proc *p; register struct quotactl_args /* { syscallarg(char *) path; syscallarg(int) cmd; syscallarg(int) uid; syscallarg(caddr_t) arg; } */ *uap; { register struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; vrele(nd.ni_vp); return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), SCARG(uap, arg), p)); } /* * Get filesystem statistics. 
*/ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif /* ARGSUSED */ int statfs(p, uap) struct proc *p; register struct statfs_args /* { syscallarg(char *) path; syscallarg(struct statfs *) buf; } */ *uap; { register struct mount *mp; register struct statfs *sp; int error; struct nameidata nd; struct statfs sb; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; vrele(nd.ni_vp); error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif /* ARGSUSED */ int fstatfs(p, uap) struct proc *p; register struct fstatfs_args /* { syscallarg(int) fd; syscallarg(struct statfs *) buf; } */ *uap; { struct file *fp; struct mount *mp; register struct statfs *sp; int error; struct statfs sb; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; sp = &mp->mnt_stat; error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int flags; }; #endif int getfsstat(p, uap) struct proc *p; register struct getfsstat_args /* { syscallarg(struct statfs *) buf; syscallarg(long) bufsize; syscallarg(int) flags; } */ *uap; { register struct mount *mp, *nmp; register struct statfs *sp; caddr_t sfsp; long count, maxcount, error; maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); sfsp = (caddr_t)SCARG(uap, buf); count = 0; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache; MNT_WAIT overrides + * MNT_NOWAIT and MNT_LAZY. */ - if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 || + if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, p))) { simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); continue; } sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = copyout((caddr_t)sp, sfsp, sizeof(*sp)); if (error) { vfs_unbusy(mp, p); return (error); } sfsp += sizeof(*sp); } count++; simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); if (sfsp && count > maxcount) p->p_retval[0] = maxcount; else p->p_retval[0] = count; return (0); } /* * Change current working directory to a given file descriptor. 
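From userland, the semantics above are visible through getfsstat(2): a null buffer returns only the count of mounted filesystems, and MNT_NOWAIT asks for the cached statistics that the new MNT_LAZY test also protects. A minimal illustrative sketch (not part of the patch):

        #include <sys/param.h>
        #include <sys/ucred.h>
        #include <sys/mount.h>
        #include <err.h>
        #include <stdio.h>
        #include <stdlib.h>

        int
        main(void)
        {
                struct statfs *sf;
                int i, n;

                /* NULL buffer: just count the mounted filesystems. */
                if ((n = getfsstat(NULL, 0, MNT_NOWAIT)) < 0)
                        err(1, "getfsstat");
                if ((sf = malloc(n * sizeof(*sf))) == NULL)
                        err(1, "malloc");
                /* MNT_NOWAIT: use cached statistics, skip VFS_STATFS. */
                if ((n = getfsstat(sf, n * sizeof(*sf), MNT_NOWAIT)) < 0)
                        err(1, "getfsstat");
                for (i = 0; i < n; i++)
                        printf("%s on %s (%s)\n", sf[i].f_mntfromname,
                            sf[i].f_mntonname, sf[i].f_fstypename);
                return (0);
        }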
*/ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif /* ARGSUSED */ int fchdir(p, uap) struct proc *p; struct fchdir_args /* { syscallarg(int) fd; } */ *uap; { register struct filedesc *fdp = p->p_fd; struct vnode *vp, *tdp; struct mount *mp; struct file *fp; int error; if (error = getvnode(fdp, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0, 0, p)) continue; error = VFS_ROOT(mp, &tdp); vfs_unbusy(mp, p); if (error) break; vput(vp); vp = tdp; } if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, p); vrele(fdp->fd_cdir); fdp->fd_cdir = vp; return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif /* ARGSUSED */ int chdir(p, uap) struct proc *p; struct chdir_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_cdir); fdp->fd_cdir = nd.ni_vp; return (0); } /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif /* ARGSUSED */ int chroot(p, uap) struct proc *p; struct chroot_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_rdir); fdp->fd_rdir = nd.ni_vp; return (0); } /* * Common routine for chroot and chdir. */ static int change_dir(ndp, p) register struct nameidata *ndp; struct proc *p; { struct vnode *vp; int error; error = namei(ndp); if (error) return (error); vp = ndp->ni_vp; if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); if (error) vput(vp); else VOP_UNLOCK(vp, 0, p); return (error); } /* * Check permissions, allocate an open file structure, * and call the device open routine if any. 
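fchdir() above is careful to walk down through any filesystems mounted over the target directory (the v_mountedhere loop) before installing it as fd_cdir. From userland, an open directory descriptor is the cheap way to save and restore the working directory without a second path lookup. Illustrative sketch:

        #include <sys/types.h>
        #include <err.h>
        #include <fcntl.h>
        #include <unistd.h>

        int
        main(void)
        {
                int dirfd;

                if ((dirfd = open(".", O_RDONLY)) < 0)
                        err(1, "open .");
                if (chdir("/tmp") < 0)
                        err(1, "chdir");
                /* ... work in /tmp ... */
                if (fchdir(dirfd) < 0) /* straight back, no path lookup */
                        err(1, "fchdir");
                close(dirfd);
                return (0);
        }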
*/ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int open(p, uap) struct proc *p; register struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register struct vnode *vp; int cmode, flags, oflags; struct file *nfp; int type, indx, error; struct flock lf; struct nameidata nd; oflags = SCARG(uap, flags); if ((oflags & O_ACCMODE) == O_ACCMODE) return (EINVAL); flags = FFLAGS(oflags); error = falloc(p, &nfp, &indx); if (error) return (error); fp = nfp; cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ error = vn_open(&nd, flags, cmode); if (error) { ffree(fp); if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { p->p_retval[0] = indx; return (0); } if (error == ERESTART) error = EINTR; fdp->fd_ofiles[indx] = NULL; return (error); } p->p_dupfd = 0; vp = nd.ni_vp; fp->f_flag = flags & FMASK; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); fp->f_ops = &vnops; fp->f_data = (caddr_t)vp; if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (flags & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; VOP_UNLOCK(vp, 0, p); if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) { (void) vn_close(vp, fp->f_flag, fp->f_cred, p); ffree(fp); fdp->fd_ofiles[indx] = NULL; return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } if ((vp->v_type == VREG) && (vp->v_object == NULL)) vfs_object_create(vp, p, p->p_ucred, TRUE); VOP_UNLOCK(vp, 0, p); p->p_retval[0] = indx; return (0); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(p, uap) struct proc *p; register struct ocreat_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, mode) = SCARG(uap, mode); SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; return (open(p, &nuap)); } #endif /* COMPAT_43 */ /* * Create a special file. 
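Within open() above, O_EXLOCK and O_SHLOCK take an flock()-style advisory lock atomically with the open itself, closing the usual open-then-lock race; FHASLOCK arranges for the last close to drop the lock. A small usage sketch:

        #include <err.h>
        #include <fcntl.h>
        #include <unistd.h>

        int
        main(void)
        {
                int fd;

                /*
                 * FNONBLOCK is not set, so the kernel uses F_WAIT and
                 * the open blocks until the exclusive lock is granted.
                 */
                fd = open("/tmp/lockfile", O_RDWR | O_CREAT | O_EXLOCK, 0644);
                if (fd < 0)
                        err(1, "open");
                /* ... critical section; lock drops on last close ... */
                close(fd);
                return (0);
        }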
*/ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif /* ARGSUSED */ int mknod(p, uap) struct proc *p; register struct mknod_args /* { syscallarg(char *) path; syscallarg(int) mode; syscallarg(int) dev; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; int whiteout; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) error = EEXIST; else { VATTR_NULL(&vattr); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; vattr.va_rdev = SCARG(uap, dev); whiteout = 0; switch (SCARG(uap, mode) & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: error = EINVAL; break; } } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (whiteout) { error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); if (error) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); } else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); } } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp) vrele(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); return (error); } /* * Create a named pipe. */ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkfifo(p, uap) struct proc *p; register struct mkfifo_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif /* ARGSUSED */ int link(p, uap) struct proc *p; register struct link_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { register struct vnode *vp; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); error = namei(&nd); if (!error) { if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); error = EEXIST; } else { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); } } } vrele(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); return (error); } /* * Make a symbolic link. 
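mknod() above is restricted to the superuser, but the same VOP_MKNOD path is reachable by unprivileged code through mkfifo(). Illustrative sketch:

        #include <sys/types.h>
        #include <sys/stat.h>
        #include <err.h>
        #include <errno.h>
        #include <fcntl.h>
        #include <unistd.h>

        int
        main(void)
        {
                int fd;

                /* The requested mode is reduced by the process umask. */
                if (mkfifo("/tmp/fifo", 0666) < 0 && errno != EEXIST)
                        err(1, "mkfifo");
                /* O_NONBLOCK lets the open succeed with no writer present. */
                if ((fd = open("/tmp/fifo", O_RDONLY | O_NONBLOCK)) < 0)
                        err(1, "open");
                close(fd);
                return (0);
        }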
*/ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif /* ARGSUSED */ int symlink(p, uap) struct proc *p; register struct symlink_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { struct vattr vattr; char *path; int error; struct nameidata nd; path = zalloc(namei_zone); if (error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) goto out; NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); if (error = namei(&nd)) goto out; if (nd.ni_vp) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; goto out; } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); out: zfree(namei_zone, path); return (error); } /* * Delete a whiteout from the filesystem. */ /* ARGSUSED */ int undelete(p, uap) struct proc *p; register struct undelete_args /* { syscallarg(char *) path; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); return (EEXIST); } VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif /* ARGSUSED */ int unlink(p, uap) struct proc *p; struct unlink_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_flag & VROOT) error = EBUSY; } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp != NULLVP) vput(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); return (error); } /* * Reposition read/write file offset. 
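unlink() above rejects directories with EPERM (POSIX reserves directory removal for rmdir()) and the root of a mount with EBUSY. A sketch showing the directory case:

        #include <sys/stat.h>
        #include <err.h>
        #include <errno.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        int
        main(void)
        {
                if (mkdir("/tmp/d", 0755) < 0 && errno != EEXIST)
                        err(1, "mkdir");
                if (unlink("/tmp/d") < 0)       /* expect EPERM */
                        printf("unlink on a directory: %s\n",
                            strerror(errno));
                if (rmdir("/tmp/d") < 0)
                        err(1, "rmdir");
                return (0);
        }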
*/ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int lseek(p, uap) struct proc *p; register struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ *uap; { struct ucred *cred = p->p_ucred; register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vattr vattr; int error; if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (ESPIPE); switch (SCARG(uap, whence)) { case L_INCR: fp->f_offset += SCARG(uap, offset); break; case L_XTND: error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p); if (error) return (error); fp->f_offset = SCARG(uap, offset) + vattr.va_size; break; case L_SET: fp->f_offset = SCARG(uap, offset); break; default: return (EINVAL); } *(off_t *)(p->p_retval) = fp->f_offset; return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(p, uap) struct proc *p; register struct olseek_args /* { syscallarg(int) fd; syscallarg(long) offset; syscallarg(int) whence; } */ *uap; { struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ nuap; int error; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, offset) = SCARG(uap, offset); SCARG(&nuap, whence) = SCARG(uap, whence); error = lseek(p, &nuap); return (error); } #endif /* COMPAT_43 */ /* * Check access permissions. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int flags; }; #endif int access(p, uap) struct proc *p; register struct access_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct ucred *cred = p->p_ucred; register struct vnode *vp; int error, flags, t_gid, t_uid; struct nameidata nd; t_uid = cred->cr_uid; t_gid = cred->cr_groups[0]; cred->cr_uid = p->p_cred->p_ruid; cred->cr_groups[0] = p->p_cred->p_rgid; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) goto out1; vp = nd.ni_vp; /* Flags == 0 means only check for existence. */ if (SCARG(uap, flags)) { flags = 0; if (SCARG(uap, flags) & R_OK) flags |= VREAD; if (SCARG(uap, flags) & W_OK) flags |= VWRITE; if (SCARG(uap, flags) & X_OK) flags |= VEXEC; if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, p); } vput(vp); out1: cred->cr_uid = t_uid; cred->cr_groups[0] = t_gid; return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int ostat(p, uap) struct proc *p; register struct ostat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Get file status; this version does not follow links. 
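access() above swaps the real uid/gid into the credential for the duration of the check, so a set-uid program can ask whether the invoking user, rather than the effective user, may touch a path. Sketch:

        #include <err.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(int argc, char **argv)
        {
                if (argc != 2)
                        errx(1, "usage: canwrite path");
                /*
                 * Even in a set-uid binary, the check is made against
                 * the invoking user's real credentials.
                 */
                if (access(argv[1], W_OK) == 0)
                        printf("%s: writable by the real uid\n", argv[1]);
                else
                        printf("%s: not writable by the real uid\n", argv[1]);
                return (0);
        }

As always, the answer can go stale between access() and a later open(), so it is advisory only.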
*/ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int olstat(p, uap) struct proc *p; register struct olstat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct vnode *vp; struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); if (vp->v_type == VLNK) sb.st_mode |= S_IFLNK | ACCESSPERMS; /* 0777 */ vput(vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Convert from an old to a new stat structure. */ void cvtstat(st, ost) struct stat *st; struct ostat *ost; { ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atime = st->st_atime; ost->st_mtime = st->st_mtime; ost->st_ctime = st->st_ctime; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int stat(p, uap) struct proc *p; register struct stat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { struct stat sb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int lstat(p, uap) struct proc *p; register struct lstat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); if (vp->v_type == VLNK) sb.st_mode |= S_IFLNK | ACCESSPERMS; /* 0777 */ vput(vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif /* ARGSUSED */ int pathconf(p, uap) struct proc *p; register struct pathconf_args /* { syscallarg(char *) path; syscallarg(int) name; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link. 
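lstat() above does not ask the filesystem for a symlink's mode at all; it forges S_IFLNK | ACCESSPERMS (0777), while stat() follows the link and reports on the target. A sketch contrasting the two:

        #include <sys/types.h>
        #include <sys/stat.h>
        #include <err.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                struct stat sb;

                (void)unlink("/tmp/sl");
                if (symlink("/etc/motd", "/tmp/sl") < 0)
                        err(1, "symlink");
                if (stat("/tmp/sl", &sb) < 0)   /* follows the link */
                        err(1, "stat");
                printf("stat:  mode %o\n", (unsigned)sb.st_mode);
                if (lstat("/tmp/sl", &sb) < 0)  /* the link itself */
                        err(1, "lstat");
                printf("lstat: mode %o (S_ISLNK=%d)\n",
                    (unsigned)sb.st_mode, S_ISLNK(sb.st_mode));
                return (0);
        }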
*/ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; int count; }; #endif /* ARGSUSED */ int readlink(p, uap) struct proc *p; register struct readlink_args /* { syscallarg(char *) path; syscallarg(char *) buf; syscallarg(int) count; } */ *uap; { register struct vnode *vp; struct iovec aiov; struct uio auio; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); error = VOP_READLINK(vp, &auio, p->p_ucred); } vput(vp); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { char *path; int flags; }; #endif /* ARGSUSED */ int chflags(p, uap) struct proc *p; register struct chflags_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; int flags; }; #endif /* ARGSUSED */ int fchflags(p, uap) struct proc *p; register struct fchflags_args /* { syscallarg(int) fd; syscallarg(int) flags; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int chmod(p, uap) struct proc *p; register struct chmod_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change mode of a file given a file descriptor. 
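readlink() above copies the link target into the user's buffer via uiomove and reports the byte count through p_retval; nothing NUL-terminates the result, and a non-link fails with EINVAL. Sketch with the termination done by hand:

        #include <err.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                char buf[1024];
                int n;

                (void)unlink("/tmp/sl");
                if (symlink("/etc/motd", "/tmp/sl") < 0)
                        err(1, "symlink");
                /* readlink(2) does not NUL-terminate: do it by hand. */
                if ((n = readlink("/tmp/sl", buf, sizeof(buf) - 1)) < 0)
                        err(1, "readlink");     /* EINVAL if not a symlink */
                buf[n] = '\0';
                printf("/tmp/sl -> %s\n", buf);
                return (0);
        }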
*/ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif /* ARGSUSED */ int fchmod(p, uap) struct proc *p; register struct fchmod_args /* { syscallarg(int) fd; syscallarg(int) mode; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int chown(p, uap) struct proc *p; register struct chown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int lchown(p, uap) struct proc *p; register struct lchown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif /* ARGSUSED */ int fchown(p, uap) struct proc *p; register struct fchown_args /* { syscallarg(int) fd; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set the access and modification times of a file. 
*/ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int utimes(p, uap) struct proc *p; register struct utimes_args /* { syscallarg(char *) path; syscallarg(struct timeval *) tptr; } */ *uap; { register struct vnode *vp; struct timeval tv[2]; struct vattr vattr; int error; struct nameidata nd; VATTR_NULL(&vattr); if (SCARG(uap, tptr) == NULL) { microtime(&tv[0]); tv[1] = tv[0]; vattr.va_vaflags |= VA_UTIMES_NULL; } else if (error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, sizeof (tv))) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); vattr.va_atime.tv_sec = tv[0].tv_sec; vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; vattr.va_mtime.tv_sec = tv[1].tv_sec; vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif /* ARGSUSED */ int truncate(p, uap) struct proc *p; register struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; if (uap->length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } vput(vp); return (error); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ftruncate_args { int fd; int pad; off_t length; }; #endif /* ARGSUSED */ int ftruncate(p, uap) struct proc *p; register struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (uap->length < 0) return(EINVAL); if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FWRITE) == 0) return (EINVAL); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); } VOP_UNLOCK(vp, 0, p); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif /* ARGSUSED */ int otruncate(p, uap) struct proc *p; register struct otruncate_args /* { syscallarg(char *) path; syscallarg(long) length; } */ *uap; { struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, length) = SCARG(uap, length); return (truncate(p, &nuap)); } /* * Truncate a file given a file descriptor. 
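The otherwise odd int pad member in the truncate()/ftruncate() argument structures above exists only to keep the 64-bit off_t aligned in the syscall argument block; libc hides it entirely. Typical use:

        #include <sys/types.h>
        #include <err.h>
        #include <fcntl.h>
        #include <unistd.h>

        int
        main(void)
        {
                int fd;

                if ((fd = open("/tmp/grow", O_RDWR | O_CREAT, 0644)) < 0)
                        err(1, "open");
                /* Extending and shrinking both go through VOP_SETATTR. */
                if (ftruncate(fd, (off_t)1048576) < 0)
                        err(1, "ftruncate");
                if (ftruncate(fd, (off_t)0) < 0)
                        err(1, "ftruncate");
                close(fd);
                return (0);
        }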
*/ #ifndef _SYS_SYSPROTO_H_ struct oftruncate_args { int fd; long length; }; #endif /* ARGSUSED */ int oftruncate(p, uap) struct proc *p; register struct oftruncate_args /* { syscallarg(int) fd; syscallarg(long) length; } */ *uap; { struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, length) = SCARG(uap, length); return (ftruncate(p, &nuap)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif /* ARGSUSED */ int fsync(p, uap) struct proc *p; struct fsync_args /* { syscallarg(int) fd; } */ *uap; { register struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p)) == NULL) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, FALSE); } error = VOP_FSYNC(vp, fp->f_cred, (vp->v_mount && (vp->v_mount->mnt_flag & MNT_ASYNC)) ? MNT_NOWAIT : MNT_WAIT, p); VOP_UNLOCK(vp, 0, p); } return (error); } /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif /* ARGSUSED */ int rename(p, uap) struct proc *p; register struct rename_args /* { syscallarg(char *) from; syscallarg(char *) to; } */ *uap; { register struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, SCARG(uap, from), p); if (error = namei(&fromnd)) return (error); fvp = fromnd.ni_vp; NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&tond)) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } } if (fvp == tdvp) error = EINVAL; /* * If source is the same as the destination (that is the * same inode number with the same name in the same directory), * then there is nothing to do. 
*/ if (fvp == tvp && fromnd.ni_dvp == tdvp && fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen)) error = -1; out: if (!error) { VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); } if (tvp) { VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename"); zfree(namei_zone, tond.ni_cnd.cn_pnbuf); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); zfree(namei_zone, fromnd.ni_cnd.cn_pnbuf); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkdir(p, uap) struct proc *p; register struct mkdir_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); nd.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (!error) vput(nd.ni_vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif /* ARGSUSED */ int rmdir(p, uap) struct proc *p; struct rmdir_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) error = EBUSY; out: if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vput(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a file system independent format. 
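The error = -1 trick above makes renaming a file onto itself (same vnode, same parent, same name) a successful no-op, as POSIX requires: out1 maps -1 back to 0 after the usual cleanup. Sketch:

        #include <err.h>
        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                int fd;

                if ((fd = open("/tmp/self", O_RDWR | O_CREAT, 0644)) < 0)
                        err(1, "open");
                close(fd);
                /* Renaming a file to itself is a successful no-op. */
                if (rename("/tmp/self", "/tmp/self") < 0)
                        err(1, "rename");
                printf("rename to self: ok\n");
                return (0);
        }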
*/ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(p, uap) struct proc *p; register struct ogetdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; int error, eofflag, readcnt; long loff; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = SCARG(uap, count); MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { readcnt = SCARG(uap, count) - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a file system independent format. 
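The byte swapping above exists because the old directory-entry format spent sixteen bits on d_namlen, while the 4.4BSD format splits that space into eight-bit d_type and d_namlen fields; on a little-endian machine the new d_type occupies the byte that used to hold the low half of the old d_namlen. The two layouts, sketched with hypothetical struct names and this era's field widths:

        #include <sys/types.h>

        struct dirent_new {             /* 4.4BSD format */
                u_int32_t d_fileno;
                u_int16_t d_reclen;
                u_int8_t  d_type;       /* DT_REG, DT_DIR, ... */
                u_int8_t  d_namlen;
                char      d_name[255 + 1];
        };

        struct dirent_old {             /* old format, no d_type */
                u_int32_t d_fileno;
                u_int16_t d_reclen;
                u_int16_t d_namlen;     /* high byte must be zero */
                char      d_name[255 + 1];
        };

So on little-endian hardware the fix-up is just dp->d_type = dp->d_namlen; dp->d_namlen = 0;, while big-endian machines only need to clear d_type, which is exactly what the two #if branches above do.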
*/ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int getdirentries(p, uap) struct proc *p; register struct getdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; long loff; int error, eofflag; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); /* vn_lock(vp, LK_SHARED | LK_RETRY, p); */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Set the mode mask for creation of filesystem nodes. */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int umask(p, uap) struct proc *p; struct umask_args /* { syscallarg(int) newmask; } */ *uap; { register struct filedesc *fdp; fdp = p->p_fd; p->p_retval[0] = fdp->fd_cmask; fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; return (0); } /* * Void all references to file by ripping underlying filesystem * away from vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif /* ARGSUSED */ int revoke(p, uap) struct proc *p; register struct revoke_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) goto out; if (p->p_ucred->cr_uid != vattr.va_uid && (error = suser(p->p_ucred, &p->p_acflag))) goto out; if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) VOP_REVOKE(vp, REVOKEALL); out: vrele(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry. 
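getdirentries() above hands userland a buffer of variable-length records that must be walked by d_reclen, exactly as opendir(3)/readdir(3) do internally. A minimal illustrative walker:

        #include <sys/types.h>
        #include <dirent.h>
        #include <err.h>
        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int
        main(void)
        {
                char buf[4096], *cp;
                struct dirent *dp;
                long base;
                int fd, n;

                if ((fd = open("/tmp", O_RDONLY)) < 0)
                        err(1, "open");
                while ((n = getdirentries(fd, buf, sizeof(buf), &base)) > 0) {
                        /* Records are variable length: step by d_reclen. */
                        for (cp = buf; cp < buf + n; cp += dp->d_reclen) {
                                dp = (struct dirent *)cp;
                                if (dp->d_fileno != 0)  /* skip empty slots */
                                        printf("%.*s\n", (int)dp->d_namlen,
                                            dp->d_name);
                        }
                }
                if (n < 0)
                        err(1, "getdirentries");
                close(fd);
                return (0);
        }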
*/ int getvnode(fdp, fd, fpp) struct filedesc *fdp; int fd; struct file **fpp; { struct file *fp; if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) return (EINVAL); *fpp = fp; return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getcwd_args { u_char *buf; u_int buflen; }; #endif #define STATNODE(mode, name, var) \ SYSCTL_INT(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); static int disablecwd; SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, ""); static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); int __getcwd(p, uap) struct proc *p; struct __getcwd_args *uap; { char *bp, *buf; int error, i, slash_prefixed; struct filedesc *fdp; struct namecache *ncp; struct vnode *vp; numcwdcalls++; if (disablecwd) return (ENODEV); if (uap->buflen < 2) return (EINVAL); if (uap->buflen > MAXPATHLEN) uap->buflen = MAXPATHLEN; buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); bp += uap->buflen - 1; *bp = '\0'; fdp = p->p_fd; slash_prefixed = 0; for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { if (vp->v_flag & VROOT) { vp = vp->v_mount->mnt_vnodecovered; continue; } if (vp->v_dd->v_id != vp->v_ddid) { numcwdfail1++; free(buf, M_TEMP); return (ENOTDIR); } ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { numcwdfail2++; free(buf, M_TEMP); return (ENOENT); } if (ncp->nc_dvp != vp->v_dd) { numcwdfail3++; free(buf, M_TEMP); return (EBADF); } for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = ncp->nc_name[i]; } if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; slash_prefixed = 1; vp = vp->v_dd; } if (!slash_prefixed) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; } numcwdfound++; error = copyout(bp, uap->buf, strlen(bp) + 1); free(buf, M_TEMP); return (error); } Index: head/sys/kern/vfs_subr.c =================================================================== --- head/sys/kern/vfs_subr.c (revision 34265) +++ head/sys/kern/vfs_subr.c (revision 34266) @@ -1,2328 +1,2674 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 - * $Id: vfs_subr.c,v 1.136 1998/03/01 23:07:45 dyson Exp $ + * $Id: vfs_subr.c,v 1.137 1998/03/07 21:35:35 dyson Exp $ */ /* * External virtual filesystem routines */ #include "opt_ddb.h" #include "opt_devfs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure"); static void insmntque __P((struct vnode *vp, struct mount *mp)); #ifdef DDB static void printlockedvnodes __P((void)); #endif static void vclean __P((struct vnode *vp, int flags, struct proc *p)); static void vfree __P((struct vnode *)); static void vgonel __P((struct vnode *vp, struct proc *p)); static unsigned long numvnodes; SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[9] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, }; /* * Insq/Remq for the vnode usage lists. */ #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) #define bufremvn(bp) { \ LIST_REMOVE(bp, b_vnbufs); \ (bp)->b_vnbufs.le_next = NOLIST; \ } static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ struct tobefreelist vnode_tobefree_list; /* vnode free list */ static u_long wantfreevnodes = 25; SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); static u_long freevnodes = 0; SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); int vfs_ioopt = 0; SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); struct mntlist mountlist; /* mounted filesystem list */ struct simplelock mountlist_slock; static struct simplelock mntid_slock; struct simplelock mntvnode_slock; static struct simplelock vnode_free_list_slock; static struct simplelock spechash_slock; struct nfs_public nfs_pub; /* publicly exported FS */ static vm_zone_t vnode_zone; +/* + * The workitem queue. 
+ */ +#define SYNCER_MAXDELAY 32 +int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ +time_t syncdelay = 30; +int rushjob; /* number of slots to run ASAP */ + +static int syncer_delayno = 0; +static long syncer_mask; +LIST_HEAD(synclist, vnode); +static struct synclist *syncer_workitem_pending; + int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, ""); static void vfs_free_addrlist __P((struct netexport *nep)); static int vfs_free_netcred __P((struct radix_node *rn, void *w)); static int vfs_hang_addrlist __P((struct mount *mp, struct netexport *nep, struct export_args *argp)); /* * Initialize the vnode management data structures. */ void vntblinit() { desiredvnodes = maxproc + cnt.v_page_count / 4; simple_lock_init(&mntvnode_slock); simple_lock_init(&mntid_slock); simple_lock_init(&spechash_slock); TAILQ_INIT(&vnode_free_list); TAILQ_INIT(&vnode_tobefree_list); simple_lock_init(&vnode_free_list_slock); CIRCLEQ_INIT(&mountlist); vnode_zone = zinit("VNODE", sizeof (struct vnode), 0, 0, 5); + /* + * Initialize the filesystem syncer. + */ + syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, + &syncer_mask); + syncer_maxdelay = syncer_mask + 1; } /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Interlock is not released on failure. */ int vfs_busy(mp, flags, interlkp, p) struct mount *mp; int flags; struct simplelock *interlkp; struct proc *p; { int lkflags; if (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & LK_NOWAIT) return (ENOENT); mp->mnt_kern_flag |= MNTK_MWAIT; if (interlkp) { simple_unlock(interlkp); } /* * Since all busy locks are shared except the exclusive * lock granted when unmounting, the only place that a * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. */ tsleep((caddr_t)mp, PVFS, "vfs_busy", 0); if (interlkp) { simple_lock(interlkp); } return (ENOENT); } lkflags = LK_SHARED | LK_NOPAUSE; if (interlkp) lkflags |= LK_INTERLOCK; if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p)) panic("vfs_busy: unexpected lock failure"); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(mp, p) struct mount *mp; struct proc *p; { lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p); } /* * Lookup a filesystem type, and if found allocate and initialize * a mount structure for it. * * Devname is usually updated by mount(8) after booting. */ int vfs_rootmountalloc(fstypename, devname, mpp) char *fstypename; char *devname; struct mount **mpp; { struct proc *p = curproc; /* XXX */ struct vfsconf *vfsp; struct mount *mp; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) return (ENODEV); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); LIST_INIT(&mp->mnt_vnodelist); mp->mnt_vfc = vfsp; mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_flag = MNT_RDONLY; mp->mnt_vnodecovered = NULLVP; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); mp->mnt_stat.f_mntonname[0] = '/'; mp->mnt_stat.f_mntonname[1] = 0; (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); *mpp = mp; return (0); } /* * Find an appropriate filesystem to use for the root. 
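vntblinit() above sizes the syncer wheel with hashinit(), which returns a table whose size is the largest power of two not exceeding the requested count and stores size - 1 in syncer_mask; syncer_maxdelay is then rebiased to syncer_mask + 1. That is why a slot can be computed with a mask instead of a modulus. A standalone sketch of the wheel arithmetic, with the kernel types replaced by plain ints:

        #include <stdio.h>

        #define SYNCER_MAXDELAY 32      /* must be a power of two here */

        static int syncer_delayno;      /* slot the syncer drains next */
        static int syncer_mask = SYNCER_MAXDELAY - 1;

        /* Which slot does a request with the given delay land in? */
        static int
        syncer_slot(int delay)
        {
                if (delay > SYNCER_MAXDELAY - 2)
                        delay = SYNCER_MAXDELAY - 2;
                return ((syncer_delayno + delay) & syncer_mask);
        }

        int
        main(void)
        {
                int second;

                /* One queue is drained per second, round-robin. */
                for (second = 0; second < 3; second++) {
                        printf("t=%d: drain slot %d; delay 15 -> slot %d\n",
                            second, syncer_delayno, syncer_slot(15));
                        syncer_delayno = (syncer_delayno + 1) & syncer_mask;
                }
                return (0);
        }

The clamp to SYNCER_MAXDELAY - 2 keeps a long delay from wrapping all the way around the ring onto the slot that is just about to be drained.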
If a filesystem * has not been preselected, walk through the list of known filesystems * trying those that have mountroot routines, and try them until one * works or we have tried them all. */ #ifdef notdef /* XXX JH */ int lite2_vfs_mountroot() { struct vfsconf *vfsp; extern int (*lite2_mountroot) __P((void)); int error; if (lite2_mountroot != NULL) return ((*lite2_mountroot)()); for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { if (vfsp->vfc_mountroot == NULL) continue; if ((error = (*vfsp->vfc_mountroot)()) == 0) return (0); printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); } return (ENODEV); } #endif /* * Lookup a mount point by filesystem identifier. */ struct mount * vfs_getvfs(fsid) fsid_t *fsid; { register struct mount *mp; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = mp->mnt_list.cqe_next) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { simple_unlock(&mountlist_slock); return (mp); } } simple_unlock(&mountlist_slock); return ((struct mount *) 0); } /* * Get a new unique fsid */ void vfs_getnewfsid(mp) struct mount *mp; { static u_short xxxfs_mntid; fsid_t tfsid; int mtype; simple_lock(&mntid_slock); mtype = mp->mnt_vfc->vfc_typenum; mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); mp->mnt_stat.f_fsid.val[1] = mtype; if (xxxfs_mntid == 0) ++xxxfs_mntid; tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); tfsid.val[1] = mtype; if (mountlist.cqh_first != (void *)&mountlist) { while (vfs_getvfs(&tfsid)) { tfsid.val[0]++; xxxfs_mntid++; } } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; simple_unlock(&mntid_slock); } /* * Set vnode attributes to VNOVAL */ void vattr_null(vap) register struct vattr *vap; { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = vap->va_fsid = vap->va_fileid = vap->va_blocksize = vap->va_rdev = vap->va_atime.tv_sec = vap->va_atime.tv_nsec = vap->va_mtime.tv_sec = vap->va_mtime.tv_nsec = vap->va_ctime.tv_sec = vap->va_ctime.tv_nsec = vap->va_flags = vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * Routines having to do with the management of the vnode table. */ extern vop_t **dead_vnodeop_p; /* * Return the next vnode from the free list. */ int getnewvnode(tag, mp, vops, vpp) enum vtagtype tag; struct mount *mp; vop_t **vops; struct vnode **vpp; { int s; struct proc *p = curproc; /* XXX */ struct vnode *vp, *tvp, *nvp; vm_object_t object; TAILQ_HEAD(freelst, vnode) vnode_tmp_list; /* * We take the least recently used vnode from the freelist * if we can get it and it has no cached pages, and no * namecache entries are relative to it. 
* Otherwise we allocate a new vnode */ s = splbio(); simple_lock(&vnode_free_list_slock); TAILQ_INIT(&vnode_tmp_list); for (vp = TAILQ_FIRST(&vnode_tobefree_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } vp->v_flag &= ~(VTBFREE|VAGE); vp->v_flag |= VFREE; if (vp->v_usecount) panic("tobe free vnode isn't"); freevnodes++; } if (wantfreevnodes && freevnodes < wantfreevnodes) { vp = NULL; } else if (!wantfreevnodes && freevnodes <= desiredvnodes) { /* * XXX: this is only here to be backwards compatible */ vp = NULL; } else { for (vp = TAILQ_FIRST(&vnode_free_list); vp; vp = nvp) { nvp = TAILQ_NEXT(vp, v_freelist); if (!simple_lock_try(&vp->v_interlock)) continue; if (vp->v_usecount) panic("free vnode isn't"); object = vp->v_object; if (object && (object->resident_page_count || object->ref_count)) { printf("object inconsistent state: RPC: %d, RC: %d\n", object->resident_page_count, object->ref_count); /* Don't recycle if it's caching some pages */ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); continue; } else if (LIST_FIRST(&vp->v_cache_src)) { /* Don't recycle if active in the namecache */ simple_unlock(&vp->v_interlock); continue; } else { break; } } } for (tvp = TAILQ_FIRST(&vnode_tmp_list); tvp; tvp = nvp) { nvp = TAILQ_NEXT(tvp, v_freelist); TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); simple_unlock(&tvp->v_interlock); } if (vp) { vp->v_flag |= VDOOMED; TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; simple_unlock(&vnode_free_list_slock); cache_purge(vp); vp->v_lease = NULL; if (vp->v_type != VBAD) { vgonel(vp, p); } else { simple_unlock(&vp->v_interlock); } #ifdef DIAGNOSTIC { int s; if (vp->v_data) panic("cleaned vnode isn't"); s = splbio(); if (vp->v_numoutput) panic("Clean vnode has pending I/O's"); splx(s); } #endif vp->v_flag = 0; vp->v_lastr = 0; vp->v_lastw = 0; vp->v_lasta = 0; vp->v_cstart = 0; vp->v_clen = 0; vp->v_socket = 0; vp->v_writecount = 0; /* XXX */ vp->v_maxio = 0; } else { simple_unlock(&vnode_free_list_slock); vp = (struct vnode *) zalloc(vnode_zone); bzero((char *) vp, sizeof *vp); simple_lock_init(&vp->v_interlock); vp->v_dd = vp; cache_purge(vp); LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); numvnodes++; } vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; insmntque(vp, mp); *vpp = vp; vp->v_usecount = 1; vp->v_data = 0; splx(s); vfs_object_create(vp, p, p->p_ucred, TRUE); return (0); } /* * Move a vnode from one mount queue to another. */ static void insmntque(vp, mp) register struct vnode *vp; register struct mount *mp; { simple_lock(&mntvnode_slock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) LIST_REMOVE(vp, v_mntvnodes); /* * Insert into list of vnodes for the new mount point, if available. */ if ((vp->v_mount = mp) == NULL) { simple_unlock(&mntvnode_slock); return; } LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); simple_unlock(&mntvnode_slock); } /* * Update outstanding I/O count and do wakeup if requested. 
*/ void vwakeup(bp) register struct buf *bp; { register struct vnode *vp; bp->b_flags &= ~B_WRITEINPROG; if ((vp = bp->b_vp)) { vp->v_numoutput--; if (vp->v_numoutput < 0) panic("vwakeup: neg numoutput"); if ((vp->v_numoutput == 0) && (vp->v_flag & VBWAIT)) { vp->v_flag &= ~VBWAIT; wakeup((caddr_t) &vp->v_numoutput); } } } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. */ int vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) register struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int slpflag, slptimeo; { register struct buf *bp; struct buf *nbp, *blist; int s, error; vm_object_t object; - if (flags & V_SAVE) { + if ((flags & V_SAVE) && vp->v_dirtyblkhd.lh_first != NULL) { if ((error = VOP_FSYNC(vp, cred, MNT_WAIT, p))) return (error); if (vp->v_dirtyblkhd.lh_first != NULL) panic("vinvalbuf: dirty bufs"); } s = splbio(); for (;;) { if ((blist = vp->v_cleanblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && (flags & V_SAVEMETA)) while (blist && blist->b_lblkno < 0) blist = blist->b_vnbufs.le_next; if (!blist) break; for (bp = blist; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((flags & V_SAVEMETA) && bp->b_lblkno < 0) continue; if (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; error = tsleep((caddr_t) bp, slpflag | (PRIBIO + 1), "vinvalbuf", slptimeo); if (error) { splx(s); return (error); } break; } bremfree(bp); bp->b_flags |= B_BUSY; /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { if (bp->b_vp == vp) { if (bp->b_flags & B_CLUSTEROK) { vfs_bio_awrite(bp); } else { bp->b_flags |= B_ASYNC; VOP_BWRITE(bp); } } else { (void) VOP_BWRITE(bp); } break; } bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF); brelse(bp); } } while (vp->v_numoutput > 0) { vp->v_flag |= VBWAIT; tsleep(&vp->v_numoutput, PVM, "vnvlbv", 0); } splx(s); /* * Destroy the copy in the VM cache, too. */ simple_lock(&vp->v_interlock); object = vp->v_object; if (object != NULL) { if (flags & V_SAVEMETA) vm_object_page_remove(object, 0, object->size, (flags & V_SAVE) ? TRUE : FALSE); else vm_object_page_remove(object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); } simple_unlock(&vp->v_interlock); if (!(flags & V_SAVEMETA) && (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) panic("vinvalbuf: flush failed"); return (0); } /* * Associate a buffer with a vnode. */ void bgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { int s; #if defined(DIAGNOSTIC) if (bp->b_vp) panic("bgetvp: not free"); #endif vhold(vp); bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; /* * Insert onto list for new vnode. */ s = splbio(); bufinsvn(bp, &vp->v_cleanblkhd); splx(s); } /* * Disassociate a buffer from a vnode. */ void brelvp(bp) register struct buf *bp; { struct vnode *vp; int s; #if defined(DIAGNOSTIC) if (bp->b_vp == (struct vnode *) 0) panic("brelvp: NULL"); #endif /* * Delete from old vnode list, if on one. 
*/ + vp = bp->b_vp; s = splbio(); if (bp->b_vnbufs.le_next != NOLIST) bufremvn(bp); + if ((vp->v_flag & VONWORKLST) && (LIST_FIRST(&vp->v_dirtyblkhd) == NULL)) { + vp->v_flag &= ~VONWORKLST; + LIST_REMOVE(vp, v_synclist); + } splx(s); - - vp = bp->b_vp; bp->b_vp = (struct vnode *) 0; vdrop(vp); }
/* + * The workitem queue. + * + * It is useful to delay writes of file data and filesystem metadata + * for tens of seconds so that quickly created and deleted files need + * not waste disk bandwidth being created and removed. To realize this, + * we append vnodes to a "workitem" queue. When running with a soft + * updates implementation, most pending metadata dependencies should + * not wait for more than a few seconds. Thus, metadata on mounted + * block devices is delayed only about half the time that file data is + * delayed. Similarly, directory updates are more critical, so they are + * delayed only about a third of the time that file data is delayed. + * Thus, there are SYNCER_MAXDELAY queues that are processed round-robin + * at a rate of one each second (driven off the filesystem syncer + * process). The syncer_delayno variable indicates the next queue that + * is to be processed. + * Items that need to be processed soon are placed in this queue: + * + * syncer_workitem_pending[syncer_delayno] + * + * A delay of fifteen seconds is done by placing the request fifteen + * entries later in the queue: + * + * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] + * + */
+ +/* + * Add an item to the syncer work queue. + */ +void +vn_syncer_add_to_worklist(vp, delay) + struct vnode *vp; + int delay; +{ + int s, slot; + + s = splbio(); + + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + } + + if (delay > syncer_maxdelay - 2) + delay = syncer_maxdelay - 2; + slot = (syncer_delayno + delay) & syncer_mask; + + LIST_INSERT_HEAD(&syncer_workitem_pending[slot], vp, v_synclist); + vp->v_flag |= VONWORKLST; + splx(s); +}
+ +static void sched_sync __P((void)); +static struct proc *updateproc; +static struct kproc_desc up_kp = { + "syncer", + sched_sync, + &updateproc +}; +SYSINIT_KT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp)
+ +/* + * System filesystem synchronizer daemon. + */ +void +sched_sync(void) +{ + struct synclist *slp; + struct vnode *vp; + long starttime; + int s; + struct proc *p = updateproc; + + for (;;) { + starttime = time.tv_sec; + + /* + * Push files whose dirty time has expired. + */ + s = splbio(); + slp = &syncer_workitem_pending[syncer_delayno]; + syncer_delayno += 1; + if (syncer_delayno == syncer_maxdelay) + syncer_delayno = 0; + splx(s); + + while ((vp = LIST_FIRST(slp)) != NULL) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + (void) VOP_FSYNC(vp, p->p_ucred, MNT_LAZY, p); + VOP_UNLOCK(vp, 0, p); + if (LIST_FIRST(slp) == vp) { + if (LIST_FIRST(&vp->v_dirtyblkhd) == NULL && + vp->v_type != VBLK) + panic("sched_sync: fsync failed"); + /* + * Move ourselves to the back of the sync list. + */ + LIST_REMOVE(vp, v_synclist); + vn_syncer_add_to_worklist(vp, syncdelay); + } + } + + /* + * Do soft update processing. + */ + if (bioops.io_sync) + (*bioops.io_sync)(NULL); + + /* + * The variable rushjob allows the kernel to speed up the + * processing of the filesystem syncer process. A rushjob + * value of N tells the filesystem syncer to process the next + * N seconds worth of work on its queue ASAP.
Currently rushjob + * is used by the soft update code to speed up the filesystem + * syncer process when the incore state is getting so far + * ahead of the disk that the kernel memory pool is being + * threatened with exhaustion. + */ + if (rushjob > 0) { + rushjob -= 1; + continue; + } + /* + * If it has taken us less than a second to process the + * current work, then wait. Otherwise start right over + * again. We can still lose time if any single round + * takes more than two seconds, but it does not really + * matter as we are just trying to generally pace the + * filesystem activity. + */ + if (time.tv_sec == starttime) + tsleep(&lbolt, PPAUSE, "syncer", 0); + } +} + +/* * Associate a p-buffer with a vnode. */ void pbgetvp(vp, bp) register struct vnode *vp; register struct buf *bp; { #if defined(DIAGNOSTIC) if (bp->b_vp) panic("pbgetvp: not free"); #endif bp->b_vp = vp; if (vp->v_type == VBLK || vp->v_type == VCHR) bp->b_dev = vp->v_rdev; else bp->b_dev = NODEV; } /* * Disassociate a p-buffer from a vnode. */ void pbrelvp(bp) register struct buf *bp; { #if defined(DIAGNOSTIC) if (bp->b_vp == (struct vnode *) 0) panic("pbrelvp: NULL"); #endif bp->b_vp = (struct vnode *) 0; } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(bp, newvp) register struct buf *bp; register struct vnode *newvp; { + struct buflists *listheadp; + int delay; int s; if (newvp == NULL) { printf("reassignbuf: NULL"); return; } s = splbio(); /* * Delete from old vnode list, if on one. */ if (bp->b_vnbufs.le_next != NOLIST) { bufremvn(bp); vdrop(bp->b_vp); } /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. */ if (bp->b_flags & B_DELWRI) { struct buf *tbp; - tbp = newvp->v_dirtyblkhd.lh_first; + listheadp = &newvp->v_dirtyblkhd; + if ((newvp->v_flag & VONWORKLST) == 0) { + switch (newvp->v_type) { + case VDIR: + delay = syncdelay / 3; + break; + case VBLK: + if (newvp->v_specmountpoint != NULL) { + delay = syncdelay / 2; + break; + } + /* fall through */ + default: + delay = syncdelay; + } + vn_syncer_add_to_worklist(newvp, delay); + } + tbp = listheadp->lh_first; if (!tbp || (tbp->b_lblkno > bp->b_lblkno)) { - bufinsvn(bp, &newvp->v_dirtyblkhd); + bufinsvn(bp, listheadp); } else { while (tbp->b_vnbufs.le_next && - (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { + (tbp->b_vnbufs.le_next->b_lblkno < bp->b_lblkno)) { tbp = tbp->b_vnbufs.le_next; } LIST_INSERT_AFTER(tbp, bp, b_vnbufs); } } else { bufinsvn(bp, &newvp->v_cleanblkhd); + if ((newvp->v_flag & VONWORKLST) && + LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) { + newvp->v_flag &= ~VONWORKLST; + LIST_REMOVE(newvp, v_synclist); + } } bp->b_vp = newvp; vhold(bp->b_vp); splx(s); } #ifndef DEVFS_ROOT /* * Create a vnode for a block device. * Used for mounting the root file system. */ int bdevvp(dev, vpp) dev_t dev; struct vnode **vpp; { register struct vnode *vp; struct vnode *nvp; int error; if (dev == NODEV) return (0); error = getnewvnode(VT_NON, (struct mount *) 0, spec_vnodeop_p, &nvp); if (error) { *vpp = 0; return (error); } vp = nvp; vp->v_type = VBLK; if ((nvp = checkalias(vp, dev, (struct mount *) 0))) { vput(vp); vp = nvp; } *vpp = vp; return (0); } #endif /* !DEVFS_ROOT */ /* * Check to see if the new vnode represents a special device * for which we already have a vnode (either because of * bdevvp() or because of a different vnode representing * the same block device). 
If such an alias exists, deallocate * the existing contents and return the aliased vnode. The * caller is responsible for filling it with its new contents. */ struct vnode * checkalias(nvp, nvp_rdev, mp) register struct vnode *nvp; dev_t nvp_rdev; struct mount *mp; { struct proc *p = curproc; /* XXX */ struct vnode *vp; struct vnode **vpp; if (nvp->v_type != VBLK && nvp->v_type != VCHR) return (NULLVP); vpp = &speclisth[SPECHASH(nvp_rdev)]; loop: simple_lock(&spechash_slock); for (vp = *vpp; vp; vp = vp->v_specnext) { if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { simple_unlock(&spechash_slock); vgonel(vp, p); goto loop; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { simple_unlock(&spechash_slock); goto loop; } break; } if (vp == NULL || vp->v_tag != VT_NON) { MALLOC(nvp->v_specinfo, struct specinfo *, sizeof(struct specinfo), M_VNODE, M_WAITOK); nvp->v_rdev = nvp_rdev; nvp->v_hashchain = vpp; nvp->v_specnext = *vpp; - nvp->v_specflags = 0; + nvp->v_specmountpoint = NULL; simple_unlock(&spechash_slock); *vpp = nvp; if (vp != NULLVP) { nvp->v_flag |= VALIASED; vp->v_flag |= VALIASED; vput(vp); } return (NULLVP); } simple_unlock(&spechash_slock); VOP_UNLOCK(vp, 0, p); simple_lock(&vp->v_interlock); vclean(vp, 0, p); vp->v_op = nvp->v_op; vp->v_tag = nvp->v_tag; nvp->v_type = VNON; insmntque(vp, mp); return (vp); }
/* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set if the * vnode is being eliminated in vgone. The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new file system type). */ int vget(vp, flags, p) register struct vnode *vp; int flags; struct proc *p; { int error; /* * If the vnode is in the process of being cleaned out for * another use, we wait for the cleaning to finish and then * return failure. Cleaning is determined by checking that * the VXLOCK flag is set. */ if ((flags & LK_INTERLOCK) == 0) { simple_lock(&vp->v_interlock); } if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vget", 0); return (ENOENT); } vp->v_usecount++; if (VSHOULDBUSY(vp)) vbusy(vp); - if (flags & LK_TYPE_MASK) { if ((error = vn_lock(vp, flags | LK_INTERLOCK, p)) != 0) { /* * must expand vrele here because we do not want * to call VOP_INACTIVE if the reference count * drops back to zero since it was never really * active. We must remove it from the free list * before sleeping so that multiple processes do * not try to recycle it. */ simple_lock(&vp->v_interlock); vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } return (error); } simple_unlock(&vp->v_interlock); return (0); } void vref(struct vnode *vp) { simple_lock(&vp->v_interlock); vp->v_usecount++; simple_unlock(&vp->v_interlock); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist.
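 *
 * A typical reference cycle, sketched for illustration (error handling
 * elided; vget/vput when the vnode lock is wanted, VREF/vrele when not):
 *
 *	if (vget(vp, LK_EXCLUSIVE, p) == 0) {
 *		... use the locked vnode ...
 *		vput(vp);		(unlock and drop the reference)
 *	}
 *
 *	VREF(vp);			(take another reference)
 *	... use the vnode without its lock ...
 *	vrele(vp);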
*/ void vrele(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vrele: null vp"); #endif simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; simple_unlock(&vp->v_interlock); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) { VOP_INACTIVE(vp, p); } } else { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); simple_unlock(&vp->v_interlock); #endif panic("vrele: negative ref cnt"); } } void vput(vp) struct vnode *vp; { struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if (vp == NULL) panic("vput: null vp"); #endif simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; VOP_UNLOCK(vp, LK_INTERLOCK, p); return; } if (vp->v_usecount == 1) { vp->v_usecount--; if (VSHOULDFREE(vp)) vfree(vp); /* * If we are doing a vput, the node is already locked, and we must * call VOP_INACTIVE with the node locked. So, in the case of * vrele, we explicitly lock the vnode before calling VOP_INACTIVE. */ simple_unlock(&vp->v_interlock); VOP_INACTIVE(vp, p); } else { #ifdef DIAGNOSTIC vprint("vput: negative ref count", vp); #endif panic("vput: negative ref cnt"); } } /* * Somebody doesn't want the vnode recycled. */ void vhold(vp) register struct vnode *vp; { simple_lock(&vp->v_interlock); vp->v_holdcnt++; if (VSHOULDBUSY(vp)) vbusy(vp); simple_unlock(&vp->v_interlock); } /* * One less who cares about this vnode. */ void vdrop(vp) register struct vnode *vp; { simple_lock(&vp->v_interlock); if (vp->v_holdcnt <= 0) - panic("holdrele: holdcnt"); + panic("vdrop: holdcnt"); vp->v_holdcnt--; if (VSHOULDFREE(vp)) vfree(vp); simple_unlock(&vp->v_interlock); } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If MNT_NOFORCE is specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If MNT_FORCE is specified, detach any active vnodes * that are found. */ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush(mp, skipvp, flags) struct mount *mp; struct vnode *skipvp; int flags; { struct proc *p = curproc; /* XXX */ struct vnode *vp, *nvp; int busy = 0; simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { /* * Make sure this vnode wasn't reclaimed in getnewvnode(). * Start over if it has (it won't be on the list anymore). */ if (vp->v_mount != mp) goto loop; nvp = vp->v_mntvnodes.le_next; /* * Skip over a selected vnode. */ if (vp == skipvp) continue; simple_lock(&vp->v_interlock); /* * Skip over a vnodes marked VSYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { simple_unlock(&vp->v_interlock); continue; } /* * If WRITECLOSE is set, only flush out regular file vnodes * open for writing. */ if ((flags & WRITECLOSE) && (vp->v_writecount == 0 || vp->v_type != VREG)) { simple_unlock(&vp->v_interlock); continue; } /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. */ if (vp->v_usecount == 0) { simple_unlock(&mntvnode_slock); vgonel(vp, p); simple_lock(&mntvnode_slock); continue; } /* * If FORCECLOSE is set, forcibly close the vnode. 
For block * or character devices, revert to an anonymous device. For * all other files, just kill them. */ if (flags & FORCECLOSE) { simple_unlock(&mntvnode_slock); if (vp->v_type != VBLK && vp->v_type != VCHR) { vgonel(vp, p); } else { vclean(vp, 0, p); vp->v_op = spec_vnodeop_p; insmntque(vp, (struct mount *) 0); } simple_lock(&mntvnode_slock); continue; } #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif simple_unlock(&vp->v_interlock); busy++; } simple_unlock(&mntvnode_slock); if (busy) return (EBUSY); return (0); } /* * Disassociate the underlying file system from a vnode. */ static void vclean(vp, flags, p) struct vnode *vp; int flags; struct proc *p; { int active; vm_object_t obj; /* * Check to see if the vnode is in use. If so we have to reference it * before we clean it out so that its count cannot fall to zero and * generate a race against ourselves to recycle it. */ if ((active = vp->v_usecount)) vp->v_usecount++; /* * Prevent the vnode from being recycled or brought into use while we * clean it out. */ if (vp->v_flag & VXLOCK) panic("vclean: deadlock"); vp->v_flag |= VXLOCK; /* * Even if the count is zero, the VOP_INACTIVE routine may still * have the object locked while it cleans it out. The VOP_LOCK * ensures that the VOP_INACTIVE routine is done with its work. * For active vnodes, it ensures that no other activity can * occur while the underlying object is being cleaned out. */ VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); /* * Clean out any buffers associated with the vnode. */ vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); if (obj = vp->v_object) { if (obj->ref_count == 0) { /* * This is a normal way of shutting down the object/vnode * association. */ vm_object_terminate(obj); } else { /* * Woe to the process that tries to page now :-). */ vm_pager_deallocate(obj); } } /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. Note that the * VOP_INACTIVE will unlock the vnode. */ if (active) { if (flags & DOCLOSE) VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); VOP_INACTIVE(vp, p); } else { /* * Any other processes trying to obtain this lock must first * wait for VXLOCK to clear, then call the new lock operation. */ VOP_UNLOCK(vp, 0, p); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, p)) panic("vclean: cannot reclaim"); if (active) vrele(vp); cache_purge(vp); if (vp->v_vnlock) { #if 0 /* This is the only place we have LK_DRAINED in the entire kernel ??? */ #ifdef DIAGNOSTIC if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0) vprint("vclean: lock not drained", vp); #endif #endif FREE(vp->v_vnlock, M_VNODE); vp->v_vnlock = NULL; } if (VSHOULDFREE(vp)) vfree(vp); /* * Done with purge, notify sleepers of the grim news. */ vp->v_op = dead_vnodeop_p; vn_pollgone(vp); vp->v_tag = VT_NON; vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup((caddr_t) vp); } } /* * Eliminate all activity associated with the requested vnode * and with all vnodes aliased to the requested vnode. */ int vop_revoke(ap) struct vop_revoke_args /* { struct vnode *a_vp; int a_flags; } */ *ap; { struct vnode *vp, *vq; struct proc *p = curproc; /* XXX */ #ifdef DIAGNOSTIC if ((ap->a_flags & REVOKEALL) == 0) panic("vop_revoke"); #endif vp = ap->a_vp; simple_lock(&vp->v_interlock); if (vp->v_flag & VALIASED) { /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. 
*/ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); return (0); } /* * Ensure that vp will not be vgone'd while we * are eliminating its aliases. */ vp->v_flag |= VXLOCK; simple_unlock(&vp->v_interlock); while (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type || vp == vq) continue; simple_unlock(&spechash_slock); vgone(vq); break; } if (vq == NULLVP) { simple_unlock(&spechash_slock); } } /* * Remove the lock so that vgone below will * really eliminate the vnode after which time * vgone will awaken any sleepers. */ simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; if (vp->v_flag & VXWANT) { vp->v_flag &= ~VXWANT; wakeup(vp); } } vgonel(vp, p); return (0); } /* * Recycle an unused vnode to the front of the free list. * Release the passed interlock if the vnode will be recycled. */ int vrecycle(vp, inter_lkp, p) struct vnode *vp; struct simplelock *inter_lkp; struct proc *p; { simple_lock(&vp->v_interlock); if (vp->v_usecount == 0) { if (inter_lkp) { simple_unlock(inter_lkp); } vgonel(vp, p); return (1); } simple_unlock(&vp->v_interlock); return (0); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(vp) register struct vnode *vp; { struct proc *p = curproc; /* XXX */ simple_lock(&vp->v_interlock); vgonel(vp, p); } /* * vgone, with the vp interlock held. */ static void vgonel(vp, p) struct vnode *vp; struct proc *p; { int s; struct vnode *vq; struct vnode *vx; /* * If a vgone (or vclean) is already in progress, * wait until it is done and return. */ if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); tsleep((caddr_t)vp, PINOD, "vgone", 0); return; } /* * Clean out the filesystem specific data. */ vclean(vp, DOCLOSE, p); simple_lock(&vp->v_interlock); /* * Delete from old mount point vnode list, if on one. */ if (vp->v_mount != NULL) insmntque(vp, (struct mount *)0); /* * If special device, remove it from special device alias list * if it is on one. */ if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { simple_lock(&spechash_slock); if (*vp->v_hashchain == vp) { *vp->v_hashchain = vp->v_specnext; } else { for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_specnext != vp) continue; vq->v_specnext = vp->v_specnext; break; } if (vq == NULL) panic("missing bdev"); } if (vp->v_flag & VALIASED) { vx = NULL; for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; if (vx) break; vx = vq; } if (vx == NULL) panic("missing alias"); if (vq == NULL) vx->v_flag &= ~VALIASED; vp->v_flag &= ~VALIASED; } simple_unlock(&spechash_slock); FREE(vp->v_specinfo, M_VNODE); vp->v_specinfo = NULL; } /* * If it is on the freelist and not already at the head, * move it to the head of the list. The test of the back * pointer and the reference count of zero is because * it will be removed from the free list by getnewvnode, * but will not have its reference count incremented until * after calling vgone. If the reference count were * incremented first, vgone would (incorrectly) try to * close the previous instance of the underlying object. 
*/ if (vp->v_usecount == 0 && !(vp->v_flag & VDOOMED)) { s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VFREE) { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); } else if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; freevnodes++; } else freevnodes++; vp->v_flag |= VFREE; TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); simple_unlock(&vnode_free_list_slock); splx(s); } vp->v_type = VBAD; simple_unlock(&vp->v_interlock); } /* * Lookup a vnode by device number. */ int vfinddev(dev, type, vpp) dev_t dev; enum vtype type; struct vnode **vpp; { register struct vnode *vp; int rc = 0; simple_lock(&spechash_slock); for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { if (dev != vp->v_rdev || type != vp->v_type) continue; *vpp = vp; rc = 1; break; } simple_unlock(&spechash_slock); return (rc); } /* * Calculate the total number of references to a special device. */ int vcount(vp) register struct vnode *vp; { struct vnode *vq, *vnext; int count; loop: if ((vp->v_flag & VALIASED) == 0) return (vp->v_usecount); simple_lock(&spechash_slock); for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { vnext = vq->v_specnext; if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; /* * Alias, but not in use, so flush it out. */ if (vq->v_usecount == 0 && vq != vp) { simple_unlock(&spechash_slock); vgone(vq); goto loop; } count += vq->v_usecount; } simple_unlock(&spechash_slock); return (count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"}; void vprint(label, vp) char *label; register struct vnode *vp; { char buf[64]; if (label != NULL) printf("%s: %x: ", label, vp); else printf("%x: ", vp); printf("type %s, usecount %d, writecount %d, refcount %ld,", typename[vp->v_type], vp->v_usecount, vp->v_writecount, vp->v_holdcnt); buf[0] = '\0'; if (vp->v_flag & VROOT) strcat(buf, "|VROOT"); if (vp->v_flag & VTEXT) strcat(buf, "|VTEXT"); if (vp->v_flag & VSYSTEM) strcat(buf, "|VSYSTEM"); if (vp->v_flag & VXLOCK) strcat(buf, "|VXLOCK"); if (vp->v_flag & VXWANT) strcat(buf, "|VXWANT"); if (vp->v_flag & VBWAIT) strcat(buf, "|VBWAIT"); if (vp->v_flag & VALIASED) strcat(buf, "|VALIASED"); if (vp->v_flag & VDOOMED) strcat(buf, "|VDOOMED"); if (vp->v_flag & VFREE) strcat(buf, "|VFREE"); if (vp->v_flag & VOBJBUF) strcat(buf, "|VOBJBUF"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { printf("\n"); } else { printf("\n\t"); VOP_PRINT(vp); } } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ static void printlockedvnodes() { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *vp; printf("Locked vnodes\n"); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = vp->v_mntvnodes.le_next) { if (VOP_ISLOCKED(vp)) vprint((char *)0, vp); } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); } #endif /* * Top level filesystem related information gathering. 
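 *
 * From user space these are reached with sysctl(3); for instance, a
 * sketch of fetching the vfsconf record for one filesystem type (this
 * mirrors what getvfsbyname(3) does; "typenum" is a caller-supplied
 * type number):
 *
 *	int mib[4] = { CTL_VFS, VFS_GENERIC, VFS_CONF, typenum };
 *	struct vfsconf vfc;
 *	size_t len = sizeof(vfc);
 *
 *	if (sysctl(mib, 4, &vfc, &len, NULL, 0) < 0)
 *		err(1, "vfs.generic.conf");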
*/ static int sysctl_ovfs_conf __P(SYSCTL_HANDLER_ARGS); static int vfs_sysctl SYSCTL_HANDLER_ARGS { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; #ifndef NO_COMPAT_PRELITE2 /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif #ifdef notyet /* all sysctl names at this level are at least name and field */ if (namelen < 2) return (ENOTDIR); /* overloaded */ if (name[0] != VFS_GENERIC) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[0]) break; if (vfsp == NULL) return (EOPNOTSUPP); return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, oldp, oldlenp, newp, newlen, p)); } #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) return (EOPNOTSUPP); return (SYSCTL_OUT(req, vfsp, sizeof *vfsp)); } return (EOPNOTSUPP); } SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl, "Generic filesystem"); #ifndef NO_COMPAT_PRELITE2 static int sysctl_ovfs_conf SYSCTL_HANDLER_ARGS { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error) return error; } return 0; } #endif /* !NO_COMPAT_PRELITE2 */ static volatile int kinfo_vdebug = 1; #if 0 #define KINFO_VNODESLOP 10 /* * Dump vnode list (via sysctl). * Copyout address of vnode followed by vnode. */ /* ARGSUSED */ static int sysctl_vnode SYSCTL_HANDLER_ARGS { struct proc *p = curproc; /* XXX */ struct mount *mp, *nmp; struct vnode *nvp, *vp; int error; #define VPTRSZ sizeof (struct vnode *) #define VNODESZ sizeof (struct vnode) req->lock = 0; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ))); simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } again: simple_lock(&mntvnode_slock); for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * Check that the vp is still associated with * this filesystem. RACE: could have been * recycled onto the same filesystem. */ if (vp->v_mount != mp) { simple_unlock(&mntvnode_slock); if (kinfo_vdebug) printf("kinfo: vp changed\n"); goto again; } nvp = vp->v_mntvnodes.le_next; simple_unlock(&mntvnode_slock); if ((error = SYSCTL_OUT(req, &vp, VPTRSZ)) || (error = SYSCTL_OUT(req, vp, VNODESZ))) return (error); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); return (0); } #endif /* * XXX * Exporting the vnode list on large systems causes them to crash. * Exporting the vnode list on medium systems causes sysctl to coredump. */ #if 0 SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,vnode", ""); #endif /* * Check to see if a filesystem is mounted on a block device. 
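 *
 * Filesystem mount routines typically call this on the device vnode
 * before claiming the device, along the lines of (a sketch; "devvp" is
 * the block device vnode being mounted):
 *
 *	if ((error = vfs_mountedon(devvp)) != 0)
 *		return (error);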
*/ int vfs_mountedon(vp) struct vnode *vp; { struct vnode *vq; int error = 0; - if (vp->v_specflags & SI_MOUNTEDON) + if (vp->v_specmountpoint != NULL) return (EBUSY); if (vp->v_flag & VALIASED) { simple_lock(&spechash_slock); for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) continue; - if (vq->v_specflags & SI_MOUNTEDON) { + if (vq->v_specmountpoint != NULL) { error = EBUSY; break; } } simple_unlock(&spechash_slock); } return (error); } /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall() { struct mount *mp, *nmp; struct proc *p = initproc; /* XXX XXX should this be proc0? */ int error; /* * Since this only runs when rebooting, it is not interlocked. */ for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { nmp = mp->mnt_list.cqe_prev; error = dounmount(mp, MNT_FORCE, p); if (error) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } } /* * Build hash lists of net addresses and hang them off the mount point. * Called by ufs_mount() to set up the lists of export addresses. */ static int vfs_hang_addrlist(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { register struct netcred *np; register struct radix_node_head *rnh; register int i; struct radix_node *rn; struct sockaddr *saddr, *smask = 0; struct domain *dom; int error; if (argp->ex_addrlen == 0) { if (mp->mnt_flag & MNT_DEFEXPORTED) return (EPERM); np = &nep->ne_defexported; np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; mp->mnt_flag |= MNT_DEFEXPORTED; return (0); } i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK); bzero((caddr_t) np, i); saddr = (struct sockaddr *) (np + 1); if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen))) goto out; if (saddr->sa_len > argp->ex_addrlen) saddr->sa_len = argp->ex_addrlen; if (argp->ex_masklen) { smask = (struct sockaddr *) ((caddr_t) saddr + argp->ex_addrlen); error = copyin(argp->ex_mask, (caddr_t) smask, argp->ex_masklen); if (error) goto out; if (smask->sa_len > argp->ex_masklen) smask->sa_len = argp->ex_masklen; } i = saddr->sa_family; if ((rnh = nep->ne_rtable[i]) == 0) { /* * Seems silly to initialize every AF when most are not used, * do so on demand here */ for (dom = domains; dom; dom = dom->dom_next) if (dom->dom_family == i && dom->dom_rtattach) { dom->dom_rtattach((void **) &nep->ne_rtable[i], dom->dom_rtoffset); break; } if ((rnh = nep->ne_rtable[i]) == 0) { error = ENOBUFS; goto out; } } rn = (*rnh->rnh_addaddr) ((caddr_t) saddr, (caddr_t) smask, rnh, np->netc_rnodes); if (rn == 0 || np != (struct netcred *) rn) { /* already exists */ error = EPERM; goto out; } np->netc_exflags = argp->ex_flags; np->netc_anon = argp->ex_anon; np->netc_anon.cr_ref = 1; return (0); out: free(np, M_NETADDR); return (error); } /* ARGSUSED */ static int vfs_free_netcred(rn, w) struct radix_node *rn; void *w; { register struct radix_node_head *rnh = (struct radix_node_head *) w; (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh); free((caddr_t) rn, M_NETADDR); return (0); } /* * Free the net address hash lists that are hanging off the mount points. 
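 *
 * Called when an export is being torn down; vfs_export() below does,
 * in effect:
 *
 *	if (argp->ex_flags & MNT_DELEXPORT) {
 *		vfs_free_addrlist(nep);
 *		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
 *	}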
*/ static void vfs_free_addrlist(nep) struct netexport *nep; { register int i; register struct radix_node_head *rnh; for (i = 0; i <= AF_MAX; i++) if ((rnh = nep->ne_rtable[i])) { (*rnh->rnh_walktree) (rnh, vfs_free_netcred, (caddr_t) rnh); free((caddr_t) rnh, M_RTABLE); nep->ne_rtable[i] = 0; } } int vfs_export(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; if (argp->ex_flags & MNT_DELEXPORT) { if (mp->mnt_flag & MNT_EXPUBLIC) { vfs_setpublicfs(NULL, NULL, NULL); mp->mnt_flag &= ~MNT_EXPUBLIC; } vfs_free_addrlist(nep); mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); } if (argp->ex_flags & MNT_EXPORTED) { if (argp->ex_flags & MNT_EXPUBLIC) { if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) return (error); mp->mnt_flag |= MNT_EXPUBLIC; } if ((error = vfs_hang_addrlist(mp, nep, argp))) return (error); mp->mnt_flag |= MNT_EXPORTED; } return (0); } /* * Set the publicly exported filesystem (WebNFS). Currently, only * one public filesystem is possible in the spec (RFC 2054 and 2055) */ int vfs_setpublicfs(mp, nep, argp) struct mount *mp; struct netexport *nep; struct export_args *argp; { int error; struct vnode *rvp; char *cp; /* * mp == NULL -> invalidate the current info, the FS is * no longer exported. May be called from either vfs_export * or unmount, so check if it hasn't already been done. */ if (mp == NULL) { if (nfs_pub.np_valid) { nfs_pub.np_valid = 0; if (nfs_pub.np_index != NULL) { FREE(nfs_pub.np_index, M_TEMP); nfs_pub.np_index = NULL; } } return (0); } /* * Only one allowed at a time. */ if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) return (EBUSY); /* * Get real filehandle for root of exported FS. */ bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle)); nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; if ((error = VFS_ROOT(mp, &rvp))) return (error); if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) return (error); vput(rvp); /* * If an indexfile was specified, pull it in. */ if (argp->ex_indexfile != NULL) { MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, M_WAITOK); error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, MAXNAMLEN, (size_t *)0); if (!error) { /* * Check for illegal filenames. */ for (cp = nfs_pub.np_index; *cp; cp++) { if (*cp == '/') { error = EINVAL; break; } } } if (error) { FREE(nfs_pub.np_index, M_TEMP); return (error); } } nfs_pub.np_mount = mp; nfs_pub.np_valid = 1; return (0); } struct netcred * vfs_export_lookup(mp, nep, nam) register struct mount *mp; struct netexport *nep; struct sockaddr *nam; { register struct netcred *np; register struct radix_node_head *rnh; struct sockaddr *saddr; np = NULL; if (mp->mnt_flag & MNT_EXPORTED) { /* * Lookup in the export list first. */ if (nam != NULL) { saddr = nam; rnh = nep->ne_rtable[saddr->sa_family]; if (rnh != NULL) { np = (struct netcred *) (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); if (np && np->netc_rnodes->rn_flags & RNF_ROOT) np = NULL; } } /* * If no address match, use the default if it exists. */ if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) np = &nep->ne_defexported; } return (np); } /* * perform msync on all vnodes under a mount point * the mount point must be locked. 
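 *
 * MNT_WAIT asks for a synchronous pass, MNT_NOWAIT for best effort;
 * e.g. dounmount() (below in this change) pushes dirty pages out
 * before unmounting with:
 *
 *	vfs_msync(mp, MNT_WAIT);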
*/ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; int anyio, tries; tries = 5; loop: anyio = 0; for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { nvp = vp->v_mntvnodes.le_next; if (vp->v_mount != mp) { goto loop; } if ((vp->v_flag & VXLOCK) || (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))) { continue; } simple_lock(&vp->v_interlock); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { if (!vget(vp, LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, TRUE); anyio = 1; } vput(vp); } } else { simple_unlock(&vp->v_interlock); } } if (anyio && (--tries > 0)) goto loop; } /* * Create the VM object needed for VMIO and mmap support. This * is done for all VREG files in the system. Some filesystems might * afford the additional metadata buffering capability of the * VMIO code by making the device node be VMIO mode also. * * If !waslocked, must be called with interlock. */ int vfs_object_create(vp, p, cred, waslocked) struct vnode *vp; struct proc *p; struct ucred *cred; int waslocked; { struct vattr vat; vm_object_t object; int error = 0; if ((vp->v_type != VREG) && (vp->v_type != VBLK)) { if (!waslocked) simple_unlock(&vp->v_interlock); return 0; } if (!waslocked) vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p); retry: if ((object = vp->v_object) == NULL) { if (vp->v_type == VREG) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) goto retn; object = vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); } else if (major(vp->v_rdev) < nblkdev) { /* * This simply allocates the biggest object possible * for a VBLK vnode. This should be fixed, but doesn't * cause any problems (yet). */ object = vnode_pager_alloc(vp, INT_MAX, 0, 0); } object->ref_count--; vp->v_usecount--; } else { if (object->flags & OBJ_DEAD) { VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } } if (vp->v_object) { vp->v_flag |= VOBJBUF; } retn: if (!waslocked) { simple_lock(&vp->v_interlock); VOP_UNLOCK(vp, LK_INTERLOCK, p); } return error; } static void vfree(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } if (vp->v_flag & VAGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~VAGE; vp->v_flag |= VFREE; splx(s); } void vbusy(vp) struct vnode *vp; { int s; s = splbio(); simple_lock(&vnode_free_list_slock); if (vp->v_flag & VTBFREE) { TAILQ_REMOVE(&vnode_tobefree_list, vp, v_freelist); vp->v_flag &= ~VTBFREE; } else { TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; } simple_unlock(&vnode_free_list_slock); vp->v_flag &= ~(VFREE|VAGE); splx(s); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) 
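 *
 * A filesystem's VOP_POLL routine would use it roughly as follows
 * (a sketch only; the vop_poll_args layout is assumed):
 *
 *	int
 *	fs_poll(ap)
 *		struct vop_poll_args *ap;
 *	{
 *		return (vn_pollrecord(ap->a_vp, ap->a_p, ap->a_events));
 *	}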
*/ int vn_pollrecord(vp, p, events) struct vnode *vp; struct proc *p; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo.vpi_revents; vp->v_pollinfo.vpi_revents &= ~events; simple_unlock(&vp->v_pollinfo.vpi_lock); return events; } vp->v_pollinfo.vpi_events |= events; selrecord(p, &vp->v_pollinfo.vpi_selinfo); simple_unlock(&vp->v_pollinfo.vpi_lock); return 0; }
/* * Note the occurrence of an event. If the VN_POLLEVENT macro is used, * it is possible for us to miss an event due to race conditions, but * that condition is expected to be rare, so for the moment it is the * preferred interface. */ void vn_pollevent(vp, events) struct vnode *vp; short events; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events & events) { /* * We clear vpi_events so that we don't * call selwakeup() twice if two events are * posted before the polling process(es) is * awakened. This also ensures that we take at * most one selwakeup() if the polling process * is no longer interested. However, it does * mean that only one event can be noticed at * a time. (Perhaps we should only clear those * event bits which we note?) XXX */ vp->v_pollinfo.vpi_events = 0; /* &= ~events ??? */ vp->v_pollinfo.vpi_revents |= events; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); }
/* * Wake up anyone polling on vp because it is being revoked. * This depends on dead_poll() returning POLLHUP for correct * behavior. */ void vn_pollgone(vp) struct vnode *vp; { simple_lock(&vp->v_pollinfo.vpi_lock); if (vp->v_pollinfo.vpi_events) { vp->v_pollinfo.vpi_events = 0; selwakeup(&vp->v_pollinfo.vpi_selinfo); } simple_unlock(&vp->v_pollinfo.vpi_lock); +} + + + +/* + * Routine to create and manage a filesystem syncer vnode. + */ +#define sync_close ((int (*) __P((struct vop_close_args *)))nullop) +int sync_fsync __P((struct vop_fsync_args *)); +int sync_inactive __P((struct vop_inactive_args *)); +int sync_reclaim __P((struct vop_reclaim_args *)); +#define sync_lock ((int (*) __P((struct vop_lock_args *)))vop_nolock) +#define sync_unlock ((int (*) __P((struct vop_unlock_args *)))vop_nounlock) +int sync_print __P((struct vop_print_args *)); +#define sync_islocked ((int(*) __P((struct vop_islocked_args *)))vop_noislocked)
+ +vop_t **sync_vnodeop_p; +struct vnodeopv_entry_desc sync_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_eopnotsupp }, + { &vop_close_desc, (vop_t *) sync_close }, /* close */ + { &vop_fsync_desc, (vop_t *) sync_fsync }, /* fsync */ + { &vop_inactive_desc, (vop_t *) sync_inactive }, /* inactive */ + { &vop_reclaim_desc, (vop_t *) sync_reclaim }, /* reclaim */ + { &vop_lock_desc, (vop_t *) sync_lock }, /* lock */ + { &vop_unlock_desc, (vop_t *) sync_unlock }, /* unlock */ + { &vop_print_desc, (vop_t *) sync_print }, /* print */ + { &vop_islocked_desc, (vop_t *) sync_islocked }, /* islocked */ + { NULL, NULL } +}; +struct vnodeopv_desc sync_vnodeop_opv_desc = + { &sync_vnodeop_p, sync_vnodeop_entries }; + +VNODEOP_SET(sync_vnodeop_opv_desc); + +/* + * Create a new filesystem syncer vnode for the specified mount point.
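+ *
+ * A mount point gets one of these whenever it is mounted (or updated)
+ * read-write; the mount(2) and dounmount() changes below do, in effect:
+ *
+ *	if ((mp->mnt_flag & MNT_RDONLY) == 0)
+ *		error = vfs_allocate_syncvnode(mp);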
+ */ +int +vfs_allocate_syncvnode(mp) + struct mount *mp; +{ + struct vnode *vp; + static long start, incr, next; + int error; + + /* Allocate a new vnode */ + if ((error = getnewvnode(VT_VFS, mp, sync_vnodeop_p, &vp)) != 0) { + mp->mnt_syncer = NULL; + return (error); + } + vp->v_type = VNON; + /* + * Place the vnode onto the syncer worklist. We attempt to + * scatter them about on the list so that they will go off + * at evenly distributed times even if all the filesystems + * are mounted at once. + */ + next += incr; + if (next == 0 || next > syncer_maxdelay) { + start /= 2; + incr /= 2; + if (start == 0) { + start = syncer_maxdelay / 2; + incr = syncer_maxdelay; + } + next = start; + } + vn_syncer_add_to_worklist(vp, syncdelay > 0 ? next % syncdelay : 0); + mp->mnt_syncer = vp; + return (0); +}
+ +/* + * Do a lazy sync of the filesystem. + */ +int +sync_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + struct vnode *syncvp = ap->a_vp; + struct mount *mp = syncvp->v_mount; + struct proc *p = ap->a_p; + int asyncflag; + + /* + * We only need to do something if this is a lazy evaluation. + */ + if (ap->a_waitfor != MNT_LAZY) + return (0); + + /* + * Move ourselves to the back of the sync list. + */ + vn_syncer_add_to_worklist(syncvp, syncdelay); + + /* + * Walk the list of vnodes pushing all that are dirty and + * not already on the sync list. + */ + simple_lock(&mountlist_slock); + if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_slock, p) != 0) + return (0); + asyncflag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; + VFS_SYNC(mp, MNT_LAZY, ap->a_cred, p); + if (asyncflag) + mp->mnt_flag |= MNT_ASYNC; + vfs_unbusy(mp, p); + return (0); +}
+ +/* + * The syncer vnode is no longer referenced. + */ +int +sync_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + + vgone(ap->a_vp); + return (0); +}
+ +/* + * The syncer vnode is no longer needed and is being decommissioned. + */ +int +sync_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + vp->v_mount->mnt_syncer = NULL; + if (vp->v_flag & VONWORKLST) { + LIST_REMOVE(vp, v_synclist); + vp->v_flag &= ~VONWORKLST; + } + + return (0); +}
+ +/* + * Print out a syncer vnode. + */ +int +sync_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + printf("syncer vnode"); + if (vp->v_vnlock != NULL) + lockmgr_printinfo(vp->v_vnlock); + printf("\n"); + return (0); }
Index: head/sys/kern/vfs_syscalls.c =================================================================== --- head/sys/kern/vfs_syscalls.c (revision 34265) +++ head/sys/kern/vfs_syscalls.c (revision 34266) @@ -1,2826 +1,2841 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 - * $Id: vfs_syscalls.c,v 1.93 1998/02/15 04:17:09 dyson Exp $ + * $Id: vfs_syscalls.c,v 1.94 1998/03/07 21:35:39 dyson Exp $ */ /* For 4.3 integer FS ID compatibility */ #include "opt_compat.h" /* * XXX - The following is required because of some magic done * in getdirentries() below which is only done if the translucent * filesystem `UNION' is compiled into the kernel. This is broken, * but I don't have time to study the code deeply enough to understand * what's going on and determine an appropriate fix. -GAW */ #include "opt_union.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef UNION #include #endif #include #include #include #include #include static int change_dir __P((struct nameidata *ndp, struct proc *p)); static void checkdirs __P((struct vnode *olddp)); static int usermount = 0; /* if 1, non-root can mount fs. */ SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); /* * Virtual File System System Calls */ /* * Mount a file system. */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { char *type; char *path; int flags; caddr_t data; }; #endif /* ARGSUSED */ int mount(p, uap) struct proc *p; register struct mount_args /* { syscallarg(char *) type; syscallarg(char *) path; syscallarg(int) flags; syscallarg(caddr_t) data; } */ *uap; { struct vnode *vp; struct mount *mp; struct vfsconf *vfsp; int error, flag = 0, flag2 = 0; struct vattr va; u_long fstypenum; struct nameidata nd; char fstypename[MFSNAMELEN]; if (usermount == 0 && (error = suser(p->p_ucred, &p->p_acflag))) return (error); /* * Get vnode to be covered */ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (SCARG(uap, flags) & MNT_UPDATE) { if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } mp = vp->v_mount; flag = mp->mnt_flag; flag2 = mp->mnt_kern_flag; /* * We only allow the filesystem to be reloaded if it * is currently mounted read-only. 
*/ if ((SCARG(uap, flags) & MNT_RELOAD) && ((mp->mnt_flag & MNT_RDONLY) == 0)) { vput(vp); return (EOPNOTSUPP); /* Needs translation */ } mp->mnt_flag |= SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); /* * Only root, or the user that did the original mount is * permitted to update it. */ if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (vfs_busy(mp, LK_NOWAIT, 0, p)) { vput(vp); return (EBUSY); } VOP_UNLOCK(vp, 0, p); goto update; } /* * If the user is not root, ensure that they own the directory * onto which we are attempting to mount. */ if ((error = VOP_GETATTR(vp, &va, p->p_ucred, p)) || (va.va_uid != p->p_ucred->cr_uid && (error = suser(p->p_ucred, &p->p_acflag)))) { vput(vp); return (error); } /* * Do not allow NFS export by non-root users. Silently * enforce MNT_NOSUID and MNT_NODEV for non-root users. */ if (p->p_ucred->cr_uid != 0) { if (SCARG(uap, flags) & MNT_EXPORTED) { vput(vp); return (EPERM); } SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; } if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) return (error); if (vp->v_type != VDIR) { vput(vp); return (ENOTDIR); } #ifdef COMPAT_43 /* * Historically filesystem types were identified by number. If we * get an integer for the filesystem type instead of a string, we * check to see if it matches one of the historic filesystem types. */ fstypenum = (u_long)SCARG(uap, type); if (fstypenum < maxvfsconf) { for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (vfsp->vfc_typenum == fstypenum) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } strncpy(fstypename, vfsp->vfc_name, MFSNAMELEN); } else #endif /* COMPAT_43 */ if (error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) { vput(vp); return (error); } for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) if (!strcmp(vfsp->vfc_name, fstypename)) break; if (vfsp == NULL) { vput(vp); return (ENODEV); } if (vp->v_mountedhere != NULL) { vput(vp); return (EBUSY); } /* * Allocate and initialize the filesystem. */ mp = (struct mount *)malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); (void)vfs_busy(mp, LK_NOWAIT, 0, p); mp->mnt_op = vfsp->vfc_vfsops; mp->mnt_vfc = vfsp; vfsp->vfc_refcount++; mp->mnt_stat.f_type = vfsp->vfc_typenum; mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); vp->v_mountedhere = mp; mp->mnt_vnodecovered = vp; mp->mnt_stat.f_owner = p->p_ucred->cr_uid; update: /* * Set the mount level flags. */ if (SCARG(uap, flags) & MNT_RDONLY) mp->mnt_flag |= MNT_RDONLY; else if (mp->mnt_flag & MNT_RDONLY) mp->mnt_kern_flag |= MNTK_WANTRDWR; mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); /* * Mount the filesystem. 
*/ error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, p); if (mp->mnt_flag & MNT_UPDATE) { vrele(vp); if (mp->mnt_kern_flag & MNTK_WANTRDWR) mp->mnt_flag &= ~MNT_RDONLY; mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); mp->mnt_kern_flag &=~ MNTK_WANTRDWR; if (error) { mp->mnt_flag = flag; mp->mnt_kern_flag = flag2; } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } vfs_unbusy(mp, p); return (error); } /* * Put the new filesystem on the mount list after root. */ cache_purge(vp); if (!error) { simple_lock(&mountlist_slock); CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); simple_unlock(&mountlist_slock); checkdirs(vp); VOP_UNLOCK(vp, 0, p); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); vfs_unbusy(mp, p); if (error = VFS_START(mp, 0, p)) vrele(vp); } else { mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0; mp->mnt_vfc->vfc_refcount--; vfs_unbusy(mp, p); free((caddr_t)mp, M_MOUNT); vput(vp); } return (error); } /* * Scan all active processes to see if any of them have a current * or root directory onto which the new filesystem has just been * mounted. If so, replace them with the new mount point. */ static void checkdirs(olddp) struct vnode *olddp; { struct filedesc *fdp; struct vnode *newdp; struct proc *p; if (olddp->v_usecount == 1) return; if (VFS_ROOT(olddp->v_mountedhere, &newdp)) panic("mount: lost mount"); for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { fdp = p->p_fd; if (fdp->fd_cdir == olddp) { vrele(fdp->fd_cdir); VREF(newdp); fdp->fd_cdir = newdp; } if (fdp->fd_rdir == olddp) { vrele(fdp->fd_rdir); VREF(newdp); fdp->fd_rdir = newdp; } } if (rootvnode == olddp) { vrele(rootvnode); VREF(newdp); rootvnode = newdp; } vput(newdp); } /* * Unmount a file system. * * Note: unmount takes a path to the vnode mounted on as argument, * not special file (as before). */ #ifndef _SYS_SYSPROTO_H_ struct unmount_args { char *path; int flags; }; #endif /* ARGSUSED */ int unmount(p, uap) struct proc *p; register struct unmount_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; mp = vp->v_mount; /* * Only root, or the user that did the original mount is * permitted to unmount this filesystem. */ if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && (error = suser(p->p_ucred, &p->p_acflag))) { vput(vp); return (error); } /* * Don't allow unmounting the root file system. */ if (mp->mnt_flag & MNT_ROOTFS) { vput(vp); return (EINVAL); } /* * Must be the root of the filesystem */ if ((vp->v_flag & VROOT) == 0) { vput(vp); return (EINVAL); } vput(vp); return (dounmount(mp, SCARG(uap, flags), p)); } /* * Do the actual file system unmount. 
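 *
 * unmount(2) above reduces to this once its permission and root-vnode
 * checks pass:
 *
 *	return (dounmount(mp, SCARG(uap, flags), p));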
*/ int dounmount(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { struct vnode *coveredvp; int error; simple_lock(&mountlist_slock); mp->mnt_kern_flag |= MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); vfs_msync(mp, MNT_WAIT); mp->mnt_flag &=~ MNT_ASYNC; cache_purgevfs(mp); /* remove cache entries for this file sys */ + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || (flags & MNT_FORCE)) error = VFS_UNMOUNT(mp, flags, p); simple_lock(&mountlist_slock); if (error) { + if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) + (void) vfs_allocate_syncvnode(mp); mp->mnt_kern_flag &= ~MNTK_UNMOUNT; lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, &mountlist_slock, p); return (error); } CIRCLEQ_REMOVE(&mountlist, mp, mnt_list); if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { coveredvp->v_mountedhere = (struct mount *)0; vrele(coveredvp); } mp->mnt_vfc->vfc_refcount--; if (mp->mnt_vnodelist.lh_first != NULL) panic("unmount: dangling vnode"); lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_slock, p); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); free((caddr_t)mp, M_MOUNT); return (0); } /* * Sync each mounted filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct sync_args { int dummy; }; #endif #ifdef DEBUG static int syncprt = 0; SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); #endif /* ARGSUSED */ int sync(p, uap) struct proc *p; struct sync_args *uap; { register struct mount *mp, *nmp; int asyncflag; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if ((mp->mnt_flag & MNT_RDONLY) == 0) { asyncflag = mp->mnt_flag & MNT_ASYNC; mp->mnt_flag &= ~MNT_ASYNC; vfs_msync(mp, MNT_NOWAIT); - VFS_SYNC(mp, MNT_NOWAIT, p != NULL ? p->p_ucred : NOCRED, p); - if (asyncflag) - mp->mnt_flag |= MNT_ASYNC; + VFS_SYNC(mp, MNT_NOWAIT, + ((p != NULL) ? p->p_ucred : NOCRED), p); + mp->mnt_flag |= asyncflag; } simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); #if 0 /* * XXX don't call vfs_bufstats() yet because that routine * was not imported in the Lite2 merge. */ #ifdef DIAGNOSTIC if (syncprt) vfs_bufstats(); #endif /* DIAGNOSTIC */ #endif return (0); } /* * Change filesystem quotas. */ #ifndef _SYS_SYSPROTO_H_ struct quotactl_args { char *path; int cmd; int uid; caddr_t arg; }; #endif /* ARGSUSED */ int quotactl(p, uap) struct proc *p; register struct quotactl_args /* { syscallarg(char *) path; syscallarg(int) cmd; syscallarg(int) uid; syscallarg(caddr_t) arg; } */ *uap; { register struct mount *mp; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; vrele(nd.ni_vp); return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), SCARG(uap, arg), p)); } /* * Get filesystem statistics. 
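 *
 * (An editor's sketch illustrating dounmount() above is interposed
 * here; the statfs() family follows it.)
 */

/*
 * Editor's sketch, not part of this commit: a standalone userland
 * fragment.  MNT_FORCE makes dounmount() proceed to VFS_UNMOUNT()
 * even when VFS_SYNC() fails; without it, a busy filesystem hands
 * the error straight back to the caller.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>

int
force_unmount(const char *dir)
{
	if (unmount(dir, MNT_FORCE) < 0) {
		perror("unmount");
		return (-1);
	}
	return (0);
}

/*
 * (End of editor's sketch; the original comment resumes.)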
*/ #ifndef _SYS_SYSPROTO_H_ struct statfs_args { char *path; struct statfs *buf; }; #endif /* ARGSUSED */ int statfs(p, uap) struct proc *p; register struct statfs_args /* { syscallarg(char *) path; syscallarg(struct statfs *) buf; } */ *uap; { register struct mount *mp; register struct statfs *sp; int error; struct nameidata nd; struct statfs sb; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); mp = nd.ni_vp->v_mount; sp = &mp->mnt_stat; vrele(nd.ni_vp); error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get filesystem statistics. */ #ifndef _SYS_SYSPROTO_H_ struct fstatfs_args { int fd; struct statfs *buf; }; #endif /* ARGSUSED */ int fstatfs(p, uap) struct proc *p; register struct fstatfs_args /* { syscallarg(int) fd; syscallarg(struct statfs *) buf; } */ *uap; { struct file *fp; struct mount *mp; register struct statfs *sp; int error; struct statfs sb; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; sp = &mp->mnt_stat; error = VFS_STATFS(mp, sp, p); if (error) return (error); sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; if (p->p_ucred->cr_uid != 0) { bcopy((caddr_t)sp, (caddr_t)&sb, sizeof(sb)); sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; sp = &sb; } return (copyout((caddr_t)sp, (caddr_t)SCARG(uap, buf), sizeof(*sp))); } /* * Get statistics on all filesystems. */ #ifndef _SYS_SYSPROTO_H_ struct getfsstat_args { struct statfs *buf; long bufsize; int flags; }; #endif int getfsstat(p, uap) struct proc *p; register struct getfsstat_args /* { syscallarg(struct statfs *) buf; syscallarg(long) bufsize; syscallarg(int) flags; } */ *uap; { register struct mount *mp, *nmp; register struct statfs *sp; caddr_t sfsp; long count, maxcount, error; maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); sfsp = (caddr_t)SCARG(uap, buf); count = 0; simple_lock(&mountlist_slock); for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { nmp = mp->mnt_list.cqe_next; continue; } if (sfsp && count < maxcount) { sp = &mp->mnt_stat; /* - * If MNT_NOWAIT is specified, do not refresh the - * fsstat cache. MNT_WAIT overrides MNT_NOWAIT. + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY + * overrides MNT_WAIT. */ - if (((SCARG(uap, flags) & MNT_NOWAIT) == 0 || + if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || (SCARG(uap, flags) & MNT_WAIT)) && (error = VFS_STATFS(mp, sp, p))) { simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); continue; } sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; error = copyout((caddr_t)sp, sfsp, sizeof(*sp)); if (error) { vfs_unbusy(mp, p); return (error); } sfsp += sizeof(*sp); } count++; simple_lock(&mountlist_slock); nmp = mp->mnt_list.cqe_next; vfs_unbusy(mp, p); } simple_unlock(&mountlist_slock); if (sfsp && count > maxcount) p->p_retval[0] = maxcount; else p->p_retval[0] = count; return (0); } /* * Change current working directory to a given file descriptor. 
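 *
 * (An editor's sketch illustrating getfsstat() above is interposed
 * here; fchdir() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: standalone userland use
 * of getfsstat().  MNT_NOWAIT returns the cached statfs data without
 * calling into each filesystem, so an unresponsive NFS server cannot
 * stall the listing.
 */
#include <sys/param.h>
#include <sys/mount.h>
#include <stdio.h>
#include <stdlib.h>

void
list_mounts(void)
{
	struct statfs *sf;
	int i, n;

	n = getfsstat(NULL, 0, MNT_NOWAIT);	/* count mounts only */
	if (n <= 0)
		return;
	sf = malloc(n * sizeof(*sf));
	if (sf == NULL)
		return;
	n = getfsstat(sf, n * sizeof(*sf), MNT_NOWAIT);
	for (i = 0; i < n; i++)
		printf("%s on %s (%s)\n", sf[i].f_mntfromname,
		    sf[i].f_mntonname, sf[i].f_fstypename);
	free(sf);
}

/*
 * (End of editor's sketch; the original comment resumes.)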
*/ #ifndef _SYS_SYSPROTO_H_ struct fchdir_args { int fd; }; #endif /* ARGSUSED */ int fchdir(p, uap) struct proc *p; struct fchdir_args /* { syscallarg(int) fd; } */ *uap; { register struct filedesc *fdp = p->p_fd; struct vnode *vp, *tdp; struct mount *mp; struct file *fp; int error; if (error = getvnode(fdp, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); while (!error && (mp = vp->v_mountedhere) != NULL) { if (vfs_busy(mp, 0, 0, p)) continue; error = VFS_ROOT(mp, &tdp); vfs_unbusy(mp, p); if (error) break; vput(vp); vp = tdp; } if (error) { vput(vp); return (error); } VOP_UNLOCK(vp, 0, p); vrele(fdp->fd_cdir); fdp->fd_cdir = vp; return (0); } /* * Change current working directory (``.''). */ #ifndef _SYS_SYSPROTO_H_ struct chdir_args { char *path; }; #endif /* ARGSUSED */ int chdir(p, uap) struct proc *p; struct chdir_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_cdir); fdp->fd_cdir = nd.ni_vp; return (0); } /* * Change notion of root (``/'') directory. */ #ifndef _SYS_SYSPROTO_H_ struct chroot_args { char *path; }; #endif /* ARGSUSED */ int chroot(p, uap) struct proc *p; struct chroot_args /* { syscallarg(char *) path; } */ *uap; { register struct filedesc *fdp = p->p_fd; int error; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = change_dir(&nd, p)) return (error); vrele(fdp->fd_rdir); fdp->fd_rdir = nd.ni_vp; return (0); } /* * Common routine for chroot and chdir. */ static int change_dir(ndp, p) register struct nameidata *ndp; struct proc *p; { struct vnode *vp; int error; error = namei(ndp); if (error) return (error); vp = ndp->ni_vp; if (vp->v_type != VDIR) error = ENOTDIR; else error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); if (error) vput(vp); else VOP_UNLOCK(vp, 0, p); return (error); } /* * Check permissions, allocate an open file structure, * and call the device open routine if any. 
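 *
 * (An editor's sketch illustrating fchdir() above is interposed here;
 * open() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: the save/restore-cwd
 * idiom built on fchdir().  A directory opened O_RDONLY passes the
 * getvnode()/VDIR/VOP_ACCESS checks above.
 */
#include <fcntl.h>
#include <unistd.h>

int
with_directory(const char *dir, int (*fn)(void))
{
	int error, savefd;

	savefd = open(".", O_RDONLY);
	if (savefd < 0)
		return (-1);
	if (chdir(dir) < 0) {
		(void)close(savefd);
		return (-1);
	}
	error = (*fn)();
	(void)fchdir(savefd);	/* best effort: restore the old cwd */
	(void)close(savefd);
	return (error);
}

/*
 * (End of editor's sketch; the original comment resumes.)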
*/ #ifndef _SYS_SYSPROTO_H_ struct open_args { char *path; int flags; int mode; }; #endif int open(p, uap) struct proc *p; register struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ *uap; { register struct filedesc *fdp = p->p_fd; register struct file *fp; register struct vnode *vp; int cmode, flags, oflags; struct file *nfp; int type, indx, error; struct flock lf; struct nameidata nd; oflags = SCARG(uap, flags); if ((oflags & O_ACCMODE) == O_ACCMODE) return (EINVAL); flags = FFLAGS(oflags); error = falloc(p, &nfp, &indx); if (error) return (error); fp = nfp; cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ error = vn_open(&nd, flags, cmode); if (error) { ffree(fp); if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0 && /* XXX from fdopen */ (error = dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { p->p_retval[0] = indx; return (0); } if (error == ERESTART) error = EINTR; fdp->fd_ofiles[indx] = NULL; return (error); } p->p_dupfd = 0; vp = nd.ni_vp; fp->f_flag = flags & FMASK; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); fp->f_ops = &vnops; fp->f_data = (caddr_t)vp; if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (flags & O_EXLOCK) lf.l_type = F_WRLCK; else lf.l_type = F_RDLCK; type = F_FLOCK; if ((flags & FNONBLOCK) == 0) type |= F_WAIT; VOP_UNLOCK(vp, 0, p); if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) { (void) vn_close(vp, fp->f_flag, fp->f_cred, p); ffree(fp); fdp->fd_ofiles[indx] = NULL; return (error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } if ((vp->v_type == VREG) && (vp->v_object == NULL)) vfs_object_create(vp, p, p->p_ucred, TRUE); VOP_UNLOCK(vp, 0, p); p->p_retval[0] = indx; return (0); } #ifdef COMPAT_43 /* * Create a file. */ #ifndef _SYS_SYSPROTO_H_ struct ocreat_args { char *path; int mode; }; #endif int ocreat(p, uap) struct proc *p; register struct ocreat_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct open_args /* { syscallarg(char *) path; syscallarg(int) flags; syscallarg(int) mode; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, mode) = SCARG(uap, mode); SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; return (open(p, &nuap)); } #endif /* COMPAT_43 */ /* * Create a special file. 
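 *
 * (An editor's sketch illustrating the open()-time locking above is
 * interposed here; mknod() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: O_EXLOCK takes an
 * exclusive flock-style lock atomically with the open (the
 * VOP_ADVLOCK call above); O_NONBLOCK clears F_WAIT, so a lock held
 * elsewhere fails immediately instead of sleeping.
 */
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>

int
open_locked(const char *path)
{
	int fd;

	fd = open(path, O_RDWR | O_EXLOCK | O_NONBLOCK);
	if (fd < 0 && errno == EWOULDBLOCK)
		fprintf(stderr, "%s: locked by another process\n", path);
	return (fd);
}

/*
 * (End of editor's sketch; the original comment resumes.)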
*/ #ifndef _SYS_SYSPROTO_H_ struct mknod_args { char *path; int mode; int dev; }; #endif /* ARGSUSED */ int mknod(p, uap) struct proc *p; register struct mknod_args /* { syscallarg(char *) path; syscallarg(int) mode; syscallarg(int) dev; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; int whiteout; struct nameidata nd; error = suser(p->p_ucred, &p->p_acflag); if (error) return (error); NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) error = EEXIST; else { VATTR_NULL(&vattr); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; vattr.va_rdev = SCARG(uap, dev); whiteout = 0; switch (SCARG(uap, mode) & S_IFMT) { case S_IFMT: /* used by badsect to flag bad sectors */ vattr.va_type = VBAD; break; case S_IFCHR: vattr.va_type = VCHR; break; case S_IFBLK: vattr.va_type = VBLK; break; case S_IFWHT: whiteout = 1; break; default: error = EINVAL; break; } } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (whiteout) { error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); if (error) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); } else { error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); } } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp) vrele(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mknod"); return (error); } /* * Create a named pipe. */ #ifndef _SYS_SYSPROTO_H_ struct mkfifo_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkfifo(p, uap) struct proc *p; register struct mkfifo_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VFIFO; vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr)); } /* * Make a hard file link. */ #ifndef _SYS_SYSPROTO_H_ struct link_args { char *path; char *link; }; #endif /* ARGSUSED */ int link(p, uap) struct proc *p; register struct link_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { register struct vnode *vp; struct nameidata nd; int error; NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); error = namei(&nd); if (!error) { if (nd.ni_vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); error = EEXIST; } else { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); } } } vrele(vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); return (error); } /* * Make a symbolic link. 
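 *
 * (An editor's sketch illustrating mkfifo() above is interposed here;
 * symlink() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: creating a named pipe.
 * Unlike mknod() above, which is root-only and reserved for device
 * nodes and whiteouts, mkfifo() needs no privilege.
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <errno.h>

int
ensure_fifo(const char *path)
{
	/*
	 * Tolerate an already existing fifo; note this does not verify
	 * that an existing file actually is a fifo.
	 */
	if (mkfifo(path, 0666) < 0 && errno != EEXIST)
		return (-1);
	return (0);
}

/*
 * (End of editor's sketch; the original comment resumes.)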
*/ #ifndef _SYS_SYSPROTO_H_ struct symlink_args { char *path; char *link; }; #endif /* ARGSUSED */ int symlink(p, uap) struct proc *p; register struct symlink_args /* { syscallarg(char *) path; syscallarg(char *) link; } */ *uap; { struct vattr vattr; char *path; int error; struct nameidata nd; path = zalloc(namei_zone); if (error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) goto out; NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); if (error = namei(&nd)) goto out; if (nd.ni_vp) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(nd.ni_vp); error = EEXIST; goto out; } VATTR_NULL(&vattr); vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); out: zfree(namei_zone, path); return (error); } /* * Delete a whiteout from the filesystem. */ /* ARGSUSED */ int undelete(p, uap) struct proc *p; register struct undelete_args /* { syscallarg(char *) path; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, SCARG(uap, path), p); error = namei(&nd); if (error) return (error); if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == nd.ni_vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (nd.ni_vp) vrele(nd.ni_vp); return (EEXIST); } VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); if (error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE)) VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); vput(nd.ni_dvp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); return (error); } /* * Delete a name from the filesystem. */ #ifndef _SYS_SYSPROTO_H_ struct unlink_args { char *path; }; #endif /* ARGSUSED */ int unlink(p, uap) struct proc *p; struct unlink_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { /* * The root of a mounted filesystem cannot be deleted. * * XXX: can this only be a VDIR case? */ if (vp->v_flag & VROOT) error = EBUSY; } if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); if (vp != NULLVP) vput(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); return (error); } /* * Reposition read/write file offset. 
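 *
 * (An editor's sketch illustrating symlink()/unlink() above is
 * interposed here; lseek() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: symlink() refuses to
 * overwrite (the nd.ni_vp check above returns EEXIST), so repointing
 * a link atomically is done by creating a temporary name and
 * rename()ing it over the old one.
 */
#include <stdio.h>
#include <unistd.h>

int
replace_symlink(const char *target, const char *linkpath)
{
	char tmp[1024];

	(void)snprintf(tmp, sizeof(tmp), "%s.tmp", linkpath);
	(void)unlink(tmp);
	if (symlink(target, tmp) < 0)
		return (-1);
	if (rename(tmp, linkpath) < 0) {
		(void)unlink(tmp);
		return (-1);
	}
	return (0);
}

/*
 * (End of editor's sketch; the original comment resumes.)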
*/ #ifndef _SYS_SYSPROTO_H_ struct lseek_args { int fd; int pad; off_t offset; int whence; }; #endif int lseek(p, uap) struct proc *p; register struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ *uap; { struct ucred *cred = p->p_ucred; register struct filedesc *fdp = p->p_fd; register struct file *fp; struct vattr vattr; int error; if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (ESPIPE); switch (SCARG(uap, whence)) { case L_INCR: fp->f_offset += SCARG(uap, offset); break; case L_XTND: error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p); if (error) return (error); fp->f_offset = SCARG(uap, offset) + vattr.va_size; break; case L_SET: fp->f_offset = SCARG(uap, offset); break; default: return (EINVAL); } *(off_t *)(p->p_retval) = fp->f_offset; return (0); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Reposition read/write file offset. */ #ifndef _SYS_SYSPROTO_H_ struct olseek_args { int fd; long offset; int whence; }; #endif int olseek(p, uap) struct proc *p; register struct olseek_args /* { syscallarg(int) fd; syscallarg(long) offset; syscallarg(int) whence; } */ *uap; { struct lseek_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) offset; syscallarg(int) whence; } */ nuap; int error; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, offset) = SCARG(uap, offset); SCARG(&nuap, whence) = SCARG(uap, whence); error = lseek(p, &nuap); return (error); } #endif /* COMPAT_43 */ /* * Check access permissions. */ #ifndef _SYS_SYSPROTO_H_ struct access_args { char *path; int flags; }; #endif int access(p, uap) struct proc *p; register struct access_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct ucred *cred = p->p_ucred; register struct vnode *vp; int error, flags, t_gid, t_uid; struct nameidata nd; t_uid = cred->cr_uid; t_gid = cred->cr_groups[0]; cred->cr_uid = p->p_cred->p_ruid; cred->cr_groups[0] = p->p_cred->p_rgid; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) goto out1; vp = nd.ni_vp; /* Flags == 0 means only check for existence. */ if (SCARG(uap, flags)) { flags = 0; if (SCARG(uap, flags) & R_OK) flags |= VREAD; if (SCARG(uap, flags) & W_OK) flags |= VWRITE; if (SCARG(uap, flags) & X_OK) flags |= VEXEC; if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) error = VOP_ACCESS(vp, flags, cred, p); } vput(vp); out1: cred->cr_uid = t_uid; cred->cr_groups[0] = t_gid; return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct ostat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int ostat(p, uap) struct proc *p; register struct ostat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Get file status; this version does not follow links. 
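 *
 * (An editor's sketch illustrating access() above is interposed here;
 * the stat() family follows it.)
 */

/*
 * Editor's sketch, not part of this commit: access() checks against
 * the real uid/gid (the code above temporarily swaps them into the
 * credential), which is what a set-uid program wants before touching
 * a user-supplied path.  The check and any later open() are not
 * atomic, so the answer is advisory only.
 */
#include <unistd.h>

int
user_may_write(const char *path)
{
	return (access(path, W_OK) == 0);
}

/*
 * (End of editor's sketch; the original comment resumes.)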
*/ #ifndef _SYS_SYSPROTO_H_ struct olstat_args { char *path; struct ostat *ub; }; #endif /* ARGSUSED */ int olstat(p, uap) struct proc *p; register struct olstat_args /* { syscallarg(char *) path; syscallarg(struct ostat *) ub; } */ *uap; { struct vnode *vp; struct stat sb; struct ostat osb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); if (vp->v_type == VLNK) sb.st_mode |= S_IFLNK | ACCESSPERMS; /* 0777 */ vput(vp); if (error) return (error); cvtstat(&sb, &osb); error = copyout((caddr_t)&osb, (caddr_t)SCARG(uap, ub), sizeof (osb)); return (error); } /* * Convert from an old to a new stat structure. */ void cvtstat(st, ost) struct stat *st; struct ostat *ost; { ost->st_dev = st->st_dev; ost->st_ino = st->st_ino; ost->st_mode = st->st_mode; ost->st_nlink = st->st_nlink; ost->st_uid = st->st_uid; ost->st_gid = st->st_gid; ost->st_rdev = st->st_rdev; if (st->st_size < (quad_t)1 << 32) ost->st_size = st->st_size; else ost->st_size = -2; ost->st_atime = st->st_atime; ost->st_mtime = st->st_mtime; ost->st_ctime = st->st_ctime; ost->st_blksize = st->st_blksize; ost->st_blocks = st->st_blocks; ost->st_flags = st->st_flags; ost->st_gen = st->st_gen; } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Get file status; this version follows links. */ #ifndef _SYS_SYSPROTO_H_ struct stat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int stat(p, uap) struct proc *p; register struct stat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { struct stat sb; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = vn_stat(nd.ni_vp, &sb, p); vput(nd.ni_vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get file status; this version does not follow links. */ #ifndef _SYS_SYSPROTO_H_ struct lstat_args { char *path; struct stat *ub; }; #endif /* ARGSUSED */ int lstat(p, uap) struct proc *p; register struct lstat_args /* { syscallarg(char *) path; syscallarg(struct stat *) ub; } */ *uap; { int error; struct vnode *vp; struct stat sb; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; error = vn_stat(vp, &sb, p); if (vp->v_type == VLNK) sb.st_mode |= S_IFLNK | ACCESSPERMS; /* 0777 */ vput(vp); if (error) return (error); error = copyout((caddr_t)&sb, (caddr_t)SCARG(uap, ub), sizeof (sb)); return (error); } /* * Get configurable pathname variables. */ #ifndef _SYS_SYSPROTO_H_ struct pathconf_args { char *path; int name; }; #endif /* ARGSUSED */ int pathconf(p, uap) struct proc *p; register struct pathconf_args /* { syscallarg(char *) path; syscallarg(int) name; } */ *uap; { int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), p->p_retval); vput(nd.ni_vp); return (error); } /* * Return target name of a symbolic link. 
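 *
 * (An editor's sketch illustrating lstat() above is interposed here;
 * readlink() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: stat() follows the final
 * symlink while lstat() does not; for a link, the kernel above forces
 * the reported mode to S_IFLNK | 0777 regardless of what is on disk.
 */
#include <sys/types.h>
#include <sys/stat.h>

int
is_symlink(const char *path)
{
	struct stat sb;

	if (lstat(path, &sb) < 0)
		return (0);
	return (S_ISLNK(sb.st_mode));
}

/*
 * (End of editor's sketch; the original comment resumes.)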
*/ #ifndef _SYS_SYSPROTO_H_ struct readlink_args { char *path; char *buf; int count; }; #endif /* ARGSUSED */ int readlink(p, uap) struct proc *p; register struct readlink_args /* { syscallarg(char *) path; syscallarg(char *) buf; syscallarg(int) count; } */ *uap; { register struct vnode *vp; struct iovec aiov; struct uio auio; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VLNK) error = EINVAL; else { aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); error = VOP_READLINK(vp, &auio, p->p_ucred); } vput(vp); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Change flags of a file given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chflags_args { char *path; int flags; }; #endif /* ARGSUSED */ int chflags(p, uap) struct proc *p; register struct chflags_args /* { syscallarg(char *) path; syscallarg(int) flags; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change flags of a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchflags_args { int fd; int flags; }; #endif /* ARGSUSED */ int fchflags(p, uap) struct proc *p; register struct fchflags_args /* { syscallarg(int) fd; syscallarg(int) flags; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_flags = SCARG(uap, flags); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Change mode of a file given path name. */ #ifndef _SYS_SYSPROTO_H_ struct chmod_args { char *path; int mode; }; #endif /* ARGSUSED */ int chmod(p, uap) struct proc *p; register struct chmod_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Change mode of a file given a file descriptor. 
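 *
 * (An editor's sketch illustrating readlink() above is interposed
 * here; fchmod() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: readlink() does not
 * NUL-terminate -- the kernel above just fills the iovec and returns
 * count minus the residual -- so callers must terminate the buffer
 * themselves.
 */
#include <unistd.h>

int
readlink_string(const char *path, char *buf, int bufsiz)
{
	int n;

	n = readlink(path, buf, bufsiz - 1);
	if (n >= 0)
		buf[n] = '\0';
	return (n);
}

/*
 * (End of editor's sketch; the original comment resumes.)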
*/ #ifndef _SYS_SYSPROTO_H_ struct fchmod_args { int fd; int mode; }; #endif /* ARGSUSED */ int fchmod(p, uap) struct proc *p; register struct fchmod_args /* { syscallarg(int) fd; syscallarg(int) mode; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_mode = SCARG(uap, mode) & ALLPERMS; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set ownership given a path name. */ #ifndef _SYS_SYSPROTO_H_ struct chown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int chown(p, uap) struct proc *p; register struct chown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a path name, do not cross symlinks. */ #ifndef _SYS_SYSPROTO_H_ struct lchown_args { char *path; int uid; int gid; }; #endif /* ARGSUSED */ int lchown(p, uap) struct proc *p; register struct lchown_args /* { syscallarg(char *) path; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Set ownership given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct fchown_args { int fd; int uid; int gid; }; #endif /* ARGSUSED */ int fchown(p, uap) struct proc *p; register struct fchown_args /* { syscallarg(int) fd; syscallarg(int) uid; syscallarg(int) gid; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); VATTR_NULL(&vattr); vattr.va_uid = SCARG(uap, uid); vattr.va_gid = SCARG(uap, gid); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); VOP_UNLOCK(vp, 0, p); return (error); } /* * Set the access and modification times of a file. 
*/ #ifndef _SYS_SYSPROTO_H_ struct utimes_args { char *path; struct timeval *tptr; }; #endif /* ARGSUSED */ int utimes(p, uap) struct proc *p; register struct utimes_args /* { syscallarg(char *) path; syscallarg(struct timeval *) tptr; } */ *uap; { register struct vnode *vp; struct timeval tv[2]; struct vattr vattr; int error; struct nameidata nd; VATTR_NULL(&vattr); if (SCARG(uap, tptr) == NULL) { microtime(&tv[0]); tv[1] = tv[0]; vattr.va_vaflags |= VA_UTIMES_NULL; } else if (error = copyin((caddr_t)SCARG(uap, tptr), (caddr_t)tv, sizeof (tv))) return (error); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); vattr.va_atime.tv_sec = tv[0].tv_sec; vattr.va_atime.tv_nsec = tv[0].tv_usec * 1000; vattr.va_mtime.tv_sec = tv[1].tv_sec; vattr.va_mtime.tv_nsec = tv[1].tv_usec * 1000; error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); vput(vp); return (error); } /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct truncate_args { char *path; int pad; off_t length; }; #endif /* ARGSUSED */ int truncate(p, uap) struct proc *p; register struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; if (uap->length < 0) return(EINVAL); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0 && (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, p->p_ucred, p); } vput(vp); return (error); } /* * Truncate a file given a file descriptor. */ #ifndef _SYS_SYSPROTO_H_ struct ftruncate_args { int fd; int pad; off_t length; }; #endif /* ARGSUSED */ int ftruncate(p, uap) struct proc *p; register struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ *uap; { struct vattr vattr; struct vnode *vp; struct file *fp; int error; if (uap->length < 0) return(EINVAL); if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FWRITE) == 0) return (EINVAL); vp = (struct vnode *)fp->f_data; VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); if (vp->v_type == VDIR) error = EISDIR; else if ((error = vn_writechk(vp)) == 0) { VATTR_NULL(&vattr); vattr.va_size = SCARG(uap, length); error = VOP_SETATTR(vp, &vattr, fp->f_cred, p); } VOP_UNLOCK(vp, 0, p); return (error); } #if defined(COMPAT_43) || defined(COMPAT_SUNOS) /* * Truncate a file given its path name. */ #ifndef _SYS_SYSPROTO_H_ struct otruncate_args { char *path; long length; }; #endif /* ARGSUSED */ int otruncate(p, uap) struct proc *p; register struct otruncate_args /* { syscallarg(char *) path; syscallarg(long) length; } */ *uap; { struct truncate_args /* { syscallarg(char *) path; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, path) = SCARG(uap, path); SCARG(&nuap, length) = SCARG(uap, length); return (truncate(p, &nuap)); } /* * Truncate a file given a file descriptor. 
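 *
 * (An editor's sketch illustrating utimes() above is interposed here;
 * oftruncate() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: a NULL timeval pointer
 * means "now" (the VA_UTIMES_NULL path above), which the filesystem
 * permits to anyone with write access; explicit times require
 * ownership.
 */
#include <sys/time.h>
#include <stdio.h>

int
touch(const char *path)
{
	if (utimes(path, NULL) < 0) {
		perror(path);
		return (-1);
	}
	return (0);
}

/*
 * (End of editor's sketch; the original comment resumes.)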
*/ #ifndef _SYS_SYSPROTO_H_ struct oftruncate_args { int fd; long length; }; #endif /* ARGSUSED */ int oftruncate(p, uap) struct proc *p; register struct oftruncate_args /* { syscallarg(int) fd; syscallarg(long) length; } */ *uap; { struct ftruncate_args /* { syscallarg(int) fd; syscallarg(int) pad; syscallarg(off_t) length; } */ nuap; SCARG(&nuap, fd) = SCARG(uap, fd); SCARG(&nuap, length) = SCARG(uap, length); return (ftruncate(p, &nuap)); } #endif /* COMPAT_43 || COMPAT_SUNOS */ /* * Sync an open file. */ #ifndef _SYS_SYSPROTO_H_ struct fsync_args { int fd; }; #endif /* ARGSUSED */ int fsync(p, uap) struct proc *p; struct fsync_args /* { syscallarg(int) fd; } */ *uap; { register struct vnode *vp; struct file *fp; int error; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); vp = (struct vnode *)fp->f_data; if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p)) == NULL) { if (vp->v_object) { vm_object_page_clean(vp->v_object, 0, 0, FALSE); } error = VOP_FSYNC(vp, fp->f_cred, (vp->v_mount && (vp->v_mount->mnt_flag & MNT_ASYNC)) ? MNT_NOWAIT : MNT_WAIT, p); VOP_UNLOCK(vp, 0, p); } return (error); } /* * Rename files. Source and destination must either both be directories, * or both not be directories. If target is a directory, it must be empty. */ #ifndef _SYS_SYSPROTO_H_ struct rename_args { char *from; char *to; }; #endif /* ARGSUSED */ int rename(p, uap) struct proc *p; register struct rename_args /* { syscallarg(char *) from; syscallarg(char *) to; } */ *uap; { register struct vnode *tvp, *fvp, *tdvp; struct nameidata fromnd, tond; int error; NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE, SCARG(uap, from), p); if (error = namei(&fromnd)) return (error); fvp = fromnd.ni_vp; NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&tond)) { /* Translate error code for rename("dir1", "dir2/."). */ if (error == EISDIR && fvp->v_type == VDIR) error = EINVAL; VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); goto out1; } tdvp = tond.ni_dvp; tvp = tond.ni_vp; if (tvp != NULL) { if (fvp->v_type == VDIR && tvp->v_type != VDIR) { error = ENOTDIR; goto out; } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { error = EISDIR; goto out; } } if (fvp == tdvp) error = EINVAL; /* * If source is the same as the destination (that is the * same inode number with the same name in the same directory), * then there is nothing to do. 
*/ if (fvp == tvp && fromnd.ni_dvp == tdvp && fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen && !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr, fromnd.ni_cnd.cn_namelen)) error = -1; out: if (!error) { VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); } if (tvp) { VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); } error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); } else { VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd); vrele(fromnd.ni_dvp); vrele(fvp); } vrele(tond.ni_startdir); ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename"); ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename"); zfree(namei_zone, tond.ni_cnd.cn_pnbuf); out1: if (fromnd.ni_startdir) vrele(fromnd.ni_startdir); zfree(namei_zone, fromnd.ni_cnd.cn_pnbuf); if (error == -1) return (0); return (error); } /* * Make a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct mkdir_args { char *path; int mode; }; #endif /* ARGSUSED */ int mkdir(p, uap) struct proc *p; register struct mkdir_args /* { syscallarg(char *) path; syscallarg(int) mode; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), p); nd.ni_cnd.cn_flags |= WILLBEDIR; if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp != NULL) { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vrele(vp); return (EEXIST); } VATTR_NULL(&vattr); vattr.va_type = VDIR; vattr.va_mode = (SCARG(uap, mode) & ACCESSPERMS) &~ p->p_fd->fd_cmask; VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (!error) vput(nd.ni_vp); ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir"); return (error); } /* * Remove a directory file. */ #ifndef _SYS_SYSPROTO_H_ struct rmdir_args { char *path; }; #endif /* ARGSUSED */ int rmdir(p, uap) struct proc *p; struct rmdir_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; int error; struct nameidata nd; NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type != VDIR) { error = ENOTDIR; goto out; } /* * No rmdir "." please. */ if (nd.ni_dvp == vp) { error = EINVAL; goto out; } /* * The root of a mounted filesystem cannot be deleted. */ if (vp->v_flag & VROOT) error = EBUSY; out: if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); VOP_LEASE(vp, p, p->p_ucred, LEASE_WRITE); error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) vrele(nd.ni_dvp); else vput(nd.ni_dvp); vput(vp); } ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir"); ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir"); return (error); } #ifdef COMPAT_43 /* * Read a block of directory entries in a file system independent format. 
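 *
 * (An editor's sketch illustrating the rename() identity case above
 * is interposed here; ogetdirentries() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: renaming a name to itself
 * is a successful no-op.  The identity test above sets error = -1,
 * which the epilogue maps to a return of 0, and no VOP_RENAME is ever
 * issued.
 */
#include <stdio.h>

int
rename_self_is_noop(const char *path)
{
	/* Returns 1 for any existing path. */
	return (rename(path, path) == 0);
}

/*
 * (End of editor's sketch; the original comment resumes.)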
*/ #ifndef _SYS_SYSPROTO_H_ struct ogetdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int ogetdirentries(p, uap) struct proc *p; register struct ogetdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio, kuio; struct iovec aiov, kiov; struct dirent *dp, *edp; caddr_t dirbuf; int error, eofflag, readcnt; long loff; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; # if (BYTE_ORDER != LITTLE_ENDIAN) if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; } else # endif { kuio = auio; kuio.uio_iov = &kiov; kuio.uio_segflg = UIO_SYSSPACE; kiov.iov_len = SCARG(uap, count); MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK); kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = kuio.uio_offset; if (error == 0) { readcnt = SCARG(uap, count) - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { # if (BYTE_ORDER == LITTLE_ENDIAN) /* * The expected low byte of * dp->d_namlen is our dp->d_type. * The high MBZ byte of dp->d_namlen * is our dp->d_namlen. */ dp->d_type = dp->d_namlen; dp->d_namlen = 0; # else /* * The dp->d_type is the high byte * of the expected dp->d_namlen, * so must be zero'ed. */ dp->d_type = 0; # endif if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, &auio); } FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } #endif /* COMPAT_43 */ /* * Read a block of directory entries in a file system independent format. 
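 *
 * (An editor's sketch illustrating raw directory reading above is
 * interposed here; getdirentries() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: consuming a raw
 * getdirentries() buffer from userland.  Entries are advanced
 * strictly by d_reclen, and a zero reclen aborts the walk -- the same
 * guard the compatibility loop above enforces with EIO.
 */
#include <sys/types.h>
#include <sys/dirent.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

void
dump_names(int fd)	/* fd: a directory opened O_RDONLY */
{
	struct dirent *dp;
	char buf[4096], *cp;
	long base;
	int n;

	while ((n = getdirentries(fd, buf, sizeof(buf), &base)) > 0) {
		for (cp = buf; cp < buf + n; cp += dp->d_reclen) {
			dp = (struct dirent *)cp;
			if (dp->d_reclen == 0)
				return;		/* corrupt entry: stop */
			if (dp->d_fileno != 0)	/* skip deleted slots */
				printf("%s\n", dp->d_name);
		}
	}
}

/*
 * (End of editor's sketch; the original comment resumes.)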
*/ #ifndef _SYS_SYSPROTO_H_ struct getdirentries_args { int fd; char *buf; u_int count; long *basep; }; #endif int getdirentries(p, uap) struct proc *p; register struct getdirentries_args /* { syscallarg(int) fd; syscallarg(char *) buf; syscallarg(u_int) count; syscallarg(long *) basep; } */ *uap; { register struct vnode *vp; struct file *fp; struct uio auio; struct iovec aiov; long loff; int error, eofflag; if (error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) return (error); if ((fp->f_flag & FREAD) == 0) return (EBADF); vp = (struct vnode *)fp->f_data; unionread: if (vp->v_type != VDIR) return (EINVAL); aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = p; auio.uio_resid = SCARG(uap, count); /* vn_lock(vp, LK_SHARED | LK_RETRY, p); */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); loff = auio.uio_offset = fp->f_offset; error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, p); if (error) return (error); #ifdef UNION { if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_op == union_vnodeop_p)) { struct vnode *lvp; lvp = union_dircache(vp, p); if (lvp != NULLVP) { struct vattr va; /* * If the directory is opaque, * then don't show lower entries */ error = VOP_GETATTR(vp, &va, fp->f_cred, p); if (va.va_flags & OPAQUE) { vput(lvp); lvp = NULL; } } if (lvp != NULLVP) { error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); if (error) { vput(lvp); return (error); } VOP_UNLOCK(lvp, 0, p); fp->f_data = (caddr_t) lvp; fp->f_offset = 0; error = vn_close(vp, FREAD, fp->f_cred, p); if (error) return (error); vp = lvp; goto unionread; } } } #endif /* UNION */ if ((SCARG(uap, count) == auio.uio_resid) && (vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { struct vnode *tvp = vp; vp = vp->v_mount->mnt_vnodecovered; VREF(vp); fp->f_data = (caddr_t) vp; fp->f_offset = 0; vrele(tvp); goto unionread; } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); p->p_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } /* * Set the mode mask for creation of filesystem nodes. */ #ifndef _SYS_SYSPROTO_H_ struct umask_args { int newmask; }; #endif int umask(p, uap) struct proc *p; struct umask_args /* { syscallarg(int) newmask; } */ *uap; { register struct filedesc *fdp; fdp = p->p_fd; p->p_retval[0] = fdp->fd_cmask; fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; return (0); } /* * Void all references to file by ripping underlying filesystem * away from vnode. */ #ifndef _SYS_SYSPROTO_H_ struct revoke_args { char *path; }; #endif /* ARGSUSED */ int revoke(p, uap) struct proc *p; register struct revoke_args /* { syscallarg(char *) path; } */ *uap; { register struct vnode *vp; struct vattr vattr; int error; struct nameidata nd; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p)) goto out; if (p->p_ucred->cr_uid != vattr.va_uid && (error = suser(p->p_ucred, &p->p_acflag))) goto out; if (vp->v_usecount > 1 || (vp->v_flag & VALIASED)) VOP_REVOKE(vp, REVOKEALL); out: vrele(vp); return (error); } /* * Convert a user file descriptor to a kernel file entry. 
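 *
 * (An editor's sketch illustrating umask() above is interposed here;
 * getvnode() follows it.)
 */

/*
 * Editor's sketch, not part of this commit: umask() cannot fail and
 * always returns the previous mask (the kernel above just swaps
 * fd_cmask), so merely reading it takes two calls.
 */
#include <sys/types.h>
#include <sys/stat.h>

mode_t
get_umask(void)
{
	mode_t m;

	m = umask(0);
	(void)umask(m);		/* put it back */
	return (m);
}

/*
 * (End of editor's sketch; the original comment resumes.)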
*/ int getvnode(fdp, fd, fpp) struct filedesc *fdp; int fd; struct file **fpp; { struct file *fp; if ((u_int)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) return (EINVAL); *fpp = fp; return (0); } #ifndef _SYS_SYSPROTO_H_ struct __getcwd_args { u_char *buf; u_int buflen; }; #endif #define STATNODE(mode, name, var) \ SYSCTL_INT(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); static int disablecwd; SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, ""); static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); int __getcwd(p, uap) struct proc *p; struct __getcwd_args *uap; { char *bp, *buf; int error, i, slash_prefixed; struct filedesc *fdp; struct namecache *ncp; struct vnode *vp; numcwdcalls++; if (disablecwd) return (ENODEV); if (uap->buflen < 2) return (EINVAL); if (uap->buflen > MAXPATHLEN) uap->buflen = MAXPATHLEN; buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); bp += uap->buflen - 1; *bp = '\0'; fdp = p->p_fd; slash_prefixed = 0; for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { if (vp->v_flag & VROOT) { vp = vp->v_mount->mnt_vnodecovered; continue; } if (vp->v_dd->v_id != vp->v_ddid) { numcwdfail1++; free(buf, M_TEMP); return (ENOTDIR); } ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { numcwdfail2++; free(buf, M_TEMP); return (ENOENT); } if (ncp->nc_dvp != vp->v_dd) { numcwdfail3++; free(buf, M_TEMP); return (EBADF); } for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = ncp->nc_name[i]; } if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; slash_prefixed = 1; vp = vp->v_dd; } if (!slash_prefixed) { if (bp == buf) { numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; } numcwdfound++; error = copyout(bp, uap->buf, strlen(bp) + 1); free(buf, M_TEMP); return (error); } Index: head/sys/kern/vnode_if.src =================================================================== --- head/sys/kern/vnode_if.src (revision 34265) +++ head/sys/kern/vnode_if.src (revision 34266) @@ -1,461 +1,473 @@ # # Copyright (c) 1992, 1993 # The Regents of the University of California. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. All advertising materials mentioning features or use of this software # must display the following acknowledgement: # This product includes software developed by the University of # California, Berkeley and its contributors. # 4. 
Neither the name of the University nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#	@(#)vnode_if.src	8.12 (Berkeley) 5/14/95
-# $Id: vnode_if.src,v 1.14 1997/10/16 10:48:00 phk Exp $
+# $Id: vnode_if.src,v 1.15 1997/10/16 20:32:23 phk Exp $
#
#
# Above each of the vop descriptors is a specification of the locking
# protocol used by each vop call.  The first column is the name of
# the variable, the remaining three columns are in, out and error
# respectively.  The "in" column defines the lock state on input,
# the "out" column defines the state on successful return, and the
# "error" column defines the locking state on error exit.
#
# The locking value can take the following values:
# L: locked.
# U: unlocked.
# -: not applicable.  vnode does not yet (or no longer) exist.
# =: the same on input and output, may be either L or U.
# X: locked if not nil.
#
#
#% lookup	dvp	L ? ?
#% lookup	vpp	- L -
#
# XXX - the lookup locking protocol defies simple description and depends
#	on the flags and operation fields in the (cnp) structure.  Note
#	especially that *vpp may equal dvp and both may be locked.
#
vop_lookup {
	IN struct vnode *dvp;
	INOUT struct vnode **vpp;
	IN struct componentname *cnp;
};

#
#% cachedlookup	dvp	L ? ?
#% cachedlookup	vpp	- L -
#
# This must be an exact copy of lookup.  See kern/vfs_cache.c for details.
# vop_cachedlookup { IN struct vnode *dvp; INOUT struct vnode **vpp; IN struct componentname *cnp; }; # #% create dvp L U U #% create vpp - L - # vop_create { IN WILLRELE struct vnode *dvp; OUT struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; }; # #% whiteout dvp L L L #% whiteout cnp - - - #% whiteout flag - - - # vop_whiteout { IN WILLRELE struct vnode *dvp; IN struct componentname *cnp; IN int flags; }; # #% mknod dvp L U U #% mknod vpp - X - # vop_mknod { IN WILLRELE struct vnode *dvp; OUT WILLRELE struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; }; # #% open vp L L L # vop_open { IN struct vnode *vp; IN int mode; IN struct ucred *cred; IN struct proc *p; }; # #% close vp U U U # vop_close { IN struct vnode *vp; IN int fflag; IN struct ucred *cred; IN struct proc *p; }; # #% access vp L L L # vop_access { IN struct vnode *vp; IN int mode; IN struct ucred *cred; IN struct proc *p; }; # #% getattr vp = = = # vop_getattr { IN struct vnode *vp; IN struct vattr *vap; IN struct ucred *cred; IN struct proc *p; }; # #% setattr vp L L L # vop_setattr { IN struct vnode *vp; IN struct vattr *vap; IN struct ucred *cred; IN struct proc *p; }; # #% read vp L L L # vop_read { IN struct vnode *vp; INOUT struct uio *uio; IN int ioflag; IN struct ucred *cred; }; # #% write vp L L L # vop_write { IN struct vnode *vp; INOUT struct uio *uio; IN int ioflag; IN struct ucred *cred; }; # #% lease vp = = = # vop_lease { IN struct vnode *vp; IN struct proc *p; IN struct ucred *cred; IN int flag; }; # #% ioctl vp U U U # vop_ioctl { IN struct vnode *vp; IN u_long command; IN caddr_t data; IN int fflag; IN struct ucred *cred; IN struct proc *p; }; # #% poll vp U U U # vop_poll { IN struct vnode *vp; IN int events; IN struct ucred *cred; IN struct proc *p; }; # #% revoke vp U U U # vop_revoke { IN struct vnode *vp; IN int flags; }; # # XXX - not used # vop_mmap { IN struct vnode *vp; IN int fflags; IN struct ucred *cred; IN struct proc *p; }; # #% fsync vp L L L # vop_fsync { IN struct vnode *vp; IN struct ucred *cred; IN int waitfor; IN struct proc *p; }; # #% remove dvp L U U #% remove vp L U U # vop_remove { IN WILLRELE struct vnode *dvp; IN WILLRELE struct vnode *vp; IN struct componentname *cnp; }; # #% link vp U U U #% link tdvp L U U # vop_link { IN WILLRELE struct vnode *tdvp; IN struct vnode *vp; IN struct componentname *cnp; }; # #% rename fdvp U U U #% rename fvp U U U #% rename tdvp L U U #% rename tvp X U U # vop_rename { IN WILLRELE struct vnode *fdvp; IN WILLRELE struct vnode *fvp; IN struct componentname *fcnp; IN WILLRELE struct vnode *tdvp; IN WILLRELE struct vnode *tvp; IN struct componentname *tcnp; }; # #% mkdir dvp L U U #% mkdir vpp - L - # vop_mkdir { IN WILLRELE struct vnode *dvp; OUT struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; }; # #% rmdir dvp L U U #% rmdir vp L U U # vop_rmdir { IN WILLRELE struct vnode *dvp; IN WILLRELE struct vnode *vp; IN struct componentname *cnp; }; # #% symlink dvp L U U #% symlink vpp - U - # # XXX - note that the return vnode has already been VRELE'ed # by the filesystem layer. To use it you must use vget, # possibly with a further namei. 
# vop_symlink { IN WILLRELE struct vnode *dvp; OUT WILLRELE struct vnode **vpp; IN struct componentname *cnp; IN struct vattr *vap; IN char *target; }; # #% readdir vp L L L # vop_readdir { IN struct vnode *vp; INOUT struct uio *uio; IN struct ucred *cred; INOUT int *eofflag; OUT int *ncookies; INOUT u_long **cookies; }; # #% readlink vp L L L # vop_readlink { IN struct vnode *vp; INOUT struct uio *uio; IN struct ucred *cred; }; # #% abortop dvp = = = # vop_abortop { IN struct vnode *dvp; IN struct componentname *cnp; }; # #% inactive vp L U U # vop_inactive { IN struct vnode *vp; IN struct proc *p; }; # #% reclaim vp U U U # vop_reclaim { IN struct vnode *vp; IN struct proc *p; }; # #% lock vp U L U # vop_lock { IN struct vnode *vp; IN int flags; IN struct proc *p; }; # #% unlock vp L U L # vop_unlock { IN struct vnode *vp; IN int flags; IN struct proc *p; }; # #% bmap vp L L L #% bmap vpp - U - # vop_bmap { IN struct vnode *vp; IN daddr_t bn; OUT struct vnode **vpp; IN daddr_t *bnp; OUT int *runp; OUT int *runb; }; # # Needs work: no vp? # #vop_strategy { # IN struct buf *bp; #}; # #% print vp = = = # vop_print { IN struct vnode *vp; }; # #% islocked vp = = = # vop_islocked { IN struct vnode *vp; }; # #% pathconf vp L L L # vop_pathconf { IN struct vnode *vp; IN int name; OUT register_t *retval; }; # #% advlock vp U U U # vop_advlock { IN struct vnode *vp; IN caddr_t id; IN int op; IN struct flock *fl; IN int flags; +}; + +# +#% balloc vp L L L +# +vop_balloc { + IN struct vnode *vp; + IN off_t startoffset; + IN int size; + IN struct ucred *cred; + IN int flags; + OUT struct buf **bpp; }; # #% reallocblks vp L L L # vop_reallocblks { IN struct vnode *vp; IN struct cluster_save *buflist; }; vop_getpages { IN struct vnode *vp; IN vm_page_t *m; IN int count; IN int reqpage; IN vm_ooffset_t offset; }; vop_putpages { IN struct vnode *vp; IN vm_page_t *m; IN int count; IN int sync; IN int *rtvals; IN vm_ooffset_t offset; }; # # Needs work: no vp? # #vop_bwrite { # IN struct buf *bp; #}; Index: head/sys/miscfs/specfs/spec_vnops.c =================================================================== --- head/sys/miscfs/specfs/spec_vnops.c (revision 34265) +++ head/sys/miscfs/specfs/spec_vnops.c (revision 34266) @@ -1,911 +1,917 @@ /* * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 - * $Id: spec_vnops.c,v 1.58 1998/03/07 21:35:52 dyson Exp $ + * $Id: spec_vnops.c,v 1.59 1998/03/08 08:46:18 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int spec_getattr __P((struct vop_getattr_args *)); static int spec_badop __P((void)); static int spec_strategy __P((struct vop_strategy_args *)); static int spec_print __P((struct vop_print_args *)); static int spec_lookup __P((struct vop_lookup_args *)); static int spec_open __P((struct vop_open_args *)); static int spec_close __P((struct vop_close_args *)); static int spec_read __P((struct vop_read_args *)); static int spec_write __P((struct vop_write_args *)); static int spec_ioctl __P((struct vop_ioctl_args *)); static int spec_poll __P((struct vop_poll_args *)); static int spec_inactive __P((struct vop_inactive_args *)); static int spec_fsync __P((struct vop_fsync_args *)); static int spec_bmap __P((struct vop_bmap_args *)); static int spec_advlock __P((struct vop_advlock_args *)); static int spec_getpages __P((struct vop_getpages_args *)); struct vnode *speclisth[SPECHSZ]; vop_t **spec_vnodeop_p; static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_access_desc, (vop_t *) vop_ebadf }, { &vop_advlock_desc, (vop_t *) spec_advlock }, { &vop_bmap_desc, (vop_t *) spec_bmap }, { &vop_close_desc, (vop_t *) spec_close }, { &vop_create_desc, (vop_t *) spec_badop }, { &vop_fsync_desc, (vop_t *) spec_fsync }, { &vop_getattr_desc, (vop_t *) spec_getattr }, { &vop_getpages_desc, (vop_t *) spec_getpages }, { &vop_inactive_desc, (vop_t *) spec_inactive }, { &vop_ioctl_desc, (vop_t *) spec_ioctl }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) spec_badop }, { &vop_lookup_desc, (vop_t *) spec_lookup }, { &vop_mkdir_desc, (vop_t *) spec_badop }, { &vop_mknod_desc, (vop_t *) spec_badop }, { &vop_open_desc, (vop_t *) spec_open }, { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, { &vop_poll_desc, (vop_t *) spec_poll }, { &vop_print_desc, (vop_t *) spec_print }, { &vop_read_desc, (vop_t *) spec_read }, { &vop_readdir_desc, (vop_t *) spec_badop }, { &vop_readlink_desc, (vop_t *) spec_badop }, { &vop_reallocblks_desc, (vop_t *) spec_badop }, { &vop_reclaim_desc, (vop_t *) vop_null }, { &vop_remove_desc, (vop_t *) spec_badop }, { &vop_rename_desc, (vop_t *) spec_badop }, { &vop_rmdir_desc, (vop_t *) spec_badop }, { &vop_setattr_desc, (vop_t *) vop_ebadf }, { &vop_strategy_desc, (vop_t *) spec_strategy }, { &vop_symlink_desc, (vop_t *) spec_badop }, { &vop_write_desc, (vop_t *) spec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; VNODEOP_SET(spec_vnodeop_opv_desc); int spec_vnoperate(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { return (VOCALL(spec_vnodeop_p, 
ap->a_desc->vdesc_offset, ap)); } static void spec_getpages_iodone __P((struct buf *bp)); /* * Trivial lookup routine that always fails. */ static int spec_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { *ap->a_vpp = NULL; return (ENOTDIR); } /* * Open a special file. */ /* ARGSUSED */ static int spec_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct proc *p = ap->a_p; struct vnode *bvp, *vp = ap->a_vp; dev_t bdev, dev = (dev_t)vp->v_rdev; int maj = major(dev); int error; /* * Don't allow open if fs is mounted -nodev. */ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) return (ENXIO); switch (vp->v_type) { case VCHR: if ((u_int)maj >= nchrdev) return (ENXIO); if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL)) return ENXIO; if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { /* * When running in very secure mode, do not allow * opens for writing of any disk character devices. */ if (securelevel >= 2 && cdevsw[maj]->d_bdev && (cdevsw[maj]->d_bdev->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * When running in secure mode, do not allow opens * for writing of /dev/mem, /dev/kmem, or character * devices whose corresponding block devices are * currently mounted. */ if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && vfinddev(bdev, VBLK, &bvp) && bvp->v_usecount > 0 && (error = vfs_mountedon(bvp))) return (error); if (iskmemdev(dev)) return (EPERM); } } #if 0 /* * Lite2 stuff. We will almost certainly do this * differently with devfs. The only use of this flag * is in dead_read to make ttys return EOF instead of * EIO when they are dead. Pre-lite2 FreeBSD returns * EOF for all character devices. */ if (cdevsw[maj]->d_type == D_TTY) vp->v_flag |= VISTTY; #endif VOP_UNLOCK(vp, 0, p); error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, p); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if ((u_int)maj >= nblkdev) return (ENXIO); if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL)) return ENXIO; /* * When running in very secure mode, do not allow * opens for writing of any disk block devices. */ if (securelevel >= 2 && ap->a_cred != FSCRED && (ap->a_mode & FWRITE) && (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * Do not allow opens of block devices that are * currently mounted. 
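 * (As of this revision the "mounted on" state lives in the device
 * vnode's v_specmountpoint pointer rather than the old SI_MOUNTEDON
 * flag -- see the specdev.h hunk below -- so vfs_mountedon()
 * presumably just checks that pointer, on this vnode and on any
 * vnodes aliasing the same device, returning EBUSY if one is set.)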
*/ error = vfs_mountedon(vp); if (error) return (error); return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, p)); } return (0); } /* * Vnode op for read */ /* ARGSUSED */ static int spec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn, nextbn; long bsize, bscale; struct partinfo dpart; int n, on, majordev; d_ioctl_t *ioctl; int error = 0; dev_t dev; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_read proc"); #endif if (uio->uio_resid == 0) return (0); switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)]->d_read) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; dev = vp->v_rdev; if ((majordev = major(dev)) < nblkdev && (ioctl = bdevsw[majordev]->d_ioctl) != NULL && (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; bscale = btodb(bsize); do { bn = btodb(uio->uio_offset) & ~(bscale - 1); on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (vp->v_lastr + bscale == bn) { nextbn = bn + bscale; error = breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else error = bread(vp, bn, (int)bsize, NOCRED, &bp); vp->v_lastr = bn; n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_read type"); } /* NOTREACHED */ } /* * Vnode op for write */ /* ARGSUSED */ static int spec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn; int bsize, blkmask; struct partinfo dpart; register int n, on; int error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("spec_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_write proc"); #endif switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)]->d_write) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) { if (dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; } blkmask = btodb(bsize) - 1; do { bn = btodb(uio->uio_offset) & ~blkmask; on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (n == bsize) bp = getblk(vp, bn, bsize, 0, 0); else error = bread(vp, bn, bsize, NOCRED, &bp); n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) bawrite(bp); else bdwrite(bp); } while (error == 0 && uio->uio_resid 
> 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ } /* * Device ioctl operation. */ /* ARGSUSED */ static int spec_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { dev_t dev = ap->a_vp->v_rdev; switch (ap->a_vp->v_type) { case VCHR: return ((*cdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); case VBLK: if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) if ((bdevsw[major(dev)]->d_flags & D_TYPEMASK) == D_TAPE) return (0); else return (1); return ((*bdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); default: panic("spec_ioctl"); /* NOTREACHED */ } } /* ARGSUSED */ static int spec_poll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register dev_t dev; switch (ap->a_vp->v_type) { case VCHR: dev = ap->a_vp->v_rdev; return (*cdevsw[major(dev)]->d_poll)(dev, ap->a_events, ap->a_p); default: return (vop_defaultop((struct vop_generic_args *)ap)); } } /* * Synch buffers associated with a block device */ /* ARGSUSED */ static int spec_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct buf *nbp; int s; if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. */ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("spec_fsync: not dirty"); if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { vfs_bio_awrite(bp); splx(s); } else { bremfree(bp); bp->b_flags |= B_BUSY; splx(s); bawrite(bp); } goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0); } #ifdef DIAGNOSTIC if (vp->v_dirtyblkhd.lh_first) { vprint("spec_fsync: dirty", vp); splx(s); goto loop; } #endif } splx(s); return (0); } static int spec_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } /* * Just call the device strategy routine */ static int spec_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { + struct buf *bp; - (*bdevsw[major(ap->a_bp->b_dev)]->d_strategy)(ap->a_bp); + bp = ap->a_bp; + if ((LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) + (*bioops.io_start)(bp); + (*bdevsw[major(bp->b_dev)]->d_strategy)(bp); return (0); } /* * This is a noop, simply returning what one has been given. 
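 * For a device vnode there is no file mapping to undo: the logical
 * block number already is the device block number, so vp and bn map
 * to themselves and no read-ahead runs are reported (*a_runp and
 * *a_runb are zeroed).  Note also the change to spec_strategy()
 * above: when a buffer carries dependencies (b_dep non-empty) and a
 * bioops.io_start handler is registered, that hook -- evidently for
 * dependency-tracking code such as soft updates -- now runs before
 * the driver's d_strategy routine sees the buffer.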
*/ static int spec_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Device close routine */ /* ARGSUSED */ static int spec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; struct proc *p = ap->a_p; dev_t dev = vp->v_rdev; d_close_t *devclose; int mode, error; switch (vp->v_type) { case VCHR: /* * Hack: a tty device that is a controlling terminal * has a reference from the session structure. * We cannot easily tell that a character device is * a controlling terminal, unless it is the closing * process' controlling terminal. In that case, * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ if (vcount(vp) == 2 && ap->a_p && (vp->v_flag & VXLOCK) == 0 && vp == ap->a_p->p_session->s_ttyvp) { vrele(vp); ap->a_p->p_session->s_ttyvp = NULL; } /* * If the vnode is locked, then we are in the midst * of forcibly closing the device, otherwise we only * close on last reference. */ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) return (0); devclose = cdevsw[major(dev)]->d_close; mode = S_IFCHR; break; case VBLK: /* * On last close of a block device (that isn't mounted) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); + VOP_UNLOCK(vp, 0, ap->a_p); if (error) return (error); /* * We do not want to really close the device if it * is still in use unless we are trying to close it * forcibly. Since every use (buffer, vnode, swap, cmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0) return (0); devclose = bdevsw[major(dev)]->d_close; mode = S_IFBLK; break; default: panic("spec_close: not special"); } return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); } /* * Print out the contents of a special device vnode. */ static int spec_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), minor(ap->a_vp->v_rdev)); return (0); } /* * Special device advisory byte-level locks. */ /* ARGSUSED */ static int spec_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); } /* * Special device bad operation */ static int spec_badop() { panic("spec_badop called"); /* NOTREACHED */ } static void spec_getpages_iodone(bp) struct buf *bp; { bp->b_flags |= B_DONE; wakeup(bp); } static int spec_getpages(ap) struct vop_getpages_args *ap; { vm_offset_t kva; int error; int i, pcount, size, s; daddr_t blkno; struct buf *bp; vm_page_t m; vm_ooffset_t offset; int toff, nextoff, nread; struct vnode *vp = ap->a_vp; int blksiz; int gotreqpage; error = 0; pcount = round_page(ap->a_count) / PAGE_SIZE; /* * Calculate the offset of the transfer.
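 * That is, offset = IDX_TO_OFF(m[0]->pindex) + a_offset: the byte
 * address of the first page (pindex * PAGE_SIZE) plus the caller's
 * offset within the run; btodb() then reduces it to the DEV_BSIZE
 * block number stored in b_blkno below.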
*/ offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset; /* XXX sanity check before we go into details. */ /* XXX limits should be defined elsewhere. */ #define DADDR_T_BIT 32 #define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1) if (offset < 0 || offset > OFFSET_MAX) { /* XXX still no %q in kernel. */ printf("spec_getpages: preposterous offset 0x%x%08x\n", (u_int)((u_quad_t)offset >> 32), (u_int)(offset & 0xffffffff)); return (VM_PAGER_ERROR); } blkno = btodb(offset); /* * Round up physical size for real devices, use the * fundamental blocksize of the fs if possible. */ if (vp && vp->v_mount) blksiz = vp->v_mount->mnt_stat.f_bsize; else blksiz = DEV_BSIZE; size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); bp = getpbuf(); kva = (vm_offset_t)bp->b_data; /* * Map the pages to be read into the kva. */ pmap_qenter(kva, ap->a_m, pcount); /* Build a minimal buffer header. */ bp->b_flags = B_BUSY | B_READ | B_CALL; bp->b_iodone = spec_getpages_iodone; /* B_PHYS is not set, but it is nice to fill this in. */ bp->b_proc = curproc; bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_blkno = blkno; bp->b_lblkno = blkno; pbgetvp(ap->a_vp, bp); bp->b_bcount = size; bp->b_bufsize = size; bp->b_resid = 0; cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; /* Do the input. */ VOP_STRATEGY(bp); s = splbio(); /* We definitely need to be at splbio here. */ while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PVM, "spread", 0); splx(s); if ((bp->b_flags & B_ERROR) != 0) { if (bp->b_error) error = bp->b_error; else error = EIO; } nread = size - bp->b_resid; if (nread < ap->a_count) { bzero((caddr_t)kva + nread, ap->a_count - nread); } pmap_qremove(kva, pcount); gotreqpage = 0; for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= nread) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else if (toff < nread) { int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); vm_page_set_validclean(m, 0, nvalid); } else { m->valid = 0; m->dirty = 0; } if (i != ap->a_reqpage) { /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error || (m->valid == VM_PAGE_BITS_ALL)) { if (m->valid) { if (m->flags & PG_WANTED) { vm_page_activate(m); } else { vm_page_deactivate(m); } PAGE_WAKEUP(m); } else { vm_page_free(m); } } else { vm_page_free(m); } } else if (m->valid) { gotreqpage = 1; } } if (!gotreqpage) { m = ap->a_m[ap->a_reqpage]; #ifndef MAX_PERF printf("spec_getpages: I/O read failure: (error code=%d)\n", error); printf(" size: %d, resid: %d, a_count: %d, valid: 0x%x\n", size, bp->b_resid, ap->a_count, m->valid); printf(" nread: %d, reqpage: %d, pindex: %d, pcount: %d\n", nread, ap->a_reqpage, m->pindex, pcount); #endif /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp); return VM_PAGER_ERROR; } /* * Free the buffer header back to the swap buffer pool. 
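 * (pbufs come from a small dedicated pool, so every getpbuf() must
 * be paired with a relpbuf() on every exit path, as the error
 * return just above also does.)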
*/ relpbuf(bp); return VM_PAGER_OK; } /* ARGSUSED */ static int spec_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vattr *vap = ap->a_vap; struct partinfo dpart; bzero(vap, sizeof (*vap)); if (vp->v_type == VBLK) vap->va_blocksize = BLKDEV_IOSIZE; else if (vp->v_type == VCHR) vap->va_blocksize = MAXBSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, ap->a_p) == 0) { vap->va_bytes = dbtob(dpart.disklab->d_partitions [minor(vp->v_rdev)].p_size); vap->va_size = vap->va_bytes; } return (0); } Index: head/sys/miscfs/specfs/specdev.h =================================================================== --- head/sys/miscfs/specfs/specdev.h (revision 34265) +++ head/sys/miscfs/specfs/specdev.h (revision 34266) @@ -1,84 +1,79 @@ /* * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)specdev.h 8.6 (Berkeley) 5/21/95 - * $Id: specdev.h,v 1.12 1997/09/14 02:58:03 peter Exp $ + * $Id: specdev.h,v 1.13 1997/10/15 13:23:21 phk Exp $ */ /* * This structure defines the information maintained about * special devices. It is allocated in checkalias and freed * in vgone. 
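 * si_mountpoint, new in this revision, points to the filesystem
 * mounted on the device (or is NULL).  It replaces the old si_flags
 * word, whose only use was the SI_MOUNTEDON bit: callers that used
 * to test the flag can now find the mount itself.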
*/ struct specinfo { struct vnode **si_hashchain; struct vnode *si_specnext; - long si_flags; + struct mount *si_mountpoint; dev_t si_rdev; }; /* * Exported shorthand */ #define v_rdev v_specinfo->si_rdev #define v_hashchain v_specinfo->si_hashchain #define v_specnext v_specinfo->si_specnext -#define v_specflags v_specinfo->si_flags - -/* - * Flags for specinfo - */ -#define SI_MOUNTEDON 0x0001 /* block special device is mounted on */ +#define v_specmountpoint v_specinfo->si_mountpoint /* * Special device management */ #define SPECHSZ 64 #if ((SPECHSZ&(SPECHSZ-1)) == 0) #define SPECHASH(rdev) (((rdev>>5)+(rdev))&(SPECHSZ-1)) #else #define SPECHASH(rdev) (((unsigned)((rdev>>5)+(rdev)))%SPECHSZ) #endif extern struct vnode *speclisth[SPECHSZ]; /* * Prototypes for special file operations on vnodes. */ extern vop_t **spec_vnodeop_p; struct nameidata; struct componentname; struct ucred; struct flock; struct buf; struct uio; int spec_vnoperate __P((struct vop_generic_args *)); Index: head/sys/msdosfs/msdosfs_vfsops.c =================================================================== --- head/sys/msdosfs/msdosfs_vfsops.c (revision 34265) +++ head/sys/msdosfs/msdosfs_vfsops.c (revision 34266) @@ -1,1051 +1,1054 @@ -/* $Id: msdosfs_vfsops.c,v 1.28 1998/02/23 16:44:32 ache Exp $ */ +/* $Id: msdosfs_vfsops.c,v 1.29 1998/03/01 22:46:27 msmith Exp $ */ /* $NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $ */ /*- * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". 
* * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include #include #include #include #include #include #include /* XXX */ /* defines v_rdev */ #include #include #include #include #include /* defines ALLPERMS */ #include #include #include #include #include #include MALLOC_DEFINE(M_MSDOSFSMNT, "MSDOSFS mount", "MSDOSFS mount structure"); static MALLOC_DEFINE(M_MSDOSFSFAT, "MSDOSFS FAT", "MSDOSFS file allocation table"); static int update_mp __P((struct mount *mp, struct msdosfs_args *argp)); static int mountmsdosfs __P((struct vnode *devvp, struct mount *mp, struct proc *p, struct msdosfs_args *argp)); static int msdosfs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); static int msdosfs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int msdosfs_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *)); static int msdosfs_root __P((struct mount *, struct vnode **)); static int msdosfs_start __P((struct mount *, int, struct proc *)); static int msdosfs_statfs __P((struct mount *, struct statfs *, struct proc *)); static int msdosfs_sync __P((struct mount *, int, struct ucred *, struct proc *)); static int msdosfs_unmount __P((struct mount *, int, struct proc *)); static int msdosfs_vget __P((struct mount *mp, ino_t ino, struct vnode **vpp)); static int msdosfs_vptofh __P((struct vnode *, struct fid *)); static int update_mp(mp, argp) struct mount *mp; struct msdosfs_args *argp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error; pmp->pm_gid = argp->gid; pmp->pm_uid = argp->uid; pmp->pm_mask = argp->mask & ALLPERMS; pmp->pm_flags |= argp->flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_U2WTABLE) { bcopy(argp->u2w, pmp->pm_u2w, sizeof(pmp->pm_u2w)); bcopy(argp->d2u, pmp->pm_d2u, sizeof(pmp->pm_d2u)); bcopy(argp->u2d, pmp->pm_u2d, sizeof(pmp->pm_u2d)); } if (pmp->pm_flags & MSDOSFSMNT_ULTABLE) { bcopy(argp->ul, pmp->pm_ul, sizeof(pmp->pm_ul)); bcopy(argp->lu, pmp->pm_lu, sizeof(pmp->pm_lu)); } #ifndef __FreeBSD__ /* * GEMDOS knows nothing (yet) about win95 */ if (pmp->pm_flags & MSDOSFSMNT_GEMDOSFS) pmp->pm_flags |= MSDOSFSMNT_NOWIN95; #endif if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; else if (!(pmp->pm_flags & (MSDOSFSMNT_SHORTNAME | MSDOSFSMNT_LONGNAME))) { struct vnode *rootvp; /* * Try to divine whether to support Win'95 long filenames */ if (FAT32(pmp)) pmp->pm_flags |= MSDOSFSMNT_LONGNAME; else { if ((error = msdosfs_root(mp, &rootvp)) != 0) return error; pmp->pm_flags |= findwin95(VTODE(rootvp)) ? MSDOSFSMNT_LONGNAME : MSDOSFSMNT_SHORTNAME; vput(rootvp); } } return 0; } #ifndef __FreeBSD__ int msdosfs_mountroot() { register struct mount *mp; struct proc *p = curproc; /* XXX */ size_t size; int error; struct msdosfs_args args; if (root_device->dv_class != DV_DISK) return (ENODEV); /* * Get vnodes for swapdev and rootdev. 
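 * (bdevvp() below manufactures a VBLK vnode for rootdev, since at
 * mountroot time no filesystem exists yet through which a device
 * vnode could be looked up.)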
*/ if (bdevvp(rootdev, &rootvp)) panic("msdosfs_mountroot: can't setup rootvp"); mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); bzero((char *)mp, (u_long)sizeof(struct mount)); mp->mnt_op = &msdosfs_vfsops; mp->mnt_flag = 0; LIST_INIT(&mp->mnt_vnodelist); args.flags = 0; args.uid = 0; args.gid = 0; args.mask = 0777; if ((error = mountmsdosfs(rootvp, mp, p, &args)) != 0) { free(mp, M_MOUNT); return (error); } if ((error = update_mp(mp, &args)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } if ((error = vfs_lock(mp)) != 0) { (void)msdosfs_unmount(mp, 0, p); free(mp, M_MOUNT); return (error); } CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); mp->mnt_vnodecovered = NULLVP; (void) copystr("/", mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void)msdosfs_statfs(mp, &mp->mnt_stat, p); vfs_unlock(mp); return (0); } #endif /* * mp - path - addr in user space of mount point (ie /usr or whatever) * data - addr in user space of mount params including the name of the block * special file to treat as a filesystem. */ static int msdosfs_mount(mp, path, data, ndp, p) struct mount *mp; char *path; caddr_t data; struct nameidata *ndp; struct proc *p; { struct vnode *devvp; /* vnode for blk device to mount */ struct msdosfs_args args; /* will hold data from mount request */ /* msdosfs specific mount control block */ struct msdosfsmount *pmp = NULL; size_t size; int error, flags; mode_t accessmode; error = copyin(data, (caddr_t)&args, sizeof(struct msdosfs_args)); if (error) return (error); if (args.magic != MSDOSFS_ARGSMAGIC) { printf("Old mount_msdosfs, flags=%d\n", args.flags); args.flags = 0; } /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. */ if (mp->mnt_flag & MNT_UPDATE) { pmp = VFSTOMSDOSFS(mp); error = 0; if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); } if (!error && (mp->mnt_flag & MNT_RELOAD)) /* not yet implemented */ error = EOPNOTSUPP; if (error) return (error); if ((pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (p->p_ucred->cr_uid != 0) { devvp = pmp->pm_devvp; vn_lock(devvp, LK_EXCLUSIVE, p); error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p); if (error) { VOP_UNLOCK(devvp, 0, p); return (error); } VOP_UNLOCK(devvp, 0, p); } pmp->pm_flags &= ~MSDOSFSMNT_RONLY; } if (args.fspec == 0) { #ifdef __notyet__ /* doesn't work correctly with current mountd XXX */ if (args.flags & MSDOSFSMNT_MNTOPT) { pmp->pm_flags &= ~MSDOSFSMNT_MNTOPT; pmp->pm_flags |= args.flags & MSDOSFSMNT_MNTOPT; if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; } #endif /* * Process export requests. */ return (vfs_export(mp, &pmp->pm_export, &args.export)); } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. 
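 * "Sensible" here means: namei() can resolve it, it is of type VBLK,
 * its major number falls within bdevsw[], and a non-root caller has
 * the needed read (and, for read-write mounts, write) access to the
 * device node.
 */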
*/ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); error = namei(ndp); if (error) return (error); devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { vrele(devvp); return (ENOTBLK); } if (major(devvp->v_rdev) >= nblkdev) { vrele(devvp); return (ENXIO); } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (p->p_ucred->cr_uid != 0) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); if (error) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = mountmsdosfs(devvp, mp, p, &args); #ifdef MSDOSFS_DEBUG /* only needed for the printf below */ pmp = VFSTOMSDOSFS(mp); #endif } else { if (devvp != pmp->pm_devvp) error = EINVAL; /* XXX needs translation */ else vrele(devvp); } if (error) { vrele(devvp); return (error); } error = update_mp(mp, &args); if (error) { msdosfs_unmount(mp, MNT_FORCE, p); return error; } (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); (void) msdosfs_statfs(mp, &mp->mnt_stat, p); #ifdef MSDOSFS_DEBUG printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap); #endif return (0); } static int mountmsdosfs(devvp, mp, p, argp) struct vnode *devvp; struct mount *mp; struct proc *p; struct msdosfs_args *argp; { struct msdosfsmount *pmp; struct buf *bp; dev_t dev = devvp->v_rdev; #ifndef __FreeBSD__ struct partinfo dpart; #endif union bootsector *bsp; struct byte_bpb33 *b33; struct byte_bpb50 *b50; #ifdef PC98 u_int pc98_wrk; u_int Phy_Sector_Size; #endif struct byte_bpb710 *b710; u_int8_t SecPerClust; int ronly, error; int bsize = 0, dtype = 0, tmp; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); if (vcount(devvp) > 1 && devvp != rootvp) return (EBUSY); vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); VOP_UNLOCK(devvp, 0, p); if (error) return (error); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); bp = NULL; /* both used in error_exit */ pmp = NULL; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { /* * We need the disklabel to calculate the size of a FAT entry * later on. Also make sure the partition contains a filesystem * of type FS_MSDOS. This doesn't work for floppies, so we have * to check for them too. * * At least some parts of the msdos fs driver seem to assume * that the size of a disk block will always be 512 bytes. * Let's check it... */ error = VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p); if (error) goto error_exit; tmp = dpart.part->p_fstype; dtype = dpart.disklab->d_type; bsize = dpart.disklab->d_secsize; if (bsize != 512 || (dtype!=DTYPE_FLOPPY && tmp!=FS_MSDOS)) { error = EINVAL; goto error_exit; } } #endif /* * Read the boot sector of the filesystem, and then check the * boot signature. If not a dos boot sector then error out. 
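 * (The check compares the last two bytes of the sector against
 * BOOTSIG0/BOOTSIG1 -- the conventional 0x55 0xaa marker -- with the
 * PC98 cases below additionally accepting the signatures written by
 * several Japanese DOS variants.)
 */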
*/ #ifdef PC98 devvp->v_flag &= 0xffff; error = bread(devvp, 0, 1024, NOCRED, &bp); #else error = bread(devvp, 0, 512, NOCRED, &bp); #endif if (error) goto error_exit; bp->b_flags |= B_AGE; bsp = (union bootsector *)bp->b_data; b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB; b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB; b710 = (struct byte_bpb710 *)bsp->bs710.bsPBP; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif #ifdef PC98 if ((bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) && (bsp->bs50.bsBootSectSig0 != 0 /* PC98 DOS 3.3x */ || bsp->bs50.bsBootSectSig1 != 0) && (bsp->bs50.bsBootSectSig0 != 0x90 /* PC98 DOS 5.0 */ || bsp->bs50.bsBootSectSig1 != 0x3d) && (bsp->bs50.bsBootSectSig0 != 0x46 /* PC98 DOS 3.3B */ || bsp->bs50.bsBootSectSig1 != 0xfa)) { #else if (bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif pmp = malloc(sizeof *pmp, M_MSDOSFSMNT, M_WAITOK); bzero((caddr_t)pmp, sizeof *pmp); pmp->pm_mountp = mp; /* * Compute several useful quantities from the bpb in the * bootsector. Copy in the dos 5 variant of the bpb then fix up * the fields that are different between dos 5 and dos 3.3. */ SecPerClust = b50->bpbSecPerClust; pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec); pmp->pm_ResSectors = getushort(b50->bpbResSectors); pmp->pm_FATs = b50->bpbFATs; pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts); pmp->pm_Sectors = getushort(b50->bpbSectors); pmp->pm_FATsecs = getushort(b50->bpbFATsecs); pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack); pmp->pm_Heads = getushort(b50->bpbHeads); pmp->pm_Media = b50->bpbMedia; #ifndef __FreeBSD__ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { #endif /* XXX - We should probably check more values here */ if (!pmp->pm_BytesPerSec || !SecPerClust || !pmp->pm_Heads || pmp->pm_Heads > 255 #ifdef PC98 || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 255) { #else || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 63) { #endif error = EINVAL; goto error_exit; } #ifndef __FreeBSD__ } #endif if (pmp->pm_Sectors == 0) { pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs); pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors); } else { pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs); pmp->pm_HugeSectors = pmp->pm_Sectors; } #ifdef PC98 /* for PC98 added Satoshi Yasuda */ Phy_Sector_Size = 512; if ((devvp->v_rdev>>8) == 2) { /* floppy check */ if (((devvp->v_rdev&077) == 2) && (pmp->pm_HugeSectors == 1232)) { Phy_Sector_Size = 1024; /* 2HD */ /* * 1024byte/sector support */ devvp->v_flag |= 0x10000; } else { if ((((devvp->v_rdev&077) == 3) /* 2DD 8 or 9 sector */ && (pmp->pm_HugeSectors == 1440)) /* 9 sector */ || (((devvp->v_rdev&077) == 4) && (pmp->pm_HugeSectors == 1280)) /* 8 sector */ || (((devvp->v_rdev&077) == 5) && (pmp->pm_HugeSectors == 2880))) { /* 1.44M */ Phy_Sector_Size = 512; } else { if (((devvp->v_rdev&077) != 1) && ((devvp->v_rdev&077) != 0)) { /* 2HC */ error = EINVAL; goto error_exit; } } } } pc98_wrk = pmp->pm_BytesPerSec / Phy_Sector_Size; pmp->pm_BytesPerSec = Phy_Sector_Size; SecPerClust = SecPerClust * pc98_wrk; pmp->pm_HugeSectors = pmp->pm_HugeSectors * pc98_wrk; pmp->pm_ResSectors = pmp->pm_ResSectors * pc98_wrk; pmp->pm_FATsecs = pmp->pm_FATsecs * pc98_wrk; pmp->pm_SecPerTrack = pmp->pm_SecPerTrack * pc98_wrk; pmp->pm_HiddenSects = pmp->pm_HiddenSects * pc98_wrk; #endif /* */ if (pmp->pm_HugeSectors > 0xffffffff / pmp->pm_BytesPerSec + 1) { /* * We cannot deal currently with this 
size of disk * due to fileid limitations (see msdosfs_getattr and * msdosfs_readdir) */ error = EINVAL; goto error_exit; } if (pmp->pm_RootDirEnts == 0) { if (bsp->bs710.bsBootSectSig2 != BOOTSIG2 || bsp->bs710.bsBootSectSig3 != BOOTSIG3 || pmp->pm_Sectors || pmp->pm_FATsecs || getushort(b710->bpbFSVers)) { error = EINVAL; goto error_exit; } pmp->pm_fatmask = FAT32_MASK; pmp->pm_fatmult = 4; pmp->pm_fatdiv = 1; pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs); if (getushort(b710->bpbExtFlags) & FATMIRROR) pmp->pm_curfat = getushort(b710->bpbExtFlags) & FATNUM; else pmp->pm_flags |= MSDOSFS_FATMIRROR; } else pmp->pm_flags |= MSDOSFS_FATMIRROR; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if (FAT32(pmp)) { /* * GEMDOS doesn't know fat32. */ error = EINVAL; goto error_exit; } /* * Check a few values (could do some more): * - logical sector size: power of 2, >= block size * - sectors per cluster: power of 2, >= 1 * - number of sectors: >= 1, <= size of partition */ if ( (SecPerClust == 0) || (SecPerClust & (SecPerClust - 1)) || (pmp->pm_BytesPerSec < bsize) || (pmp->pm_BytesPerSec & (pmp->pm_BytesPerSec - 1)) || (pmp->pm_HugeSectors == 0) || (pmp->pm_HugeSectors * (pmp->pm_BytesPerSec / bsize) > dpart.part->p_size) ) { error = EINVAL; goto error_exit; } /* * XXX - Many parts of the msdos fs driver seem to assume that * the number of bytes per logical sector (BytesPerSec) will * always be the same as the number of bytes per disk block * Let's pretend it is. */ tmp = pmp->pm_BytesPerSec / bsize; pmp->pm_BytesPerSec = bsize; pmp->pm_HugeSectors *= tmp; pmp->pm_HiddenSects *= tmp; pmp->pm_ResSectors *= tmp; pmp->pm_Sectors *= tmp; pmp->pm_FATsecs *= tmp; SecPerClust *= tmp; } #endif pmp->pm_fatblk = pmp->pm_ResSectors; if (FAT32(pmp)) { pmp->pm_rootdirblk = getulong(b710->bpbRootClust); pmp->pm_firstcluster = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_fsinfo = getushort(b710->bpbFSInfo); } else { pmp->pm_rootdirblk = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_rootdirsize = (pmp->pm_RootDirEnts * sizeof(struct direntry) + pmp->pm_BytesPerSec - 1) / pmp->pm_BytesPerSec;/* in sectors */ pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize; } pmp->pm_nmbrofclusters = (pmp->pm_HugeSectors - pmp->pm_firstcluster) / SecPerClust; pmp->pm_maxcluster = pmp->pm_nmbrofclusters + 1; pmp->pm_fatsize = pmp->pm_FATsecs * pmp->pm_BytesPerSec; #ifndef __FreeBSD__ if (argp->flags & MSDOSFSMNT_GEMDOSFS) { if ((pmp->pm_nmbrofclusters <= (0xff0 - 2)) && ((dtype == DTYPE_FLOPPY) || ((dtype == DTYPE_VNODE) && ((pmp->pm_Heads == 1) || (pmp->pm_Heads == 2)))) ) { pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } else #endif if (pmp->pm_fatmask == 0) { if (pmp->pm_maxcluster <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) { /* * This will usually be a floppy disk. This size makes * sure that one fat entry will not be split across * multiple blocks. */ pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } if (FAT12(pmp)) pmp->pm_fatblocksize = 3 * pmp->pm_BytesPerSec; else pmp->pm_fatblocksize = MAXBSIZE; pmp->pm_fatblocksec = pmp->pm_fatblocksize / pmp->pm_BytesPerSec; pmp->pm_bnshift = ffs(pmp->pm_BytesPerSec) - 1; /* * Compute mask and shift value for isolating cluster relative byte * offsets and cluster numbers from a file offset. 
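 * For example, 8 sectors of 512 bytes per cluster gives
 * pm_bpcluster = 4096, pm_crbomask = 0xfff and pm_cnshift = 12, so
 * for a file offset o the cluster number is o >> pm_cnshift and the
 * byte offset within the cluster is o & pm_crbomask.  This depends
 * on pm_bpcluster being a power of two, which is what the "valid
 * cluster size" check below enforces.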
*/ pmp->pm_bpcluster = SecPerClust * pmp->pm_BytesPerSec; pmp->pm_crbomask = pmp->pm_bpcluster - 1; pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1; /* * Check for valid cluster size * must be a power of 2 */ if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) { error = EINVAL; goto error_exit; } /* * Release the bootsector buffer. */ brelse(bp); bp = NULL; /* * Check FSInfo. */ if (pmp->pm_fsinfo) { struct fsinfo *fp; if ((error = bread(devvp, pmp->pm_fsinfo, 1024, NOCRED, &bp)) != 0) goto error_exit; fp = (struct fsinfo *)bp->b_data; if (!bcmp(fp->fsisig1, "RRaA", 4) && !bcmp(fp->fsisig2, "rrAa", 4) && !bcmp(fp->fsisig3, "\0\0\125\252", 4) && !bcmp(fp->fsisig4, "\0\0\125\252", 4)) pmp->pm_nxtfree = getulong(fp->fsinxtfree); else pmp->pm_fsinfo = 0; brelse(bp); bp = NULL; } /* * Check and validate (or perhaps invalidate?) the fsinfo structure? XXX */ /* * Allocate memory for the bitmap of allocated clusters, and then * fill it in. */ pmp->pm_inusemap = malloc(((pmp->pm_maxcluster + N_INUSEBITS - 1) / N_INUSEBITS) * sizeof(*pmp->pm_inusemap), M_MSDOSFSFAT, M_WAITOK); /* * fillinusemap() needs pm_devvp. */ pmp->pm_dev = dev; pmp->pm_devvp = devvp; /* * Have the inuse map filled in. */ if ((error = fillinusemap(pmp)) != 0) goto error_exit; /* * If they want fat updates to be synchronous then let them suffer * the performance degradation in exchange for the on disk copy of * the fat being correct just about all the time. I suppose this * would be a good thing to turn on if the kernel is still flakey. */ if (mp->mnt_flag & MNT_SYNCHRONOUS) pmp->pm_flags |= MSDOSFSMNT_WAITONFAT; /* * Finish up. */ if (ronly) pmp->pm_flags |= MSDOSFSMNT_RONLY; else pmp->pm_fmod = 1; mp->mnt_data = (qaddr_t) pmp; mp->mnt_stat.f_fsid.val[0] = (long)dev; mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_flag |= MNT_LOCAL; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; return 0; error_exit: if (bp) brelse(bp); (void) VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NOCRED, p); if (pmp) { if (pmp->pm_inusemap) free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } static int msdosfs_start(mp, flags, p) struct mount *mp; int flags; struct proc *p; { return (0); } /* * Unmount the filesystem described by mp. */ static int msdosfs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { struct msdosfsmount *pmp; int error, flags; flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; error = vflush(mp, NULLVP, flags); if (error) return error; pmp = VFSTOMSDOSFS(mp); - pmp->pm_devvp->v_specflags &= ~SI_MOUNTEDON; + pmp->pm_devvp->v_specmountpoint = NULL; #ifdef MSDOSFS_DEBUG { struct vnode *vp = pmp->pm_devvp; printf("msdosfs_umount(): just before calling VOP_CLOSE()\n"); printf("flag %08lx, usecount %d, writecount %d, holdcnt %ld\n", vp->v_flag, vp->v_usecount, vp->v_writecount, vp->v_holdcnt); printf("lastr %d, id %lu, mount %p, op %p\n", vp->v_lastr, vp->v_id, vp->v_mount, vp->v_op); printf("freef %p, freeb %p, mount %p\n", vp->v_freelist.tqe_next, vp->v_freelist.tqe_prev, vp->v_mount); printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n", vp->v_cleanblkhd.lh_first, vp->v_dirtyblkhd.lh_first, vp->v_numoutput, vp->v_type); printf("union %p, tag %d, data[0] %08x, data[1] %08x\n", vp->v_socket, vp->v_tag, ((u_int *)vp->v_data)[0], ((u_int *)vp->v_data)[1]); } #endif - error = VOP_CLOSE(pmp->pm_devvp, (pmp->pm_flags&MSDOSFSMNT_RONLY) ? 
FREAD : FREAD | FWRITE, - NOCRED, p); + error = VOP_CLOSE(pmp->pm_devvp, + (pmp->pm_flags&MSDOSFSMNT_RONLY) ? FREAD : FREAD | FWRITE, + NOCRED, p); vrele(pmp->pm_devvp); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } static int msdosfs_root(mp, vpp) struct mount *mp; struct vnode **vpp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct denode *ndep; int error; #ifdef MSDOSFS_DEBUG printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp); #endif error = deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, &ndep); if (error) return (error); *vpp = DETOV(ndep); return (0); } static int msdosfs_quotactl(mp, cmds, uid, arg, p) struct mount *mp; int cmds; uid_t uid; caddr_t arg; struct proc *p; { return EOPNOTSUPP; } static int msdosfs_statfs(mp, sbp, p) struct mount *mp; struct statfs *sbp; struct proc *p; { struct msdosfsmount *pmp; pmp = VFSTOMSDOSFS(mp); sbp->f_bsize = pmp->pm_bpcluster; sbp->f_iosize = pmp->pm_bpcluster; sbp->f_blocks = pmp->pm_nmbrofclusters; sbp->f_bfree = pmp->pm_freeclustercount; sbp->f_bavail = pmp->pm_freeclustercount; sbp->f_files = pmp->pm_RootDirEnts; /* XXX */ sbp->f_ffree = 0; /* what to put in here? */ if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); } strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN); return (0); } static int msdosfs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *vp, *nvp; struct denode *dep; struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); int error, allerror = 0; /* * If we ever switch to not updating all of the fats all the time, * this would be the place to update them from the first one. */ if (pmp->pm_fmod != 0) if (pmp->pm_flags & MSDOSFSMNT_RONLY) panic("msdosfs_sync: rofs mod"); else { /* update fats here */ } /* * Write back each (modified) denode. */ simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; dep = VTODE(vp); - if (vp->v_type == VNON || ((dep->de_flag & - (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) - && vp->v_dirtyblkhd.lh_first == NULL) { + if (vp->v_type == VNON + || (waitfor == MNT_LAZY) /* can this happen with msdosfs? */ + || (((dep->de_flag & + (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0) + && (vp->v_dirtyblkhd.lh_first == NULL))) { simple_unlock(&vp->v_interlock); continue; } simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; } error = VOP_FSYNC(vp, cred, waitfor, p); if (error) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); /* done with this one */ simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); /* * Flush filesystem control info.
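 * (That is, fsync the device vnode itself; in msdosfs the FAT and
 * directory blocks are buffered on pm_devvp rather than on any
 * denode, so this pass is what pushes them out.)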
*/ error = VOP_FSYNC(pmp->pm_devvp, cred, waitfor, p); if (error) allerror = error; return (allerror); } static int msdosfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); struct defid *defhp = (struct defid *) fhp; struct denode *dep; struct netcred *np; int error; np = vfs_export_lookup(mp, &pmp->pm_export, nam); if (np == NULL) return (EACCES); error = deget(pmp, defhp->defid_dirclust, defhp->defid_dirofs, &dep); if (error) { *vpp = NULLVP; return (error); } *vpp = DETOV(dep); *exflagsp = np->netc_exflags; *credanonp = &np->netc_anon; return (0); } static int msdosfs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { struct denode *dep; struct defid *defhp; dep = VTODE(vp); defhp = (struct defid *)fhp; defhp->defid_len = sizeof(struct defid); defhp->defid_dirclust = dep->de_dirclust; defhp->defid_dirofs = dep->de_diroffset; /* defhp->defid_gen = dep->de_gen; */ return (0); } static int msdosfs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { return EOPNOTSUPP; } static struct vfsops msdosfs_vfsops = { msdosfs_mount, msdosfs_start, msdosfs_unmount, msdosfs_root, msdosfs_quotactl, msdosfs_statfs, msdosfs_sync, msdosfs_vget, vfs_vrele, msdosfs_fhtovp, msdosfs_vptofh, msdosfs_init }; VFS_SET(msdosfs_vfsops, msdos, MOUNT_MSDOS, 0); Index: head/sys/nfs/nfs_bio.c =================================================================== --- head/sys/nfs/nfs_bio.c (revision 34265) +++ head/sys/nfs/nfs_bio.c (revision 34266) @@ -1,1234 +1,1238 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 - * $Id: nfs_bio.c,v 1.51 1998/03/06 09:46:43 msmith Exp $ + * $Id: nfs_bio.c,v 1.52 1998/03/07 21:36:01 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size, struct proc *p)); static void nfs_prot_buf __P((struct buf *bp, int off, int n)); extern int nfs_numasync; extern struct nfsstats nfsstats; /* * Vnode op for VM getpages. */ int nfs_getpages(ap) struct vop_getpages_args *ap; { int i, error, nextoff, size, toff, npages; struct uio uio; struct iovec iov; vm_page_t m; vm_offset_t kva; struct buf *bp; if ((ap->a_vp->v_object) == NULL) { printf("nfs_getpages: called with non-merged cache vnode??\n"); return EOPNOTSUPP; } /* * We use only the kva address for the buffer, but this is extremely * convenient and fast. */ bp = getpbuf(); npages = btoc(ap->a_count); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, ap->a_m, npages); iov.iov_base = (caddr_t) kva; iov.iov_len = ap->a_count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(ap->a_m[0]->pindex); uio.uio_resid = ap->a_count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_procp = curproc; error = nfs_readrpc(ap->a_vp, &uio, curproc->p_ucred); pmap_qremove(kva, npages); relpbuf(bp); if (error && (uio.uio_resid == ap->a_count)) return VM_PAGER_ERROR; size = ap->a_count - uio.uio_resid; for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= size) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else { int nvalid = ((size + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); vm_page_set_validclean(m, 0, nvalid); } if (i != ap->a_reqpage) { /* * Whether or not to leave the page activated is up in * the air, but we should put the page on a page queue * somewhere (it already is in the object). Result: * It appears that empirical results show that * deactivating pages is best. */ /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error) { if (m->flags & PG_WANTED) vm_page_activate(m); else vm_page_deactivate(m); PAGE_WAKEUP(m); } else { vnode_pager_freepage(m); } } } return 0; } /* * Vnode op for VM putpages. */ int nfs_putpages(ap) struct vop_putpages_args *ap; { struct uio uio; struct iovec iov; vm_page_t m; vm_offset_t kva; struct buf *bp; int iomode, must_commit, i, error, npages; int *rtvals; rtvals = ap->a_rtvals; npages = btoc(ap->a_count); for (i = 0; i < npages; i++) { rtvals[i] = VM_PAGER_AGAIN; } /* * We use only the kva address for the buffer, but this is extremely * convenient and fast.
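 * What the pbuf buys us: pmap_qenter() maps the caller's loose pages
 * contiguously at the pbuf's kva, so a single iovec spanning
 * a_count bytes can be handed to nfs_writerpc() below, rather than
 * building a separate transfer per page.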
*/ bp = getpbuf(); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, ap->a_m, npages); iov.iov_base = (caddr_t) kva; iov.iov_len = ap->a_count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(ap->a_m[0]->pindex); uio.uio_resid = ap->a_count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_procp = curproc; if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0) iomode = NFSV3WRITE_UNSTABLE; else iomode = NFSV3WRITE_FILESYNC; error = nfs_writerpc(ap->a_vp, &uio, curproc->p_ucred, &iomode, &must_commit); pmap_qremove(kva, npages); relpbuf(bp); if (!error) { int nwritten = round_page(ap->a_count - uio.uio_resid) / PAGE_SIZE; for (i = 0; i < nwritten; i++) { rtvals[i] = VM_PAGER_OK; ap->a_m[i]->dirty = 0; } if (must_commit) nfs_clearcommit(ap->a_vp->v_mount); } return ap->a_rtvals[0]; } /* * Vnode op for read using bio * Any similarity to readip() is purely coincidental */ int nfs_bioread(vp, uio, ioflag, cred, getpages) register struct vnode *vp; register struct uio *uio; int ioflag; struct ucred *cred; int getpages; { register struct nfsnode *np = VTONFS(vp); register int biosize, diff, i; struct buf *bp = 0, *rabp; struct vattr vattr; struct proc *p; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn, rabn; int bufsize; int nra, error = 0, n = 0, on = 0, not_readin; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("nfs_read mode"); #endif if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); p = uio->uio_procp; if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) (void)nfs_fsinfo(nmp, vp, cred, p); biosize = vp->v_mount->mnt_stat.f_iosize; /* * For nfs, cache consistency can only be maintained approximately. * Although RFC1094 does not specify the criteria, the following is * believed to be compatible with the reference port. * For nqnfs, full cache consistency is maintained within the loop. * For nfs: * If the file's modify time on the server has changed since the * last read rpc or you have written to the file, * you may have lost data cache consistency with the * server, so flush all of the file's data out of the cache. * Then force a getattr rpc to ensure that you have up to date * attributes. * NB: This implies that cache data can be read when up to * NFS_ATTRTIMEO seconds out of date. If you find that you need current * attributes this could be forced by setting n_attrstamp to 0 before * the VOP_GETATTR() call. */ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) { if (np->n_flag & NMODIFIED) { if (vp->v_type != VREG) { if (vp->v_type != VDIR) panic("nfs: bioread, not dir"); nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } np->n_attrstamp = 0; error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } } } do { /* * Get a valid lease. If cached data is stale, flush it. 
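 * (NQNFS only: a lease is the server's promise that our cached data
 * stays current for its term.  The loop below renews it while it
 * keeps coming back expired; if the server's revision count n_lrev
 * has moved past the revision our cache was built from, n_brev, the
 * cached blocks are invalidated before reading.)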
*/ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, cred, p); } while (error == NQNFS_EXPIRED); if (error) return (error); if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE) || ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { if (vp->v_type == VDIR) nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; } } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } } if (np->n_flag & NQNFSNONCACHE) { switch (vp->v_type) { case VREG: return (nfs_readrpc(vp, uio, cred)); case VLNK: return (nfs_readlinkrpc(vp, uio, cred)); case VDIR: break; default: printf(" NQNFSNONCACHE: type %x unexpected\n", vp->v_type); }; } switch (vp->v_type) { case VREG: nfsstats.biocache_reads++; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); not_readin = 1; /* * Start the read ahead(s), as required. */ if (nfs_numasync > 0 && nmp->nm_readahead > 0) { for (nra = 0; nra < nmp->nm_readahead && (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) { rabn = lbn + 1 + nra; if (!incore(vp, rabn)) { rabp = nfs_getcacheblk(vp, rabn, biosize, p); if (!rabp) return (EINTR); if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { rabp->b_flags |= B_INVAL|B_ERROR; vfs_unbusy_pages(rabp); brelse(rabp); } } else brelse(rabp); } } } /* * If the block is in the cache and has the required data * in a valid region, just copy it out. * Otherwise, get the block and write back/read in, * as required. */ again: bufsize = biosize; if ((off_t)(lbn + 1) * biosize > np->n_size && (off_t)(lbn + 1) * biosize - np->n_size < biosize) { bufsize = np->n_size - lbn * biosize; bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); } bp = nfs_getcacheblk(vp, lbn, bufsize, p); if (!bp) return (EINTR); /* * If we are being called from nfs_getpages, we must * make sure the buffer is a vmio buffer. The vp will * already be setup for vmio but there may be some old * non-vmio buffers attached to it. */ if (getpages && !(bp->b_flags & B_VMIO)) { #ifdef DIAGNOSTIC printf("nfs_bioread: non vmio buf found, discarding\n"); #endif bp->b_flags |= B_NOCACHE; bp->b_flags |= B_INVAFTERWRITE; if (bp->b_dirtyend > 0) { if ((bp->b_flags & B_DELWRI) == 0) panic("nfsbioread"); if (VOP_BWRITE(bp) == EINTR) return (EINTR); } else brelse(bp); goto again; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); not_readin = 0; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); return (error); } } if (bufsize > on) { n = min((unsigned)(bufsize - on), uio->uio_resid); } else { n = 0; } diff = np->n_size - uio->uio_offset; if (diff < n) n = diff; if (not_readin && n > 0) { if (on < bp->b_validoff || (on + n) > bp->b_validend) { bp->b_flags |= B_NOCACHE; bp->b_flags |= B_INVAFTERWRITE; if (bp->b_dirtyend > 0) { if ((bp->b_flags & B_DELWRI) == 0) panic("nfsbioread"); if (VOP_BWRITE(bp) == EINTR) return (EINTR); } else brelse(bp); goto again; } } vp->v_lastr = lbn; diff = (on >= bp->b_validend) ? 
0 : (bp->b_validend - on); if (diff < n) n = diff; break; case VLNK: nfsstats.biocache_readlinks++; bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); if (!bp) return (EINTR); if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { bp->b_flags |= B_ERROR; brelse(bp); return (error); } } n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); on = 0; break; case VDIR: nfsstats.biocache_readdirs++; if (np->n_direofoffset && uio->uio_offset >= np->n_direofoffset) { return (0); } lbn = uio->uio_offset / NFS_DIRBLKSIZ; on = uio->uio_offset & (NFS_DIRBLKSIZ - 1); bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); if (!bp) return (EINTR); if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); } while (error == NFSERR_BAD_COOKIE) { nfs_invaldir(vp); error = nfs_vinvalbuf(vp, 0, cred, p, 1); /* * Yuck! The directory has been modified on the * server. The only way to get the block is by * reading from the beginning to get all the * offset cookies. */ for (i = 0; i <= lbn && !error; i++) { if (np->n_direofoffset && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset) return (0); bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p); if (!bp) return (EINTR); if ((bp->b_flags & B_DONE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); } else if (i < lbn) { brelse(bp); } } } } if (error) return (error); } /* * If not eof and read aheads are enabled, start one. * (You need the current block first, so that you have the * directory offset cookie of the next block.) */ if (nfs_numasync > 0 && nmp->nm_readahead > 0 && (np->n_direofoffset == 0 || (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) && !(np->n_flag & NQNFSNONCACHE) && !incore(vp, lbn + 1)) { rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p); if (rabp) { if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { rabp->b_flags |= B_INVAL|B_ERROR; vfs_unbusy_pages(rabp); brelse(rabp); } } else { brelse(rabp); } } } /* * Make sure we use a signed variant of min() since * the second term may be negative. 
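* Taking NFS_DIRBLKSIZ as 4096 for illustration: if the readdir RPC
* came up short (say b_resid = 2048) and on = 3072, then
*
*	NFS_DIRBLKSIZ - bp->b_resid - on = 4096 - 2048 - 3072 = -1024
*
* An unsigned min() would wrap that to a huge value and pick
* uio_resid, copying past the valid data; lmin() keeps the negative
* result, n <= 0, and the uiomove() below is skipped.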
*/ n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on); break; default: printf(" nfs_bioread: type %x unexpected\n",vp->v_type); break; }; if (n > 0) { error = uiomove(bp->b_data + on, (int)n, uio); } switch (vp->v_type) { case VREG: break; case VLNK: n = 0; break; case VDIR: if (np->n_flag & NQNFSNONCACHE) bp->b_flags |= B_INVAL; break; default: printf(" nfs_bioread: type %x unexpected\n",vp->v_type); } brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n > 0); return (error); } static void nfs_prot_buf(bp, off, n) struct buf *bp; int off; int n; { int pindex, boff, end; if ((bp->b_flags & B_VMIO) == 0) return; end = round_page(off + n); for (boff = trunc_page(off); boff < end; boff += PAGE_SIZE) { pindex = boff >> PAGE_SHIFT; vm_page_protect(bp->b_pages[pindex], VM_PROT_NONE); } } /* * Vnode op for write using bio */ int nfs_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register int biosize; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); register struct ucred *cred = ap->a_cred; int ioflag = ap->a_ioflag; struct buf *bp; struct vattr vattr; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn; int bufsize; int n, on, error = 0, iomode, must_commit; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("nfs_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("nfs_write proc"); #endif if (vp->v_type != VREG) return (EIO); if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; return (np->n_error); } if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) (void)nfs_fsinfo(nmp, vp, cred, p); if (ioflag & (IO_APPEND | IO_SYNC)) { if (np->n_flag & NMODIFIED) { np->n_attrstamp = 0; error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } if (ioflag & IO_APPEND) { np->n_attrstamp = 0; error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); uio->uio_offset = np->n_size; } } if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, i don't think it matters */ if (p && uio->uio_offset + uio->uio_resid > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { psignal(p, SIGXFSZ); return (EFBIG); } /* * I use nm_rsize, not nm_wsize so that all buffer cache blocks * will be the same size within a filesystem. nfs_writerpc will * still use nm_wsize when sizing the rpc's. */ biosize = vp->v_mount->mnt_stat.f_iosize; do { /* * Check for a valid write lease. 
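* (NQNFS mounts only; plain NFS mounts fall straight through to the
* buffer cache code below.)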
*/ if ((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_WRITE)) { do { error = nqnfs_getlease(vp, ND_WRITE, cred, p); } while (error == NQNFS_EXPIRED); if (error) return (error); if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; } } if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) { iomode = NFSV3WRITE_FILESYNC; error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit); if (must_commit) nfs_clearcommit(vp->v_mount); return (error); } nfsstats.biocache_writes++; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize-1); n = min((unsigned)(biosize - on), uio->uio_resid); again: if (uio->uio_offset + n > np->n_size) { np->n_size = uio->uio_offset + n; np->n_flag |= NMODIFIED; vnode_pager_setsize(vp, (u_long)np->n_size); } bufsize = biosize; if ((lbn + 1) * biosize > np->n_size) { bufsize = np->n_size - lbn * biosize; bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); } bp = nfs_getcacheblk(vp, lbn, bufsize, p); if (!bp) return (EINTR); if (bp->b_wcred == NOCRED) { crhold(cred); bp->b_wcred = cred; } np->n_flag |= NMODIFIED; if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) { bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); } /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { bp->b_proc = p; if (VOP_BWRITE(bp) == EINTR) return (EINTR); goto again; } /* * Check for valid write lease and get one as required. * In case getblk() and/or bwrite() delayed us. */ if ((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_WRITE)) { do { error = nqnfs_getlease(vp, ND_WRITE, cred, p); } while (error == NQNFS_EXPIRED); if (error) { brelse(bp); return (error); } if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { brelse(bp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; goto again; } } error = uiomove((char *)bp->b_data + on, n, uio); if (error) { bp->b_flags |= B_ERROR; brelse(bp); return (error); } /* * This will keep the buffer and mmaped regions more coherent. */ nfs_prot_buf(bp, on, n); if (bp->b_dirtyend > 0) { bp->b_dirtyoff = min(on, bp->b_dirtyoff); bp->b_dirtyend = max((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || bp->b_validoff > bp->b_dirtyend) { bp->b_validoff = bp->b_dirtyoff; bp->b_validend = bp->b_dirtyend; } else { bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); bp->b_validend = max(bp->b_validend, bp->b_dirtyend); } /* * Since this block is being modified, it must be written * again and not just committed. */ bp->b_flags &= ~B_NEEDCOMMIT; /* * If the lease is non-cachable or IO_SYNC do bwrite(). */ if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { bp->b_proc = p; if (ioflag & IO_INVAL) bp->b_flags |= B_INVAL; error = VOP_BWRITE(bp); if (error) return (error); if (np->n_flag & NQNFSNONCACHE) { error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } } else if ((n + on) == biosize && (nmp->nm_flag & NFSMNT_NQNFS) == 0) { bp->b_proc = (struct proc *)0; bp->b_flags |= B_ASYNC; (void)nfs_writebp(bp, 0); } else bdwrite(bp); } while (uio->uio_resid > 0 && n > 0); return (0); } /* * Get an nfs cache block. 
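* (Essentially a wrapper around getblk() that honors interruptible
* mounts.)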
* Allocate a new one if the block isn't currently in the cache * and return the block marked busy. If the calling process is * interrupted by a signal for an interruptible mount point, return * NULL. */ static struct buf * nfs_getcacheblk(vp, bn, size, p) struct vnode *vp; daddr_t bn; int size; struct proc *p; { register struct buf *bp; struct mount *mp; struct nfsmount *nmp; mp = vp->v_mount; nmp = VFSTONFS(mp); if (nmp->nm_flag & NFSMNT_INT) { bp = getblk(vp, bn, size, PCATCH, 0); while (bp == (struct buf *)0) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) return ((struct buf *)0); bp = getblk(vp, bn, size, 0, 2 * hz); } } else bp = getblk(vp, bn, size, 0, 0); if( vp->v_type == VREG) { int biosize; biosize = mp->mnt_stat.f_iosize; bp->b_blkno = (bn * biosize) / DEV_BSIZE; } return (bp); } /* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int nfs_vinvalbuf(vp, flags, cred, p, intrflg) struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int intrflg; { register struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, slpflag, slptimeo; if (vp->v_flag & VXLOCK) { return (0); } if ((nmp->nm_flag & NFSMNT_INT) == 0) intrflg = 0; if (intrflg) { slpflag = PCATCH; slptimeo = 2 * hz; } else { slpflag = 0; slptimeo = 0; } /* * First wait for any other process doing a flush to complete. */ while (np->n_flag & NFLUSHINPROG) { np->n_flag |= NFLUSHWANT; error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo); if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) return (EINTR); } /* * Now, flush as required. */ np->n_flag |= NFLUSHINPROG; error = vinvalbuf(vp, flags, cred, p, slpflag, 0); while (error) { if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { np->n_flag &= ~NFLUSHINPROG; if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; wakeup((caddr_t)&np->n_flag); } return (EINTR); } error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); } np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; wakeup((caddr_t)&np->n_flag); } return (0); } /* * Initiate asynchronous I/O. Return an error if no nfsiods are available. * This is mainly to avoid queueing async I/O requests when the nfsiods * are all hung on a dead server. */ int nfs_asyncio(bp, cred) register struct buf *bp; struct ucred *cred; { struct nfsmount *nmp; int i; int gotiod; int slpflag = 0; int slptimeo = 0; int error; if (nfs_numasync == 0) return (EIO); nmp = VFSTONFS(bp->b_vp->v_mount); again: if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; gotiod = FALSE; /* * Find a free iod to process this request. */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (nfs_iodwant[i]) { /* * Found one, so wake it up and tell it which * mount to process. */ NFS_DPF(ASYNCIO, ("nfs_asyncio: waking iod %d for mount %p\n", i, nmp)); nfs_iodwant[i] = (struct proc *)0; nfs_iodmount[i] = nmp; nmp->nm_bufqiods++; wakeup((caddr_t)&nfs_iodwant[i]); gotiod = TRUE; break; } /* * If none are free, we may already have an iod working on this mount * point. If so, it will process our request. */ if (!gotiod) { if (nmp->nm_bufqiods > 0) { NFS_DPF(ASYNCIO, ("nfs_asyncio: %d iods are already processing mount %p\n", nmp->nm_bufqiods, nmp)); gotiod = TRUE; } } /* * If we have an iod which can process the request, then queue * the buffer. */ if (gotiod) { /* * Ensure that the queue never grows too large. 
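* Bounding the queue at twice the number of iods keeps a dead or
* slow server from soaking up every buffer in the system.  On
* interruptible mounts the tsleep() below can be broken by a signal;
* after a first interrupted sleep the wait falls back to a plain
* 2 * hz timeout.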
*/ while (nmp->nm_bufqlen >= 2*nfs_numasync) { NFS_DPF(ASYNCIO, ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp)); nmp->nm_bufqwant = TRUE; error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO, "nfsaio", slptimeo); if (error) { if (nfs_sigintr(nmp, NULL, bp->b_proc)) return (EINTR); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } /* * We might have lost our iod while sleeping, * so check and loop if nescessary. */ if (nmp->nm_bufqiods == 0) { NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp)); goto again; } } if (bp->b_flags & B_READ) { if (bp->b_rcred == NOCRED && cred != NOCRED) { crhold(cred); bp->b_rcred = cred; } } else { bp->b_flags |= B_WRITEINPROG; if (bp->b_wcred == NOCRED && cred != NOCRED) { crhold(cred); bp->b_wcred = cred; } } TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist); nmp->nm_bufqlen++; return (0); } /* * All the iods are busy on other mounts, so return EIO to * force the caller to process the i/o synchronously. */ NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n")); return (EIO); } /* * Do an I/O operation to/from a cache block. This may be called * synchronously or from an nfsiod. */ int nfs_doio(bp, cr, p) register struct buf *bp; struct ucred *cr; struct proc *p; { register struct uio *uiop; register struct vnode *vp; struct nfsnode *np; struct nfsmount *nmp; int error = 0, diff, len, iomode, must_commit = 0; struct uio uio; struct iovec io; vp = bp->b_vp; np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = p; /* * Historically, paging was done with physio, but no more. */ if (bp->b_flags & B_PHYS) { /* * ...though reading /dev/drum still gets us here. */ io.iov_len = uiop->uio_resid = bp->b_bcount; /* mapping was done by vmapbuf() */ io.iov_base = bp->b_data; uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; if (bp->b_flags & B_READ) { uiop->uio_rw = UIO_READ; nfsstats.read_physios++; error = nfs_readrpc(vp, uiop, cr); } else { int com; iomode = NFSV3WRITE_DATASYNC; uiop->uio_rw = UIO_WRITE; nfsstats.write_physios++; error = nfs_writerpc(vp, uiop, cr, &iomode, &com); } if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } } else if (bp->b_flags & B_READ) { io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; switch (vp->v_type) { case VREG: uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; nfsstats.read_bios++; error = nfs_readrpc(vp, uiop, cr); if (!error) { bp->b_validoff = 0; if (uiop->uio_resid) { /* * If len > 0, there is a hole in the file and * no writes after the hole have been pushed to * the server yet. * Just zero fill the rest of the valid area. 
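* Illustrative numbers: with b_bcount = 8192 and a read that came up
* 4096 bytes short, diff below is 4096; if the file size says 2048
* of the missing bytes really exist (a hole), those are zeroed and
* b_validend becomes 6144, otherwise b_validend is left at 4096.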
*/ diff = bp->b_bcount - uiop->uio_resid; len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE + diff); if (len > 0) { len = min(len, uiop->uio_resid); bzero((char *)bp->b_data + diff, len); bp->b_validend = diff + len; } else bp->b_validend = diff; } else bp->b_validend = bp->b_bcount; } if (p && (vp->v_flag & VTEXT) && (((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_READ) && np->n_lrev != np->n_brev) || (!(nmp->nm_flag & NFSMNT_NQNFS) && np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { uprintf("Process killed due to text file modification\n"); psignal(p, SIGKILL); p->p_flag |= P_NOSWAP; } break; case VLNK: uiop->uio_offset = (off_t)0; nfsstats.readlink_bios++; error = nfs_readlinkrpc(vp, uiop, cr); break; case VDIR: nfsstats.readdir_bios++; uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ; if (nmp->nm_flag & NFSMNT_RDIRPLUS) { error = nfs_readdirplusrpc(vp, uiop, cr); if (error == NFSERR_NOTSUPP) nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0) error = nfs_readdirrpc(vp, uiop, cr); break; default: printf("nfs_doio: type %x unexpected\n",vp->v_type); break; }; if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } } else { if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size) bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; nfsstats.write_bios++; if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC) iomode = NFSV3WRITE_UNSTABLE; else iomode = NFSV3WRITE_FILESYNC; bp->b_flags |= B_WRITEINPROG; error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit); if (!error && iomode == NFSV3WRITE_UNSTABLE) { bp->b_flags |= B_NEEDCOMMIT; if (bp->b_dirtyoff == 0 && bp->b_dirtyend == bp->b_bufsize) bp->b_flags |= B_CLUSTEROK; } else bp->b_flags &= ~B_NEEDCOMMIT; bp->b_flags &= ~B_WRITEINPROG; /* * For an interrupted write, the buffer is still valid * and the write hasn't been pushed to the server yet, * so we can't set B_ERROR and report the interruption * by setting B_EINTR. For the B_ASYNC case, B_EINTR * is not relevant, so the rpc attempt is essentially * a noop. For the case of a V3 write rpc not being * committed to stable storage, the block is still * dirty and requires either a commit rpc or another * write rpc with iomode == NFSV3WRITE_FILESYNC before * the block is reused. This is indicated by setting * the B_DELWRI and B_NEEDCOMMIT flags. */ if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { + int s; + bp->b_flags &= ~(B_INVAL|B_NOCACHE); ++numdirtybuffers; bp->b_flags |= B_DELWRI; + s = splbio(); reassignbuf(bp, vp); + splx(s); if ((bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_flags |= B_ERROR; bp->b_error = np->n_error = error; np->n_flag |= NWRITEERR; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; biodone(bp); return (0); } } bp->b_resid = uiop->uio_resid; if (must_commit) nfs_clearcommit(vp->v_mount); biodone(bp); return (error); } Index: head/sys/nfs/nfs_vnops.c =================================================================== --- head/sys/nfs/nfs_vnops.c (revision 34265) +++ head/sys/nfs/nfs_vnops.c (revision 34266) @@ -1,3296 +1,3303 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 - * $Id: nfs_vnops.c,v 1.79 1998/03/06 09:46:48 msmith Exp $ + * $Id: nfs_vnops.c,v 1.80 1998/03/07 21:36:06 dyson Exp $ */ /* * vnode op calls for Sun NFS version 2 and 3 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Defs */ #define TRUE 1 #define FALSE 0 /* * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. 
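* (Without B_VMIO the macro below simply compiles the calls away.)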
*/ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static int nfsspec_read __P((struct vop_read_args *)); static int nfsspec_write __P((struct vop_write_args *)); static int nfsfifo_read __P((struct vop_read_args *)); static int nfsfifo_write __P((struct vop_write_args *)); static int nfsspec_close __P((struct vop_close_args *)); static int nfsfifo_close __P((struct vop_close_args *)); #define nfs_poll vop_nopoll static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int)); static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *)); static int nfs_lookup __P((struct vop_lookup_args *)); static int nfs_create __P((struct vop_create_args *)); static int nfs_mknod __P((struct vop_mknod_args *)); static int nfs_open __P((struct vop_open_args *)); static int nfs_close __P((struct vop_close_args *)); static int nfs_access __P((struct vop_access_args *)); static int nfs_getattr __P((struct vop_getattr_args *)); static int nfs_setattr __P((struct vop_setattr_args *)); static int nfs_read __P((struct vop_read_args *)); static int nfs_mmap __P((struct vop_mmap_args *)); static int nfs_fsync __P((struct vop_fsync_args *)); static int nfs_remove __P((struct vop_remove_args *)); static int nfs_link __P((struct vop_link_args *)); static int nfs_rename __P((struct vop_rename_args *)); static int nfs_mkdir __P((struct vop_mkdir_args *)); static int nfs_rmdir __P((struct vop_rmdir_args *)); static int nfs_symlink __P((struct vop_symlink_args *)); static int nfs_readdir __P((struct vop_readdir_args *)); static int nfs_bmap __P((struct vop_bmap_args *)); static int nfs_strategy __P((struct vop_strategy_args *)); static int nfs_lookitup __P((struct vnode *,char *,int,struct ucred *,struct proc *,struct nfsnode **)); static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *)); static int nfsspec_access __P((struct vop_access_args *)); static int nfs_readlink __P((struct vop_readlink_args *)); static int nfs_print __P((struct vop_print_args *)); static int nfs_advlock __P((struct vop_advlock_args *)); static int nfs_bwrite __P((struct vop_bwrite_args *)); /* * Global vfs data structures for nfs */ vop_t **nfsv2_vnodeop_p; static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) nfs_abortop }, { &vop_access_desc, (vop_t *) nfs_access }, { &vop_advlock_desc, (vop_t *) nfs_advlock }, { &vop_bmap_desc, (vop_t *) nfs_bmap }, { &vop_bwrite_desc, (vop_t *) nfs_bwrite }, { &vop_close_desc, (vop_t *) nfs_close }, { &vop_create_desc, (vop_t *) nfs_create }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_getpages_desc, (vop_t *) nfs_getpages }, { &vop_putpages_desc, (vop_t *) nfs_putpages }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) nfs_link }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_lookup_desc, (vop_t *) nfs_lookup }, { &vop_mkdir_desc, (vop_t *) nfs_mkdir }, { &vop_mknod_desc, (vop_t *) nfs_mknod }, { &vop_mmap_desc, (vop_t *) nfs_mmap }, { &vop_open_desc, (vop_t *) nfs_open }, { &vop_poll_desc, (vop_t *) nfs_poll }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfs_read }, { &vop_readdir_desc, (vop_t *) nfs_readdir }, { &vop_readlink_desc, (vop_t *) nfs_readlink }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_remove_desc, (vop_t *) nfs_remove }, { &vop_rename_desc, (vop_t *) nfs_rename }, 
{ &vop_rmdir_desc, (vop_t *) nfs_rmdir }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_strategy_desc, (vop_t *) nfs_strategy }, { &vop_symlink_desc, (vop_t *) nfs_symlink }, { &vop_write_desc, (vop_t *) nfs_write }, { NULL, NULL } }; static struct vnodeopv_desc nfsv2_vnodeop_opv_desc = { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; VNODEOP_SET(nfsv2_vnodeop_opv_desc); /* * Special device vnode ops */ vop_t **spec_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_specop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsspec_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsspec_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsspec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = { &spec_nfsv2nodeop_p, nfsv2_specop_entries }; VNODEOP_SET(spec_nfsv2nodeop_opv_desc); vop_t **fifo_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = { { &vop_default_desc, (vop_t *) fifo_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsfifo_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsfifo_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsfifo_write }, { NULL, NULL } }; static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = { &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries }; VNODEOP_SET(fifo_nfsv2nodeop_opv_desc); static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct proc *procp)); static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap)); static int nfs_removerpc __P((struct vnode *dvp, char *name, int namelen, struct ucred *cred, struct proc *proc)); static int nfs_renamerpc __P((struct vnode *fdvp, char *fnameptr, int fnamelen, struct vnode *tdvp, char *tnameptr, int tnamelen, struct ucred *cred, struct proc *proc)); static int nfs_renameit __P((struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp)); /* * Global variables */ extern u_long nfs_true, nfs_false; extern struct nfsstats nfsstats; extern nfstype nfsv3_type[9]; struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; int nfs_numasync = 0; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. 
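* (The ACCESS reply is treated as a superset test: every mode bit
* that was asked for must come back granted, otherwise EACCES.)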
*/ static int nfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register u_long *tl; register caddr_t cp; register int t1, t2; caddr_t bpos, dpos, cp2; int error = 0, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_long mode, rmode; int v3 = NFS_ISV3(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * For nfs v3, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about, but * this is better than just returning anything that is lying about * in the cache. */ if (v3) { nfsstats.rpccnt[NFSPROC_ACCESS]++; nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (ap->a_mode & VREAD) mode = NFSV3ACCESS_READ; else mode = 0; if (vp->v_type == VDIR) { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_LOOKUP; } else { if (ap->a_mode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if (ap->a_mode & VEXEC) mode |= NFSV3ACCESS_EXECUTE; } *tl = txdr_unsigned(mode); nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred); nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_long, *tl); /* * The NFS V3 spec does not clarify whether or not * the returned access bits can be a superset of * the ones requested, so... */ if ((rmode & mode) != mode) error = EACCES; } nfsm_reqdone; return (error); } else { if (error = nfsspec_access(ap)) return (error); /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. * After calling nfsspec_access, we should have the correct * file size cached. */ if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_procp = ap->a_p; if (vp->v_type == VREG) error = nfs_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = nfs_readdirrpc(vp, &auio, ap->a_cred); free(bp, M_TEMP); } else if (vp->v_type = VLNK) error = nfs_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } return (error); } } /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. 
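* Note that for non-lease mounts the attribute cache is discarded
* again at the end of open, so the first getattr after an open
* always consults the server; this is what provides the usual NFS
* open/close consistency.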
*/ /* ARGSUSED */ static int nfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct vattr vattr; int error; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { printf("open eacces vtyp=%d\n",vp->v_type); return (EACCES); } /* * Get a valid lease. If cached data is stale, flush it. */ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, ap->a_cred, ap->a_p); } while (error == NQNFS_EXPIRED); if (error) { return (error); } if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_brev = np->n_lrev; } } } else { if (np->n_flag & NMODIFIED) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) np->n_direofoffset = 0; if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } } } if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) np->n_attrstamp = 0; /* For Open/Close consistency */ return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to nfs_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NQNFS - do nothing now, since 2 is dealt with via leases and * 1 should be dealt with via an fsync() system call for * cases where write errors are important. 
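* Whichever path is taken, a write error recorded earlier on the
* nfsnode (NWRITEERR) is reported from this close and then cleared.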
*/ /* ARGSUSED */ static int nfs_close(ap) struct vop_close_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); int error = 0; if (vp->v_type == VREG) { if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && (np->n_flag & NMODIFIED)) { if (NFS_ISV3(vp)) { error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0); np->n_flag &= ~NMODIFIED; } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); np->n_attrstamp = 0; } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } } return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register caddr_t cp; register u_long *tl; register int t1, t2; caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); /* * Update local times for special files. */ if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; /* * First look in the cache. */ if (nfs_getattrcache(vp, ap->a_vap) == 0) return (0); nfsstats.rpccnt[NFSPROC_GETATTR]++; nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred); if (!error) nfsm_loadattr(vp, ap->a_vap); nfsm_reqdone; return (error); } /* * nfs setattr call. */ static int nfs_setattr(ap) struct vop_setattr_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (u_short)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (np->n_flag & NMODIFIED) { if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 1); else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); if (error) return (error); } tsize = np->n_size; np->n_size = np->n_vattr.va_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); }; } else if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG && (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); if (error && vap->va_size != VNOVAL) { np->n_size = np->n_vattr.va_size = tsize; vnode_pager_setsize(vp, (u_long)np->n_size); } return (error); } /* * Do an nfs setattr rpc. 
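* For v3 each settable attribute goes out as a discriminated union:
* a boolean "value follows" word and then the value itself, roughly
* (illustrative layout only):
*
*	TRUE, mode		field is to be set
*	FALSE			field is left alone
*
* The two times use a three-way discriminant instead: DONTCHANGE,
* TOSERVER when the caller effectively asked for "now", or TOCLIENT
* followed by an explicit nfsv3time.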
*/ static int nfs_setattrrpc(vp, vap, cred, procp) register struct vnode *vp; register struct vattr *vap; struct ucred *cred; struct proc *procp; { register struct nfsv2_sattr *sp; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; u_long *tl; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_SETATTR]++; nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); nfsm_fhtom(vp, v3); if (v3) { if (vap->va_mode != (u_short)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_mode); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_uid != (uid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_uid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_gid != (gid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_gid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_size != VNOVAL) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(&vap->va_size, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if (vap->va_atime.tv_sec != time.tv_sec) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if (vap->va_mtime.tv_sec != time.tv_sec) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (u_short)VNOVAL) sp->sa_mode = VNOVAL; else sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = VNOVAL; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = VNOVAL; else sp->sa_gid = txdr_unsigned(vap->va_gid); sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(vp, NFSPROC_SETATTR, procp, cred); if (v3) { nfsm_wcc_data(vp, wccflag); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_reqdone; return (error); } /* * nfs lookup call, one step at a time... 
* First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { register struct componentname *cnp = ap->a_cnp; register struct vnode *dvp = ap->a_dvp; register struct vnode **vpp = ap->a_vpp; register int flags = cnp->cn_flags; register struct vnode *newvp; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsmount *nmp; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; long len; nfsfh_t *fhp; struct nfsnode *np; int lockparent, wantparent, error = 0, attrflag, fhsize; int v3 = NFS_ISV3(dvp); struct proc *p = cnp->cn_proc; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); *vpp = NULLVP; if (dvp->v_type != VDIR) return (ENOTDIR); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { struct vattr vattr; int vpid; if (error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) { *vpp = NULLVP; return (error); } newvp = *vpp; vpid = newvp->v_id; /* * See the comment starting `Step through' in ufs/ufs_lookup.c * for an explanation of the locking protocol */ if (dvp == newvp) { VREF(newvp); error = 0; } else if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = vget(newvp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) error = vn_lock(dvp, LK_EXCLUSIVE, p); } else { error = vget(newvp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } if (!error) { if (vpid == newvp->v_id) { if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p) && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { nfsstats.lookupcache_hits++; if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); } vput(newvp); if (lockparent && dvp != newvp && (flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } error = vn_lock(dvp, LK_EXCLUSIVE, p); *vpp = NULLVP; if (error) return (error); } error = 0; newvp = NULLVP; nfsstats.lookupcache_misses++; nfsstats.rpccnt[NFSPROC_LOOKUP]++; len = cnp->cn_namelen; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); if (error) { nfsm_postop_attr(dvp, attrflag); m_freem(mrep); goto nfsmout; } nfsm_getfh(fhp, fhsize, v3); /* * Handle RENAME case... 
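* (A RENAME-target lookup hands the node back with SAVENAME set and
* must never resolve to the directory itself, hence the EISDIR on a
* filehandle match below.)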
*/ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { if (NFS_CMPFH(np, fhp, fhsize)) { m_freem(mrep); return (EISDIR); } if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } newvp = NFSTOV(np); if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); *vpp = newvp; m_freem(mrep); cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(dvp, 0, p); return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { vn_lock(dvp, LK_EXCLUSIVE + LK_RETRY, p); return (error); } newvp = NFSTOV(np); if (lockparent && (flags & ISLASTCN) && (error = vn_lock(dvp, LK_EXCLUSIVE, p))) { vput(newvp); return (error); } } else if (NFS_CMPFH(np, fhp, fhsize)) { VREF(dvp); newvp = dvp; } else { if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } if (!lockparent || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(dvp, newvp, cnp); } *vpp = newvp; nfsm_reqdone; if (error) { if (newvp != NULLVP) { vrele(newvp); *vpp = NULLVP; } if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN) && error == ENOENT) { if (!lockparent) VOP_UNLOCK(dvp, 0, p); if (dvp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else error = EJUSTRETURN; } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; } return (error); } /* * nfs read call. * Just call nfs_bioread() to do the work. */ static int nfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VREG) return (EPERM); return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0)); } /* * nfs readlink call */ static int nfs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EPERM); return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred, 0)); } /* * Do a readlink rpc. * Called by nfs_doio() from below the buffer cache. 
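* (v3 replies carry post-op attributes even on failure, so those are
* picked off before the error is checked.)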
*/ int nfs_readlinkrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, len, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_READLINK]++; nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred); if (v3) nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); nfsm_mtouio(uiop, len); } nfsm_reqdone; return (error); } /* * nfs read rpc call * Ditto above */ int nfs_readrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz; nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED * 3); if (v3) { txdr_hyper(&uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } else { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_strsiz(retlen, nmp->nm_rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); tsiz -= retlen; if (v3) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } nfsmout: return (error); } /* * nfs write call */ int nfs_writerpc(vp, uiop, cred, iomode, must_commit) register struct vnode *vp; register struct uio *uiop; struct ucred *cred; int *iomode, *must_commit; { register u_long *tl; register caddr_t cp; register int t1, t2, backup; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > nmp->nm_wsize) ? 
nmp->nm_wsize : tsiz; nfsm_reqhead(vp, NFSPROC_WRITE, NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); txdr_hyper(&uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); } else { nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *++tl = txdr_unsigned(uiop->uio_offset); tl += 2; } *tl = txdr_unsigned(len); nfsm_uiotom(uiop, len); nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred); if (v3) { wccflag = NFSV3_WCCCHK; nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; break; } else if (rlen < len) { backup = len - rlen; uiop->uio_iov->iov_base -= backup; uiop->uio_iov->iov_len += backup; uiop->uio_offset -= backup; uiop->uio_resid += backup; len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest committment level * obtained by any of the RPCs. */ if (committed == NFSV3WRITE_FILESYNC) committed = commit; else if (committed == NFSV3WRITE_DATASYNC && commit == NFSV3WRITE_UNSTABLE) committed = commit; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); nmp->nm_flag |= NFSMNT_HASWRITEVERF; } else if (bcmp((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { *must_commit = 1; bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); } } } else nfsm_loadattr(vp, (struct vattr *)0); if (wccflag) VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; m_freem(mrep); tsiz -= len; } nfsmout: if (vp->v_mount->mnt_flag & MNT_ASYNC) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) uiop->uio_resid = tsiz; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. 
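* So a v2 mknod of a character device goes out as an ordinary CREATE
* with sa_mode carrying the IFCHR bits and sa_size carrying the
* packed major/minor from va_rdev; v3 has a real MKNOD with an
* explicit type word and, for devices, a separate major/minor pair.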
*/ static int nfs_mknodrpc(dvp, vpp, cnp, vap) register struct vnode *dvp; register struct vnode **vpp; register struct componentname *cnp; register struct vattr *vap; { register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np = (struct nfsnode *)0; struct vattr vattr; char *cp2; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_long rdev; int v3 = NFS_ISV3(dvp); if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = txdr_unsigned(vap->va_rdev); else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; else { VOP_ABORTOP(dvp, cnp); vput(dvp); return (EOPNOTSUPP); } if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } nfsstats.rpccnt[NFSPROC_MKNOD]++; nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3SRVSATTR); *tl++ = vtonfsv3_type(vap->va_type); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); if (vap->va_type == VCHR || vap->va_type == VBLK) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(major(vap->va_rdev)); *tl = txdr_unsigned(minor(vap->va_rdev)); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = rdev; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (newvp) vput(newvp); } else { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. 
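* (The freshly created vnode is vput() right away rather than handed
* back; callers of VOP_MKNOD do not expect the new vnode.)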
*/ /* ARGSUSED */ static int nfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *newvp; int error; error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap); if (!error) vput(newvp); return (error); } static u_long create_verf; /* * nfs file create call */ static int nfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; again: nfsstats.rpccnt[NFSPROC_CREATE]++; nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); nfsm_build(tl, u_long *, NFSX_V3CREATEVERF); #ifdef INET if (!TAILQ_EMPTY(&in_ifaddrhead)) *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr; else #endif *tl++ = create_verf; *tl = ++create_verf; } else { *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); nfsm_build(tl, u_long *, NFSX_V3SRVSATTR); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = 0; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } if (newvp) vput(newvp); } else if (v3 && (fmode & O_EXCL)) error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc); if (!error) { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. 
* - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(ap) struct vop_remove_args /* { struct vnodeop_desc *a_desc; struct vnode * a_dvp; struct vnode * a_vp; struct componentname * a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; #ifndef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("nfs_remove: no name"); if (vp->v_usecount < 1) panic("nfs_remove: bad v_usecount"); #endif if (vp->v_usecount == 1 || (np->n_sillyrename && VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1); /* Do the rpc */ if (error != EINTR) error = nfs_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); zfree(namei_zone, cnp->cn_pnbuf); np->n_attrstamp = 0; vput(dvp); if (vp == dvp) vrele(vp); else vput(vp); return (error); } /* * nfs file remove rpc called from nfs_inactive */ int nfs_removeit(sp) register struct sillyrename *sp; { return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, (struct proc *)0)); } /* * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). 
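 */

/*
 * Aside: REMOVE is not idempotent, so a retransmitted request whose
 * first reply was lost finds the name already gone and draws ENOENT.
 * The callers above fold that into success; a hedged restatement:
 */
#include <errno.h>

static int
example_deidempotent(int error)
{
	/* Treat ENOENT from a presumed retransmission as success. */
	return (error == ENOENT ? 0 : error);
}

/*
 * nfs_removerpc() follows: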
*/ static int nfs_removerpc(dvp, name, namelen, cred, proc) register struct vnode *dvp; char *name; int namelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_REMOVE]++; nfsm_reqhead(dvp, NFSPROC_REMOVE, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_REMOVE, proc, cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs file rename call */ static int nfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { register struct vnode *fvp = ap->a_fvp; register struct vnode *tvp = ap->a_tvp; register struct vnode *fdvp = ap->a_fdvp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *tcnp = ap->a_tcnp; register struct componentname *fcnp = ap->a_fcnp; int error; #ifndef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("nfs_rename: no name"); #endif /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_proc); if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(sdvp, scnp, sp) struct vnode *sdvp; struct componentname *scnp; register struct sillyrename *sp; { return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
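 */

/*
 * Aside: request buffers are sized before marshalling, with every XDR
 * string rounded up to a 4-byte boundary.  nfsm_rndup() is assumed to
 * be the usual mask-based round-up:
 */
#define	EXAMPLE_RNDUP(n)	(((n) + 3) & ~3)

/*
 * so the RENAME request below reserves two file handles, two length
 * words, and both (padded) names:
 *
 *	(NFSX_FH(v3) + NFSX_UNSIGNED) * 2 +
 *	    EXAMPLE_RNDUP(fnamelen) + EXAMPLE_RNDUP(tnamelen)
 *
 * nfs_renamerpc() follows: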
*/ static int nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) register struct vnode *fdvp; char *fnameptr; int fnamelen; register struct vnode *tdvp; char *tnameptr; int tnamelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(fdvp); nfsstats.rpccnt[NFSPROC_RENAME]++; nfsm_reqhead(fdvp, NFSPROC_RENAME, (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen)); nfsm_fhtom(fdvp, v3); nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); nfsm_fhtom(tdvp, v3); nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); nfsm_request(fdvp, NFSPROC_RENAME, proc, cred); if (v3) { nfsm_wcc_data(fdvp, fwccflag); nfsm_wcc_data(tdvp, twccflag); } nfsm_reqdone; VTONFS(fdvp)->n_flag |= NMODIFIED; VTONFS(tdvp)->n_flag |= NMODIFIED; if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs hard link create call */ static int nfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); if (vp->v_mount != tdvp->v_mount) { VOP_ABORTOP(vp, cnp); if (tdvp == vp) vrele(tdvp); else vput(tdvp); return (EXDEV); } /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc); nfsstats.rpccnt[NFSPROC_LINK]++; nfsm_reqhead(vp, NFSPROC_LINK, NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred); if (v3) { nfsm_postop_attr(vp, attrflag); nfsm_wcc_data(tdvp, wccflag); } nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(tdvp)->n_flag |= NMODIFIED; if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) VTONFS(tdvp)->n_attrstamp = 0; vput(tdvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vnode *newvp = (struct vnode *)0; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, cnp->cn_cred->cr_gid); } nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); if (!v3) { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred); if (v3) { if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); nfsm_wcc_data(dvp, wccflag); } nfsm_reqdone; if (newvp) vput(newvp); zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs make dir call */ static int nfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; register int len; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; int gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } len = cnp->cn_namelen; nfsstats.rpccnt[NFSPROC_MKDIR]++; nfsm_reqhead(dvp, NFSPROC_MKDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred); if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry * if we can succeed in looking up the directory. */ if (error == EEXIST || (!error && !gotvp)) { if (newvp) { vrele(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, cnp->cn_proc, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vrele(newvp); } else *ap->a_vpp = newvp; zfree(namei_zone, cnp->cn_pnbuf); vput(dvp); return (error); } /* * nfs remove directory call */ static int nfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_RMDIR]++; nfsm_reqhead(dvp, NFSPROC_RMDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; cache_purge(dvp); cache_purge(vp); vput(vp); vput(dvp); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
*/ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct uio *uio = ap->a_uio; int tresid, error; struct vattr vattr; if (vp->v_type != VDIR) return (EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKCACHABLE(vp, ND_READ)) { nfsstats.direofcache_hits++; return (0); } } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 && np->n_mtime == vattr.va_mtime.tv_sec) { nfsstats.direofcache_hits++; return (0); } } /* * Call nfs_bioread() to do the real work. */ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred, 0); if (!error && uio->uio_resid == tresid) nfsstats.direofcache_misses++; return (error); } /* * Readdir rpc call. * Called from below the buffer cache by nfs_doio(). */ int nfs_readdirrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp); u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (NFS_DIRBLKSIZ - 1)) || (uiop->uio_resid & (NFS_DIRBLKSIZ - 1))) panic("nfs readdirrpc bad uio"); #endif /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. 
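 *
 * Aside: each decoded NFSv3 entry carries an 8-byte fileid, the
 * XDR-padded name, and an 8-byte cookie (v2 uses 4-byte fileids and
 * cookies); the loop below re-packs these as 4.4BSD struct dirent
 * records and pads the last record of every DIRBLKSIZ block so that
 * d_reclen always fills the block exactly.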
*/ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIR]++; nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + NFSX_READDIR(v3)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; } else { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmp->nm_readdirsize); nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; } else { m_freem(mrep); goto nfsmout; } } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fileno = fxdr_unsigned(u_quad_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; /* null terminate */ uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); } if (bigenough) { cookie.nfsuquad[0] = *tl++; if (v3) cookie.nfsuquad[1] = *tl++; } else if (v3) tl += 2; else tl++; more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: return (error); } /* * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). 
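 */

/*
 * Aside: the block-padding step shared by both readdir RPCs, restated
 * as a self-contained helper (hypothetical name).  "used" is the byte
 * count already packed into the current directory block:
 */
static int
example_pad_dirblk(int reclen, int used, int dirblksiz)
{
	/* Widen the final record's d_reclen to fill out the block. */
	if (used > 0)
		reclen += dirblksiz - used;
	return (reclen);
}

/*
 * nfs_readdirplusrpc() follows: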
*/ int nfs_readdirplusrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register struct vnode *newvp; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2, dpossav1, dpossav2; struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp), *np; nfsfh_t *fhp; u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize; #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirplusrpc bad uio"); #endif ndp->ni_dvp = vp; newvp = NULLVP; /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; nfsm_reqhead(vp, NFSPROC_READDIRPLUS, NFSX_FH(1) + 6 * NFSX_UNSIGNED); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred); nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination*/ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; cnp->cn_nameptr = uiop->uio_iov->iov_base; cnp->cn_namelen = len; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); if (bigenough) { cookie.nfsuquad[0] = *tl++; cookie.nfsuquad[1] = *tl++; } else tl += 2; /* * Since the attributes are before the file handle * (sigh), we must skip over the attributes and then * come back and get 
them. */ attrflag = fxdr_unsigned(int, *tl); if (attrflag) { dpossav1 = dpos; mdsav1 = md; nfsm_adv(NFSX_V3FATTR); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); doit = fxdr_unsigned(int, *tl); if (doit) { nfsm_getfh(fhp, fhsize, 1); if (NFS_CMPFH(dnp, fhp, fhsize)) { VREF(vp); newvp = vp; np = dnp; } else { if (error = nfs_nget(vp->v_mount, fhp, fhsize, &np)) doit = 0; else newvp = NFSTOV(np); } } if (doit) { dpossav2 = dpos; dpos = dpossav1; mdsav2 = md; md = mdsav1; nfsm_loadattr(newvp, (struct vattr *)0); dpos = dpossav2; md = mdsav2; dp->d_type = IFTODT(VTTOIF(np->n_vattr.va_type)); ndp->ni_vp = newvp; cnp->cn_hash = 0; for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++) cnp->cn_hash += (unsigned char)*cp * i; cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); } } else { /* Just skip over the file handle */ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(i)); } if (newvp != NULLVP) { vrele(newvp); newvp = NULLVP; } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirplusrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... */ static int nfs_sillyrename(dvp, vp, cnp) struct vnode *dvp, *vp; struct componentname *cnp; { register struct sillyrename *sp; struct nfsnode *np; int error; short pid; cache_purge(dvp); np = VTONFS(vp); #ifndef DIAGNOSTIC if (vp->v_type == VDIR) panic("nfs: sillyrename dir"); #endif MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NFSREQ, M_WAITOK); sp->s_cred = crdup(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* Fudge together a funny name */ pid = cnp->cn_proc->p_pid; sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid); /* Try lookitups until we get one that isn't there */ while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, (struct nfsnode **)0) == 0) { sp->s_name[4]++; if (sp->s_name[4] > 'z') { error = EINVAL; goto bad; } } if (error = nfs_renameit(dvp, cnp, sp)) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. 
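 */

/*
 * Aside: the "funny name" minted by nfs_sillyrename() above is
 * ".nfsA" + pid in hex + "4.4", with the fifth byte bumped through
 * 'A'..'z' on collisions.  An illustrative restatement:
 */
#include <stdio.h>

static int
example_sillyname(char *buf, unsigned int pid)
{
	/* Caller retries with buf[4]++ while the name already exists. */
	return (sprintf(buf, ".nfsA%04x4.4", pid & 0xffff));
}

/*
 * nfs_lookitup() contract: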
* npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(dvp, name, len, cred, procp, npp) register struct vnode *dvp; char *name; int len; struct ucred *cred; struct proc *procp; struct nfsnode **npp; { register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos, cp2; int error = 0, fhlen, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsfh_t *nfhp; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_LOOKUP]++; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred); if (npp && !error) { nfsm_getfh(nfhp, fhlen, v3); if (*npp) { np = *npp; if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { free((caddr_t)np->n_fhp, M_NFSBIGFH); np->n_fhp = &np->n_fh; } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK); bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } } else nfsm_loadattr(newvp, (struct vattr *)0); } nfsm_reqdone; if (npp && *npp == NULL) { if (error) { if (newvp) if (newvp == dvp) vrele(newvp); else vput(newvp); } else *npp = np; } return (error); } /* * Nfs Version 3 commit rpc */ static int nfs_commit(vp, offset, cnt, cred, procp) register struct vnode *vp; u_quad_t offset; int cnt; struct ucred *cred; struct proc *procp; { register caddr_t cp; register u_long *tl; register int t1, t2; register struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) return (0); nfsstats.rpccnt[NFSPROC_COMMIT]++; nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); txdr_hyper(&offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nfsm_request(vp, NFSPROC_COMMIT, procp, cred); nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, NFSX_V3WRITEVERF); if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, NFSX_V3WRITEVERF)) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); error = NFSERR_STALEWRITEVERF; } } nfsm_reqdone; return (error); } /* * Kludge City.. * - make nfs_bmap() essentially a no-op that does no translation * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc * (Maybe I could use the process's page mapping, but I was concerned that * Kernel Write might not be enabled and also figured copyout() would do * a lot more work than bcopy() and also it currently happens in the * context of the swapper process (2). 
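 */

/*
 * Aside: nfs_commit() above detects a server reboot by comparing the
 * write verifier in each COMMIT reply with the copy cached in the
 * mount point; a mismatch means uncommitted data may have been lost
 * and must be rewritten.  A hedged restatement (8-byte verifier, per
 * NFSX_V3WRITEVERF):
 */
#include <string.h>

static int
example_check_verf(unsigned char cached[8], const unsigned char reply[8])
{
	if (memcmp(cached, reply, 8) != 0) {
		memcpy(cached, reply, 8);
		return (1);	/* stale: caller rewrites, not commits */
	}
	return (0);
}

/*
 * nfs_bmap() follows: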
*/ static int nfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { register struct vnode *vp = ap->a_vp; if (ap->a_vpp != NULL) *ap->a_vpp = vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(), otherwise just all nfs_doio() to do the * request. */ static int nfs_strategy(ap) struct vop_strategy_args *ap; { register struct buf *bp = ap->a_bp; struct ucred *cr; struct proc *p; int error = 0; if (bp->b_flags & B_PHYS) panic("nfs physio"); if (bp->b_flags & B_ASYNC) p = (struct proc *)0; else p = curproc; /* XXX */ if (bp->b_flags & B_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting * queue the request, wake it up and wait for completion * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(bp, NOCRED)) error = nfs_doio(bp, cr, p); return (error); } /* * Mmap a file * * NB Currently unsupported. */ /* ARGSUSED */ static int nfs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } /* * fsync vnode op. Just call nfs_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(ap) struct vop_fsync_args /* { struct vnodeop_desc *a_desc; struct vnode * a_vp; struct ucred * a_cred; int a_waitfor; struct proc * a_p; } */ *ap; { return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. */ static int nfs_flush(vp, cred, waitfor, p, commit) register struct vnode *vp; struct ucred *cred; int waitfor; struct proc *p; int commit; { register struct nfsnode *np = VTONFS(vp); register struct buf *bp; register int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but nas not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, nfs_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV3(vp) && commit) { s = splbio(); /* * Count up how many buffers waiting for a commit. */ bveccount = 0; for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. 
*/ if (bveccount > NFS_COMMITBVECSIZ) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bvecpos >= bvecsize) break; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) continue; bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; bp->b_flags |= (B_BUSY | B_WRITEINPROG); vfs_busy_pages(bp, 1); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } splx(s); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = nfs_commit(vp, off, (int)(endoff - off), wcred, p); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = nfs_commit(vp, off, (int)size, bp->b_wcred, p); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG); if (retv) { vfs_unbusy_pages(bp); brelse(bp); } else { vp->v_numoutput++; bp->b_flags |= B_ASYNC; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) { vfs_bio_need_satisfy(); } } + s = splbio(); /* XXX check this positionning */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); bp->b_dirtyoff = bp->b_dirtyend = 0; reassignbuf(bp, vp); + splx(s); biodone(bp); } } } /* * Start/do any write(s) that are required. 
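 *
 * Aside: when committing, this loop runs twice.  The first pass
 * (passone) skips B_NEEDCOMMIT buffers and pushes everything else;
 * the "again" pass then batches the commit RPCs above and writes
 * whatever still remains, with B_WRITEINPROG|B_NEEDCOMMIT set.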
*/ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bp->b_flags & B_BUSY) { if (waitfor != MNT_WAIT || passone) continue; bp->b_flags |= B_WANTED; error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); splx(s); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) continue; bremfree(bp); if (passone || !commit) bp->b_flags |= (B_BUSY|B_ASYNC); else bp->b_flags |= (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT); splx(s); VOP_BWRITE(bp); goto loop; } splx(s); if (passone) { passone = 0; goto again; } if (waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } } if (vp->v_dirtyblkhd.lh_first && commit) { goto loop; } } if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); return (error); } /* * NFS advisory byte-level locks. * Currently unsupported. */ static int nfs_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * The following kludge is to allow diskless support to work * until a real NFS lockd is implemented. Basically, just pretend * that this is a local lock. */ return (lf_advlock(ap, &(np->n_lockf), np->n_size)); } /* * Print out the contents of an nfsnode. */ static int nfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); printf("tag VT_NFS, fileid %ld fsid 0x%lx", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Just call nfs_writebp() with the force argument set to 1. */ static int nfs_bwrite(ap) struct vop_bwrite_args /* { struct vnode *a_bp; } */ *ap; { return (nfs_writebp(ap->a_bp, 1)); } /* * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless * the force flag is one and it also handles the B_NEEDCOMMIT flag. */ int nfs_writebp(bp, force) register struct buf *bp; int force; { + int s; register int oldflags = bp->b_flags, retv = 1; off_t off; if(!(bp->b_flags & B_BUSY)) panic("bwrite: buffer is not busy???"); if (bp->b_flags & B_INVAL) bp->b_flags |= B_INVAL | B_NOCACHE; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) vfs_bio_need_satisfy(); } + s = splbio(); /* XXX check if needed */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) { reassignbuf(bp, bp->b_vp); } bp->b_vp->v_numoutput++; curproc->p_stats->p_ru.ru_oublock++; + splx(s); /* * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not * an actual write will have to be scheduled via. VOP_STRATEGY(). * If B_WRITEINPROG is already set, then push it with a write anyhow. 
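 *
 * Aside: the splbio()/splx() pairs added in this revision appear
 * intended to keep biodone(), which runs at interrupt time and walks
 * the same vnode buffer lists, from racing the flag updates and
 * reassignbuf() calls here; the in-line XXX notes mark them as still
 * under review.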
*/ vfs_busy_pages(bp, 1); if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) { off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; bp->b_flags |= B_WRITEINPROG; retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff, bp->b_wcred, bp->b_proc); bp->b_flags &= ~B_WRITEINPROG; if (!retv) { bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_flags &= ~B_NEEDCOMMIT; biodone(bp); } else if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(bp->b_vp->v_mount); } if (retv) { if (force) bp->b_flags |= B_WRITEINPROG; VOP_STRATEGY(bp); } if( (oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); if (oldflags & B_DELWRI) { + s = splbio(); reassignbuf(bp, bp->b_vp); + splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vattr *vap; register gid_t *gp; register struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; mode_t mode = ap->a_mode; struct vattr vattr; register int i; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * If you're the super-user, * you always get access. */ if (cred->cr_uid == 0) return (0); vap = &vattr; error = VOP_GETATTR(vp, vap, cred, ap->a_p); if (error) return (error); /* * Access check is based on only one of owner, group, public. * If not owner, then check group. If not a member of the * group, then check public access. */ if (cred->cr_uid != vap->va_uid) { mode >>= 3; gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) if (vap->va_gid == *gp) goto found; mode >>= 3; found: ; } error = (vap->va_mode & mode) == mode ? 0 : EACCES; return (error); } /* * Read wrapper for special devices. */ static int nfsspec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; gettime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for special devices. */ static int nfsspec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; gettime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for special devices. * * Update the times on the nfsnode then do device close. 
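 */

/*
 * Aside: the owner/group/other test in nfsspec_access() above,
 * restated compactly (hypothetical helper; the superuser short cut
 * is omitted).  The requested bits shift right three places for each
 * class the credential fails to match:
 */
#include <errno.h>
#include <sys/types.h>

static int
example_access(mode_t fmode, uid_t fuid, gid_t fgid, mode_t want,
    uid_t uid, const gid_t *groups, int ngroups)
{
	int i;

	if (uid != fuid) {
		want >>= 3;			/* not owner: group bits */
		for (i = 0; i < ngroups; i++)
			if (groups[i] == fgid)
				goto found;
		want >>= 3;			/* not in group: other bits */
found:		;
	}
	return ((fmode & want) == want ? 0 : EACCES);
}

/*
 * nfsspec_close() follows: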
*/ static int nfsspec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Read wrapper for fifos. */ static int nfsfifo_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; gettime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for fifos. */ static int nfsfifo_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; gettime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct timeval tv; struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { gettime(&tv); if (np->n_flag & NACC) { np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; } if (np->n_flag & NUPD) { np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; } np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); } Index: head/sys/nfsclient/nfs_bio.c =================================================================== --- head/sys/nfsclient/nfs_bio.c (revision 34265) +++ head/sys/nfsclient/nfs_bio.c (revision 34266) @@ -1,1234 +1,1238 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. 
* 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 - * $Id: nfs_bio.c,v 1.51 1998/03/06 09:46:43 msmith Exp $ + * $Id: nfs_bio.c,v 1.52 1998/03/07 21:36:01 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct buf *nfs_getcacheblk __P((struct vnode *vp, daddr_t bn, int size, struct proc *p)); static void nfs_prot_buf __P((struct buf *bp, int off, int n)); extern int nfs_numasync; extern struct nfsstats nfsstats; /* * Vnode op for VM getpages. */ int nfs_getpages(ap) struct vop_getpages_args *ap; { int i, error, nextoff, size, toff, npages; struct uio uio; struct iovec iov; vm_page_t m; vm_offset_t kva; struct buf *bp; if ((ap->a_vp->v_object) == NULL) { printf("nfs_getpages: called with non-merged cache vnode??\n"); return EOPNOTSUPP; } /* * We use only the kva address for the buffer, but this is extremely * convienient and fast. */ bp = getpbuf(); npages = btoc(ap->a_count); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, ap->a_m, npages); iov.iov_base = (caddr_t) kva; iov.iov_len = ap->a_count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(ap->a_m[0]->pindex); uio.uio_resid = ap->a_count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_procp = curproc; error = nfs_readrpc(ap->a_vp, &uio, curproc->p_ucred); pmap_qremove(kva, npages); relpbuf(bp); if (error && (uio.uio_resid == ap->a_count)) return VM_PAGER_ERROR; size = ap->a_count - uio.uio_resid; for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= size) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else { int nvalid = ((size + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); vm_page_set_validclean(m, 0, nvalid); } if (i != ap->a_reqpage) { /* * Whether or not to leave the page activated is up in * the air, but we should put the page on a page queue * somewhere (it already is in the object). Result: * It appears that emperical results show that * deactivating pages is best. */ /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error) { if (m->flags & PG_WANTED) vm_page_activate(m); else vm_page_deactivate(m); PAGE_WAKEUP(m); } else { vnode_pager_freepage(m); } } } return 0; } /* * Vnode op for VM putpages. 
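 */

/*
 * Aside: when nfs_getpages() above gets a short read, only the
 * DEV_BSIZE-aligned prefix of the last page is marked valid.  The
 * arithmetic, restated as a hypothetical helper:
 */
static int
example_valid_bytes(int size, int toff, int dev_bsize)
{
	/* Bytes of the page at offset toff covered by a read of size. */
	return (((size + dev_bsize - 1) - toff) & ~(dev_bsize - 1));
}

/*
 * nfs_putpages() follows: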
*/ int nfs_putpages(ap) struct vop_putpages_args *ap; { struct uio uio; struct iovec iov; vm_page_t m; vm_offset_t kva; struct buf *bp; int iomode, must_commit, i, error, npages; int *rtvals; rtvals = ap->a_rtvals; npages = btoc(ap->a_count); for (i = 0; i < npages; i++) { rtvals[i] = VM_PAGER_AGAIN; } /* * We use only the kva address for the buffer, but this is extremely * convienient and fast. */ bp = getpbuf(); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, ap->a_m, npages); iov.iov_base = (caddr_t) kva; iov.iov_len = ap->a_count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(ap->a_m[0]->pindex); uio.uio_resid = ap->a_count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_procp = curproc; if ((ap->a_sync & VM_PAGER_PUT_SYNC) == 0) iomode = NFSV3WRITE_UNSTABLE; else iomode = NFSV3WRITE_FILESYNC; error = nfs_writerpc(ap->a_vp, &uio, curproc->p_ucred, &iomode, &must_commit); pmap_qremove(kva, npages); relpbuf(bp); if (!error) { int nwritten = round_page(ap->a_count - uio.uio_resid) / PAGE_SIZE; for (i = 0; i < nwritten; i++) { rtvals[i] = VM_PAGER_OK; ap->a_m[i]->dirty = 0; } if (must_commit) nfs_clearcommit(ap->a_vp->v_mount); } return ap->a_rtvals[0]; } /* * Vnode op for read using bio * Any similarity to readip() is purely coincidental */ int nfs_bioread(vp, uio, ioflag, cred, getpages) register struct vnode *vp; register struct uio *uio; int ioflag; struct ucred *cred; int getpages; { register struct nfsnode *np = VTONFS(vp); register int biosize, diff, i; struct buf *bp = 0, *rabp; struct vattr vattr; struct proc *p; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn, rabn; int bufsize; int nra, error = 0, n = 0, on = 0, not_readin; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("nfs_read mode"); #endif if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); p = uio->uio_procp; if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) (void)nfs_fsinfo(nmp, vp, cred, p); biosize = vp->v_mount->mnt_stat.f_iosize; /* * For nfs, cache consistency can only be maintained approximately. * Although RFC1094 does not specify the criteria, the following is * believed to be compatible with the reference port. * For nqnfs, full cache consistency is maintained within the loop. * For nfs: * If the file's modify time on the server has changed since the * last read rpc or you have written to the file, * you may have lost data cache consistency with the * server, so flush all of the file's data out of the cache. * Then force a getattr rpc to ensure that you have up to date * attributes. * NB: This implies that cache data can be read when up to * NFS_ATTRTIMEO seconds out of date. If you find that you need current * attributes this could be forced by setting n_attrstamp to 0 before * the VOP_GETATTR() call. */ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) { if (np->n_flag & NMODIFIED) { if (vp->v_type != VREG) { if (vp->v_type != VDIR) panic("nfs: bioread, not dir"); nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } np->n_attrstamp = 0; error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_mtime = vattr.va_mtime.tv_sec; } } } do { /* * Get a valid lease. If cached data is stale, flush it. 
*/ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, cred, p); } while (error == NQNFS_EXPIRED); if (error) return (error); if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE) || ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) { if (vp->v_type == VDIR) nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; } } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) { nfs_invaldir(vp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } } if (np->n_flag & NQNFSNONCACHE) { switch (vp->v_type) { case VREG: return (nfs_readrpc(vp, uio, cred)); case VLNK: return (nfs_readlinkrpc(vp, uio, cred)); case VDIR: break; default: printf(" NQNFSNONCACHE: type %x unexpected\n", vp->v_type); }; } switch (vp->v_type) { case VREG: nfsstats.biocache_reads++; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize - 1); not_readin = 1; /* * Start the read ahead(s), as required. */ if (nfs_numasync > 0 && nmp->nm_readahead > 0) { for (nra = 0; nra < nmp->nm_readahead && (off_t)(lbn + 1 + nra) * biosize < np->n_size; nra++) { rabn = lbn + 1 + nra; if (!incore(vp, rabn)) { rabp = nfs_getcacheblk(vp, rabn, biosize, p); if (!rabp) return (EINTR); if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { rabp->b_flags |= B_INVAL|B_ERROR; vfs_unbusy_pages(rabp); brelse(rabp); } } else brelse(rabp); } } } /* * If the block is in the cache and has the required data * in a valid region, just copy it out. * Otherwise, get the block and write back/read in, * as required. */ again: bufsize = biosize; if ((off_t)(lbn + 1) * biosize > np->n_size && (off_t)(lbn + 1) * biosize - np->n_size < biosize) { bufsize = np->n_size - lbn * biosize; bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); } bp = nfs_getcacheblk(vp, lbn, bufsize, p); if (!bp) return (EINTR); /* * If we are being called from nfs_getpages, we must * make sure the buffer is a vmio buffer. The vp will * already be setup for vmio but there may be some old * non-vmio buffers attached to it. */ if (getpages && !(bp->b_flags & B_VMIO)) { #ifdef DIAGNOSTIC printf("nfs_bioread: non vmio buf found, discarding\n"); #endif bp->b_flags |= B_NOCACHE; bp->b_flags |= B_INVAFTERWRITE; if (bp->b_dirtyend > 0) { if ((bp->b_flags & B_DELWRI) == 0) panic("nfsbioread"); if (VOP_BWRITE(bp) == EINTR) return (EINTR); } else brelse(bp); goto again; } if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; bp->b_flags &= ~(B_DONE | B_ERROR | B_INVAL); not_readin = 0; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); return (error); } } if (bufsize > on) { n = min((unsigned)(bufsize - on), uio->uio_resid); } else { n = 0; } diff = np->n_size - uio->uio_offset; if (diff < n) n = diff; if (not_readin && n > 0) { if (on < bp->b_validoff || (on + n) > bp->b_validend) { bp->b_flags |= B_NOCACHE; bp->b_flags |= B_INVAFTERWRITE; if (bp->b_dirtyend > 0) { if ((bp->b_flags & B_DELWRI) == 0) panic("nfsbioread"); if (VOP_BWRITE(bp) == EINTR) return (EINTR); } else brelse(bp); goto again; } } vp->v_lastr = lbn; diff = (on >= bp->b_validend) ? 
0 : (bp->b_validend - on); if (diff < n) n = diff; break; case VLNK: nfsstats.biocache_readlinks++; bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p); if (!bp) return (EINTR); if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { bp->b_flags |= B_ERROR; brelse(bp); return (error); } } n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid); on = 0; break; case VDIR: nfsstats.biocache_readdirs++; if (np->n_direofoffset && uio->uio_offset >= np->n_direofoffset) { return (0); } lbn = uio->uio_offset / NFS_DIRBLKSIZ; on = uio->uio_offset & (NFS_DIRBLKSIZ - 1); bp = nfs_getcacheblk(vp, lbn, NFS_DIRBLKSIZ, p); if (!bp) return (EINTR); if ((bp->b_flags & B_CACHE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); } while (error == NFSERR_BAD_COOKIE) { nfs_invaldir(vp); error = nfs_vinvalbuf(vp, 0, cred, p, 1); /* * Yuck! The directory has been modified on the * server. The only way to get the block is by * reading from the beginning to get all the * offset cookies. */ for (i = 0; i <= lbn && !error; i++) { if (np->n_direofoffset && (i * NFS_DIRBLKSIZ) >= np->n_direofoffset) return (0); bp = nfs_getcacheblk(vp, i, NFS_DIRBLKSIZ, p); if (!bp) return (EINTR); if ((bp->b_flags & B_DONE) == 0) { bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); error = nfs_doio(bp, cred, p); if (error) { brelse(bp); } else if (i < lbn) { brelse(bp); } } } } if (error) return (error); } /* * If not eof and read aheads are enabled, start one. * (You need the current block first, so that you have the * directory offset cookie of the next block.) */ if (nfs_numasync > 0 && nmp->nm_readahead > 0 && (np->n_direofoffset == 0 || (lbn + 1) * NFS_DIRBLKSIZ < np->n_direofoffset) && !(np->n_flag & NQNFSNONCACHE) && !incore(vp, lbn + 1)) { rabp = nfs_getcacheblk(vp, lbn + 1, NFS_DIRBLKSIZ, p); if (rabp) { if ((rabp->b_flags & (B_CACHE|B_DELWRI)) == 0) { rabp->b_flags |= (B_READ | B_ASYNC); vfs_busy_pages(rabp, 0); if (nfs_asyncio(rabp, cred)) { rabp->b_flags |= B_INVAL|B_ERROR; vfs_unbusy_pages(rabp); brelse(rabp); } } else { brelse(rabp); } } } /* * Make sure we use a signed variant of min() since * the second term may be negative. 
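 *
 * Aside: NFS_DIRBLKSIZ - bp->b_resid - on goes negative whenever the
 * server returned fewer than "on" bytes of the block, so an unsigned
 * min() would wrap around and copy out garbage.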
*/ n = lmin(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid - on); break; default: printf(" nfs_bioread: type %x unexpected\n",vp->v_type); break; }; if (n > 0) { error = uiomove(bp->b_data + on, (int)n, uio); } switch (vp->v_type) { case VREG: break; case VLNK: n = 0; break; case VDIR: if (np->n_flag & NQNFSNONCACHE) bp->b_flags |= B_INVAL; break; default: printf(" nfs_bioread: type %x unexpected\n",vp->v_type); } brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n > 0); return (error); } static void nfs_prot_buf(bp, off, n) struct buf *bp; int off; int n; { int pindex, boff, end; if ((bp->b_flags & B_VMIO) == 0) return; end = round_page(off + n); for (boff = trunc_page(off); boff < end; boff += PAGE_SIZE) { pindex = boff >> PAGE_SHIFT; vm_page_protect(bp->b_pages[pindex], VM_PROT_NONE); } } /* * Vnode op for write using bio */ int nfs_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register int biosize; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); register struct ucred *cred = ap->a_cred; int ioflag = ap->a_ioflag; struct buf *bp; struct vattr vattr; struct nfsmount *nmp = VFSTONFS(vp->v_mount); daddr_t lbn; int bufsize; int n, on, error = 0, iomode, must_commit; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("nfs_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("nfs_write proc"); #endif if (vp->v_type != VREG) return (EIO); if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; return (np->n_error); } if ((nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_GOTFSINFO)) == NFSMNT_NFSV3) (void)nfs_fsinfo(nmp, vp, cred, p); if (ioflag & (IO_APPEND | IO_SYNC)) { if (np->n_flag & NMODIFIED) { np->n_attrstamp = 0; error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } if (ioflag & IO_APPEND) { np->n_attrstamp = 0; error = VOP_GETATTR(vp, &vattr, cred, p); if (error) return (error); uio->uio_offset = np->n_size; } } if (uio->uio_offset < 0) return (EINVAL); if (uio->uio_resid == 0) return (0); /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, i don't think it matters */ if (p && uio->uio_offset + uio->uio_resid > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { psignal(p, SIGXFSZ); return (EFBIG); } /* * I use nm_rsize, not nm_wsize so that all buffer cache blocks * will be the same size within a filesystem. nfs_writerpc will * still use nm_wsize when sizing the rpc's. */ biosize = vp->v_mount->mnt_stat.f_iosize; do { /* * Check for a valid write lease. 
*/ if ((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_WRITE)) { do { error = nqnfs_getlease(vp, ND_WRITE, cred, p); } while (error == NQNFS_EXPIRED); if (error) return (error); if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; } } if ((np->n_flag & NQNFSNONCACHE) && uio->uio_iovcnt == 1) { iomode = NFSV3WRITE_FILESYNC; error = nfs_writerpc(vp, uio, cred, &iomode, &must_commit); if (must_commit) nfs_clearcommit(vp->v_mount); return (error); } nfsstats.biocache_writes++; lbn = uio->uio_offset / biosize; on = uio->uio_offset & (biosize-1); n = min((unsigned)(biosize - on), uio->uio_resid); again: if (uio->uio_offset + n > np->n_size) { np->n_size = uio->uio_offset + n; np->n_flag |= NMODIFIED; vnode_pager_setsize(vp, (u_long)np->n_size); } bufsize = biosize; if ((lbn + 1) * biosize > np->n_size) { bufsize = np->n_size - lbn * biosize; bufsize = (bufsize + DEV_BSIZE - 1) & ~(DEV_BSIZE - 1); } bp = nfs_getcacheblk(vp, lbn, bufsize, p); if (!bp) return (EINTR); if (bp->b_wcred == NOCRED) { crhold(cred); bp->b_wcred = cred; } np->n_flag |= NMODIFIED; if ((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend > np->n_size) { bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); } /* * If the new write will leave a contiguous dirty * area, just update the b_dirtyoff and b_dirtyend, * otherwise force a write rpc of the old dirty area. */ if (bp->b_dirtyend > 0 && (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { bp->b_proc = p; if (VOP_BWRITE(bp) == EINTR) return (EINTR); goto again; } /* * Check for valid write lease and get one as required. * In case getblk() and/or bwrite() delayed us. */ if ((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_WRITE)) { do { error = nqnfs_getlease(vp, ND_WRITE, cred, p); } while (error == NQNFS_EXPIRED); if (error) { brelse(bp); return (error); } if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { brelse(bp); error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); np->n_brev = np->n_lrev; goto again; } } error = uiomove((char *)bp->b_data + on, n, uio); if (error) { bp->b_flags |= B_ERROR; brelse(bp); return (error); } /* * This will keep the buffer and mmaped regions more coherent. */ nfs_prot_buf(bp, on, n); if (bp->b_dirtyend > 0) { bp->b_dirtyoff = min(on, bp->b_dirtyoff); bp->b_dirtyend = max((on + n), bp->b_dirtyend); } else { bp->b_dirtyoff = on; bp->b_dirtyend = on + n; } if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff || bp->b_validoff > bp->b_dirtyend) { bp->b_validoff = bp->b_dirtyoff; bp->b_validend = bp->b_dirtyend; } else { bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff); bp->b_validend = max(bp->b_validend, bp->b_dirtyend); } /* * Since this block is being modified, it must be written * again and not just committed. */ bp->b_flags &= ~B_NEEDCOMMIT; /* * If the lease is non-cachable or IO_SYNC do bwrite(). */ if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) { bp->b_proc = p; if (ioflag & IO_INVAL) bp->b_flags |= B_INVAL; error = VOP_BWRITE(bp); if (error) return (error); if (np->n_flag & NQNFSNONCACHE) { error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1); if (error) return (error); } } else if ((n + on) == biosize && (nmp->nm_flag & NFSMNT_NQNFS) == 0) { bp->b_proc = (struct proc *)0; bp->b_flags |= B_ASYNC; (void)nfs_writebp(bp, 0); } else bdwrite(bp); } while (uio->uio_resid > 0 && n > 0); return (0); } /* * Get an nfs cache block. 
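The write path above keeps exactly one dirty extent per buffer in (b_dirtyoff, b_dirtyend): a new write either overlaps or abuts the extent and is folded in with min()/max(), or the old extent has to be pushed out with VOP_BWRITE() before the bookkeeping can restart. That decision, modeled self-contained (struct and helper names are the example's own):

#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))
#define MAX(a, b)	((a) > (b) ? (a) : (b))

struct dirty { int off, end; };		/* one extent, end exclusive */

/*
 * Return 0 when the write [on, on + n) is disjoint from the extent
 * (the "on > b_dirtyend || on + n < b_dirtyoff" test, forcing a flush);
 * otherwise widen the extent to cover the new write.
 */
static int
merge_dirty(struct dirty *d, int on, int n)
{
	if (d->end > 0 && (on > d->end || on + n < d->off))
		return (0);
	if (d->end > 0) {
		d->off = MIN(on, d->off);
		d->end = MAX(on + n, d->end);
	} else {
		d->off = on;
		d->end = on + n;
	}
	return (1);
}

int
main(void)
{
	struct dirty d = { 0, 0 };

	merge_dirty(&d, 100, 50);	/* first write: [100,150) */
	merge_dirty(&d, 140, 60);	/* overlaps: grows to [100,200) */
	printf("dirty [%d,%d), disjoint merge ok=%d\n",
	    d.off, d.end, merge_dirty(&d, 300, 10));
	return (0);
}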
* Allocate a new one if the block isn't currently in the cache * and return the block marked busy. If the calling process is * interrupted by a signal for an interruptible mount point, return * NULL. */ static struct buf * nfs_getcacheblk(vp, bn, size, p) struct vnode *vp; daddr_t bn; int size; struct proc *p; { register struct buf *bp; struct mount *mp; struct nfsmount *nmp; mp = vp->v_mount; nmp = VFSTONFS(mp); if (nmp->nm_flag & NFSMNT_INT) { bp = getblk(vp, bn, size, PCATCH, 0); while (bp == (struct buf *)0) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) return ((struct buf *)0); bp = getblk(vp, bn, size, 0, 2 * hz); } } else bp = getblk(vp, bn, size, 0, 0); if( vp->v_type == VREG) { int biosize; biosize = mp->mnt_stat.f_iosize; bp->b_blkno = (bn * biosize) / DEV_BSIZE; } return (bp); } /* * Flush and invalidate all dirty buffers. If another process is already * doing the flush, just wait for completion. */ int nfs_vinvalbuf(vp, flags, cred, p, intrflg) struct vnode *vp; int flags; struct ucred *cred; struct proc *p; int intrflg; { register struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, slpflag, slptimeo; if (vp->v_flag & VXLOCK) { return (0); } if ((nmp->nm_flag & NFSMNT_INT) == 0) intrflg = 0; if (intrflg) { slpflag = PCATCH; slptimeo = 2 * hz; } else { slpflag = 0; slptimeo = 0; } /* * First wait for any other process doing a flush to complete. */ while (np->n_flag & NFLUSHINPROG) { np->n_flag |= NFLUSHWANT; error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval", slptimeo); if (error && intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) return (EINTR); } /* * Now, flush as required. */ np->n_flag |= NFLUSHINPROG; error = vinvalbuf(vp, flags, cred, p, slpflag, 0); while (error) { if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) { np->n_flag &= ~NFLUSHINPROG; if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; wakeup((caddr_t)&np->n_flag); } return (EINTR); } error = vinvalbuf(vp, flags, cred, p, 0, slptimeo); } np->n_flag &= ~(NMODIFIED | NFLUSHINPROG); if (np->n_flag & NFLUSHWANT) { np->n_flag &= ~NFLUSHWANT; wakeup((caddr_t)&np->n_flag); } return (0); } /* * Initiate asynchronous I/O. Return an error if no nfsiods are available. * This is mainly to avoid queueing async I/O requests when the nfsiods * are all hung on a dead server. */ int nfs_asyncio(bp, cred) register struct buf *bp; struct ucred *cred; { struct nfsmount *nmp; int i; int gotiod; int slpflag = 0; int slptimeo = 0; int error; if (nfs_numasync == 0) return (EIO); nmp = VFSTONFS(bp->b_vp->v_mount); again: if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; gotiod = FALSE; /* * Find a free iod to process this request. */ for (i = 0; i < NFS_MAXASYNCDAEMON; i++) if (nfs_iodwant[i]) { /* * Found one, so wake it up and tell it which * mount to process. */ NFS_DPF(ASYNCIO, ("nfs_asyncio: waking iod %d for mount %p\n", i, nmp)); nfs_iodwant[i] = (struct proc *)0; nfs_iodmount[i] = nmp; nmp->nm_bufqiods++; wakeup((caddr_t)&nfs_iodwant[i]); gotiod = TRUE; break; } /* * If none are free, we may already have an iod working on this mount * point. If so, it will process our request. */ if (!gotiod) { if (nmp->nm_bufqiods > 0) { NFS_DPF(ASYNCIO, ("nfs_asyncio: %d iods are already processing mount %p\n", nmp->nm_bufqiods, nmp)); gotiod = TRUE; } } /* * If we have an iod which can process the request, then queue * the buffer. */ if (gotiod) { /* * Ensure that the queue never grows too large. 
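nfs_vinvalbuf() above serializes concurrent flushers with two flag bits and tsleep()/wakeup() on the flag word: latecomers set NFLUSHWANT and sleep until the owner clears NFLUSHINPROG and wakes them. The same single-flusher gate, translated roughly into userland pthreads (every name below is the sketch's own, not the kernel's):

#include <pthread.h>

#define FLUSHINPROG	0x1	/* a flush is running */
#define FLUSHWANT	0x2	/* someone is waiting for it */

static pthread_mutex_t lk = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cv = PTHREAD_COND_INITIALIZER;
static int nflag;

static void
flush_buffers(void)
{
	pthread_mutex_lock(&lk);
	while (nflag & FLUSHINPROG) {	/* another flusher owns the node */
		nflag |= FLUSHWANT;
		pthread_cond_wait(&cv, &lk);	/* tsleep(&np->n_flag, ...) */
	}
	nflag |= FLUSHINPROG;
	pthread_mutex_unlock(&lk);

	/* ... invalidate the buffers here, without holding the lock ... */

	pthread_mutex_lock(&lk);
	nflag &= ~FLUSHINPROG;
	if (nflag & FLUSHWANT) {	/* wakeup(&np->n_flag) */
		nflag &= ~FLUSHWANT;
		pthread_cond_broadcast(&cv);
	}
	pthread_mutex_unlock(&lk);
}

int
main(void)
{
	flush_buffers();	/* single-threaded smoke test */
	return (0);
}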
*/ while (nmp->nm_bufqlen >= 2*nfs_numasync) { NFS_DPF(ASYNCIO, ("nfs_asyncio: waiting for mount %p queue to drain\n", nmp)); nmp->nm_bufqwant = TRUE; error = tsleep(&nmp->nm_bufq, slpflag | PRIBIO, "nfsaio", slptimeo); if (error) { if (nfs_sigintr(nmp, NULL, bp->b_proc)) return (EINTR); if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } /* * We might have lost our iod while sleeping, * so check and loop if nescessary. */ if (nmp->nm_bufqiods == 0) { NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods after mount %p queue was drained, looping\n", nmp)); goto again; } } if (bp->b_flags & B_READ) { if (bp->b_rcred == NOCRED && cred != NOCRED) { crhold(cred); bp->b_rcred = cred; } } else { bp->b_flags |= B_WRITEINPROG; if (bp->b_wcred == NOCRED && cred != NOCRED) { crhold(cred); bp->b_wcred = cred; } } TAILQ_INSERT_TAIL(&nmp->nm_bufq, bp, b_freelist); nmp->nm_bufqlen++; return (0); } /* * All the iods are busy on other mounts, so return EIO to * force the caller to process the i/o synchronously. */ NFS_DPF(ASYNCIO, ("nfs_asyncio: no iods available, i/o is synchronous\n")); return (EIO); } /* * Do an I/O operation to/from a cache block. This may be called * synchronously or from an nfsiod. */ int nfs_doio(bp, cr, p) register struct buf *bp; struct ucred *cr; struct proc *p; { register struct uio *uiop; register struct vnode *vp; struct nfsnode *np; struct nfsmount *nmp; int error = 0, diff, len, iomode, must_commit = 0; struct uio uio; struct iovec io; vp = bp->b_vp; np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); uiop = &uio; uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_procp = p; /* * Historically, paging was done with physio, but no more. */ if (bp->b_flags & B_PHYS) { /* * ...though reading /dev/drum still gets us here. */ io.iov_len = uiop->uio_resid = bp->b_bcount; /* mapping was done by vmapbuf() */ io.iov_base = bp->b_data; uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; if (bp->b_flags & B_READ) { uiop->uio_rw = UIO_READ; nfsstats.read_physios++; error = nfs_readrpc(vp, uiop, cr); } else { int com; iomode = NFSV3WRITE_DATASYNC; uiop->uio_rw = UIO_WRITE; nfsstats.write_physios++; error = nfs_writerpc(vp, uiop, cr, &iomode, &com); } if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } } else if (bp->b_flags & B_READ) { io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; switch (vp->v_type) { case VREG: uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; nfsstats.read_bios++; error = nfs_readrpc(vp, uiop, cr); if (!error) { bp->b_validoff = 0; if (uiop->uio_resid) { /* * If len > 0, there is a hole in the file and * no writes after the hole have been pushed to * the server yet. * Just zero fill the rest of the valid area. 
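The `+' lines in the next hunk are the substantive change of this commit: nfs_doio() now calls reassignbuf() bracketed by splbio()/splx(). reassignbuf() moves the buffer between the vnode's clean and dirty lists, and those lists are presumably also touched from bio interrupt context (biodone() and friends), so the move must run with bio interrupts blocked. The save/raise/restore discipline in a toy userland model, with stand-in spl functions:

#include <stdio.h>

static int ipl;			/* stand-in for the interrupt priority level */

static int splbio(void) { int s = ipl; ipl = 1; return (s); }	/* raise */
static void splx(int s) { ipl = s; }				/* restore */

static void
reassignbuf_model(void)
{
	/* queue surgery that must not race interrupt-time completion */
	printf("moving buf between clean/dirty queues at ipl %d\n", ipl);
}

int
main(void)
{
	int s;

	s = splbio();		/* block bio interrupts, remember old level */
	reassignbuf_model();
	splx(s);		/* drop back to the saved level */
	return (0);
}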
*/ diff = bp->b_bcount - uiop->uio_resid; len = np->n_size - (((u_quad_t)bp->b_blkno) * DEV_BSIZE + diff); if (len > 0) { len = min(len, uiop->uio_resid); bzero((char *)bp->b_data + diff, len); bp->b_validend = diff + len; } else bp->b_validend = diff; } else bp->b_validend = bp->b_bcount; } if (p && (vp->v_flag & VTEXT) && (((nmp->nm_flag & NFSMNT_NQNFS) && NQNFS_CKINVALID(vp, np, ND_READ) && np->n_lrev != np->n_brev) || (!(nmp->nm_flag & NFSMNT_NQNFS) && np->n_mtime != np->n_vattr.va_mtime.tv_sec))) { uprintf("Process killed due to text file modification\n"); psignal(p, SIGKILL); p->p_flag |= P_NOSWAP; } break; case VLNK: uiop->uio_offset = (off_t)0; nfsstats.readlink_bios++; error = nfs_readlinkrpc(vp, uiop, cr); break; case VDIR: nfsstats.readdir_bios++; uiop->uio_offset = ((u_quad_t)bp->b_lblkno) * NFS_DIRBLKSIZ; if (nmp->nm_flag & NFSMNT_RDIRPLUS) { error = nfs_readdirplusrpc(vp, uiop, cr); if (error == NFSERR_NOTSUPP) nmp->nm_flag &= ~NFSMNT_RDIRPLUS; } if ((nmp->nm_flag & NFSMNT_RDIRPLUS) == 0) error = nfs_readdirrpc(vp, uiop, cr); break; default: printf("nfs_doio: type %x unexpected\n",vp->v_type); break; }; if (error) { bp->b_flags |= B_ERROR; bp->b_error = error; } } else { if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size) bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; nfsstats.write_bios++; if ((bp->b_flags & (B_ASYNC | B_NEEDCOMMIT | B_NOCACHE | B_CLUSTER)) == B_ASYNC) iomode = NFSV3WRITE_UNSTABLE; else iomode = NFSV3WRITE_FILESYNC; bp->b_flags |= B_WRITEINPROG; error = nfs_writerpc(vp, uiop, cr, &iomode, &must_commit); if (!error && iomode == NFSV3WRITE_UNSTABLE) { bp->b_flags |= B_NEEDCOMMIT; if (bp->b_dirtyoff == 0 && bp->b_dirtyend == bp->b_bufsize) bp->b_flags |= B_CLUSTEROK; } else bp->b_flags &= ~B_NEEDCOMMIT; bp->b_flags &= ~B_WRITEINPROG; /* * For an interrupted write, the buffer is still valid * and the write hasn't been pushed to the server yet, * so we can't set B_ERROR and report the interruption * by setting B_EINTR. For the B_ASYNC case, B_EINTR * is not relevant, so the rpc attempt is essentially * a noop. For the case of a V3 write rpc not being * committed to stable storage, the block is still * dirty and requires either a commit rpc or another * write rpc with iomode == NFSV3WRITE_FILESYNC before * the block is reused. This is indicated by setting * the B_DELWRI and B_NEEDCOMMIT flags. */ if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { + int s; + bp->b_flags &= ~(B_INVAL|B_NOCACHE); ++numdirtybuffers; bp->b_flags |= B_DELWRI; + s = splbio(); reassignbuf(bp, vp); + splx(s); if ((bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; } else { if (error) { bp->b_flags |= B_ERROR; bp->b_error = np->n_error = error; np->n_flag |= NWRITEERR; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; biodone(bp); return (0); } } bp->b_resid = uiop->uio_resid; if (must_commit) nfs_clearcommit(vp->v_mount); biodone(bp); return (error); } Index: head/sys/nfsclient/nfs_vnops.c =================================================================== --- head/sys/nfsclient/nfs_vnops.c (revision 34265) +++ head/sys/nfsclient/nfs_vnops.c (revision 34266) @@ -1,3296 +1,3303 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 - * $Id: nfs_vnops.c,v 1.79 1998/03/06 09:46:48 msmith Exp $ + * $Id: nfs_vnops.c,v 1.80 1998/03/07 21:36:06 dyson Exp $ */ /* * vnode op calls for Sun NFS version 2 and 3 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Defs */ #define TRUE 1 #define FALSE 0 /* * Ifdef for FreeBSD-current merged buffer cache. It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. 
*/ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static int nfsspec_read __P((struct vop_read_args *)); static int nfsspec_write __P((struct vop_write_args *)); static int nfsfifo_read __P((struct vop_read_args *)); static int nfsfifo_write __P((struct vop_write_args *)); static int nfsspec_close __P((struct vop_close_args *)); static int nfsfifo_close __P((struct vop_close_args *)); #define nfs_poll vop_nopoll static int nfs_flush __P((struct vnode *,struct ucred *,int,struct proc *,int)); static int nfs_setattrrpc __P((struct vnode *,struct vattr *,struct ucred *,struct proc *)); static int nfs_lookup __P((struct vop_lookup_args *)); static int nfs_create __P((struct vop_create_args *)); static int nfs_mknod __P((struct vop_mknod_args *)); static int nfs_open __P((struct vop_open_args *)); static int nfs_close __P((struct vop_close_args *)); static int nfs_access __P((struct vop_access_args *)); static int nfs_getattr __P((struct vop_getattr_args *)); static int nfs_setattr __P((struct vop_setattr_args *)); static int nfs_read __P((struct vop_read_args *)); static int nfs_mmap __P((struct vop_mmap_args *)); static int nfs_fsync __P((struct vop_fsync_args *)); static int nfs_remove __P((struct vop_remove_args *)); static int nfs_link __P((struct vop_link_args *)); static int nfs_rename __P((struct vop_rename_args *)); static int nfs_mkdir __P((struct vop_mkdir_args *)); static int nfs_rmdir __P((struct vop_rmdir_args *)); static int nfs_symlink __P((struct vop_symlink_args *)); static int nfs_readdir __P((struct vop_readdir_args *)); static int nfs_bmap __P((struct vop_bmap_args *)); static int nfs_strategy __P((struct vop_strategy_args *)); static int nfs_lookitup __P((struct vnode *,char *,int,struct ucred *,struct proc *,struct nfsnode **)); static int nfs_sillyrename __P((struct vnode *,struct vnode *,struct componentname *)); static int nfsspec_access __P((struct vop_access_args *)); static int nfs_readlink __P((struct vop_readlink_args *)); static int nfs_print __P((struct vop_print_args *)); static int nfs_advlock __P((struct vop_advlock_args *)); static int nfs_bwrite __P((struct vop_bwrite_args *)); /* * Global vfs data structures for nfs */ vop_t **nfsv2_vnodeop_p; static struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_abortop_desc, (vop_t *) nfs_abortop }, { &vop_access_desc, (vop_t *) nfs_access }, { &vop_advlock_desc, (vop_t *) nfs_advlock }, { &vop_bmap_desc, (vop_t *) nfs_bmap }, { &vop_bwrite_desc, (vop_t *) nfs_bwrite }, { &vop_close_desc, (vop_t *) nfs_close }, { &vop_create_desc, (vop_t *) nfs_create }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_getpages_desc, (vop_t *) nfs_getpages }, { &vop_putpages_desc, (vop_t *) nfs_putpages }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) nfs_link }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_lookup_desc, (vop_t *) nfs_lookup }, { &vop_mkdir_desc, (vop_t *) nfs_mkdir }, { &vop_mknod_desc, (vop_t *) nfs_mknod }, { &vop_mmap_desc, (vop_t *) nfs_mmap }, { &vop_open_desc, (vop_t *) nfs_open }, { &vop_poll_desc, (vop_t *) nfs_poll }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfs_read }, { &vop_readdir_desc, (vop_t *) nfs_readdir }, { &vop_readlink_desc, (vop_t *) nfs_readlink }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_remove_desc, (vop_t *) nfs_remove }, { &vop_rename_desc, (vop_t *) nfs_rename }, 
{ &vop_rmdir_desc, (vop_t *) nfs_rmdir }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_strategy_desc, (vop_t *) nfs_strategy }, { &vop_symlink_desc, (vop_t *) nfs_symlink }, { &vop_write_desc, (vop_t *) nfs_write }, { NULL, NULL } }; static struct vnodeopv_desc nfsv2_vnodeop_opv_desc = { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries }; VNODEOP_SET(nfsv2_vnodeop_opv_desc); /* * Special device vnode ops */ vop_t **spec_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_specop_entries[] = { { &vop_default_desc, (vop_t *) spec_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsspec_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsspec_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsspec_write }, { NULL, NULL } }; static struct vnodeopv_desc spec_nfsv2nodeop_opv_desc = { &spec_nfsv2nodeop_p, nfsv2_specop_entries }; VNODEOP_SET(spec_nfsv2nodeop_opv_desc); vop_t **fifo_nfsv2nodeop_p; static struct vnodeopv_entry_desc nfsv2_fifoop_entries[] = { { &vop_default_desc, (vop_t *) fifo_vnoperate }, { &vop_access_desc, (vop_t *) nfsspec_access }, { &vop_close_desc, (vop_t *) nfsfifo_close }, { &vop_fsync_desc, (vop_t *) nfs_fsync }, { &vop_getattr_desc, (vop_t *) nfs_getattr }, { &vop_inactive_desc, (vop_t *) nfs_inactive }, { &vop_lock_desc, (vop_t *) vop_sharedlock }, { &vop_print_desc, (vop_t *) nfs_print }, { &vop_read_desc, (vop_t *) nfsfifo_read }, { &vop_reclaim_desc, (vop_t *) nfs_reclaim }, { &vop_setattr_desc, (vop_t *) nfs_setattr }, { &vop_write_desc, (vop_t *) nfsfifo_write }, { NULL, NULL } }; static struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc = { &fifo_nfsv2nodeop_p, nfsv2_fifoop_entries }; VNODEOP_SET(fifo_nfsv2nodeop_opv_desc); static int nfs_commit __P((struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, struct proc *procp)); static int nfs_mknodrpc __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap)); static int nfs_removerpc __P((struct vnode *dvp, char *name, int namelen, struct ucred *cred, struct proc *proc)); static int nfs_renamerpc __P((struct vnode *fdvp, char *fnameptr, int fnamelen, struct vnode *tdvp, char *tnameptr, int tnamelen, struct ucred *cred, struct proc *proc)); static int nfs_renameit __P((struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp)); /* * Global variables */ extern u_long nfs_true, nfs_false; extern struct nfsstats nfsstats; extern nfstype nfsv3_type[9]; struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; int nfs_numasync = 0; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. 
*/
static int
nfs_access(ap)
	struct vop_access_args /* {
		struct vnode *a_vp;
		int  a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register u_long *tl;
	register caddr_t cp;
	register int t1, t2;
	caddr_t bpos, dpos, cp2;
	int error = 0, attrflag;
	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
	u_long mode, rmode;
	int v3 = NFS_ISV3(vp);

	/*
	 * Disallow write attempts on filesystems mounted read-only;
	 * unless the file is a socket, fifo, or a block or character
	 * device resident on the filesystem.
	 */
	if ((ap->a_mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
		switch (vp->v_type) {
		case VREG: case VDIR: case VLNK:
			return (EROFS);
		}
	}
	/*
	 * For nfs v3, do an access rpc, otherwise you are stuck emulating
	 * ufs_access() locally using the vattr. This may not be correct,
	 * since the server may apply other access criteria such as
	 * client uid-->server uid mapping that we do not know about, but
	 * this is better than just returning anything that is lying about
	 * in the cache.
	 */
	if (v3) {
		nfsstats.rpccnt[NFSPROC_ACCESS]++;
		nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED);
		nfsm_fhtom(vp, v3);
		nfsm_build(tl, u_long *, NFSX_UNSIGNED);
		if (ap->a_mode & VREAD)
			mode = NFSV3ACCESS_READ;
		else
			mode = 0;
		if (vp->v_type == VDIR) {
			if (ap->a_mode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY |
				    NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE);
			if (ap->a_mode & VEXEC)
				mode |= NFSV3ACCESS_LOOKUP;
		} else {
			if (ap->a_mode & VWRITE)
				mode |= (NFSV3ACCESS_MODIFY |
				    NFSV3ACCESS_EXTEND);
			if (ap->a_mode & VEXEC)
				mode |= NFSV3ACCESS_EXECUTE;
		}
		*tl = txdr_unsigned(mode);
		nfsm_request(vp, NFSPROC_ACCESS, ap->a_p, ap->a_cred);
		nfsm_postop_attr(vp, attrflag);
		if (!error) {
			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
			rmode = fxdr_unsigned(u_long, *tl);
			/*
			 * The NFS V3 spec does not clarify whether or not
			 * the returned access bits can be a superset of
			 * the ones requested, so...
			 */
			if ((rmode & mode) != mode)
				error = EACCES;
		}
		nfsm_reqdone;
		return (error);
	} else {
		if ((error = nfsspec_access(ap)) != 0)
			return (error);

		/*
		 * Attempt to prevent a mapped root from accessing a file
		 * which it shouldn't.  We try to read a byte from the file
		 * if the user is root and the file is not zero length.
		 * After calling nfsspec_access, we should have the correct
		 * file size cached.
		 */
		if (ap->a_cred->cr_uid == 0 && (ap->a_mode & VREAD)
		    && VTONFS(vp)->n_size > 0) {
			struct iovec aiov;
			struct uio auio;
			char buf[1];

			aiov.iov_base = buf;
			aiov.iov_len = 1;
			auio.uio_iov = &aiov;
			auio.uio_iovcnt = 1;
			auio.uio_offset = 0;
			auio.uio_resid = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_procp = ap->a_p;

			if (vp->v_type == VREG)
				error = nfs_readrpc(vp, &auio, ap->a_cred);
			else if (vp->v_type == VDIR) {
				char* bp;
				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
				aiov.iov_base = bp;
				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
				error = nfs_readdirrpc(vp, &auio, ap->a_cred);
				free(bp, M_TEMP);
			} else if (vp->v_type == VLNK)
				error = nfs_readlinkrpc(vp, &auio, ap->a_cred);
			else
				error = EACCES;
		}
		return (error);
	}
}

/*
 * nfs open vnode op
 * Check to see if the type is ok
 * and that deletion is not in progress.
 * For paged in text files, you will need to flush the page cache
 * if consistency is lost.
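nfs_open() below implements the usual close-to-open style check for the non-lease case: fetch fresh attributes, and if the server's mtime no longer matches the cached n_mtime, dump the cached buffers before trusting them. The shape of that check, reduced to a sketch whose types and names are the example's own:

#include <stdio.h>
#include <time.h>

struct ncache {
	time_t	mtime;		/* server mtime when the cache was filled */
	int	valid;
};

/* On open: compare a freshly fetched mtime against the cached one. */
static void
revalidate(struct ncache *nc, time_t server_mtime)
{
	if (nc->valid && nc->mtime != server_mtime) {
		printf("mtime moved: invalidating cached blocks\n");
		nc->valid = 0;	/* stands in for nfs_vinvalbuf(vp, V_SAVE) */
	}
	nc->mtime = server_mtime;
	nc->valid = 1;
}

int
main(void)
{
	struct ncache nc = { 100, 1 };

	revalidate(&nc, 100);	/* unchanged: cache kept */
	revalidate(&nc, 200);	/* changed: cache dropped, restamped */
	return (0);
}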
*/ /* ARGSUSED */ static int nfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct vattr vattr; int error; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) { printf("open eacces vtyp=%d\n",vp->v_type); return (EACCES); } /* * Get a valid lease. If cached data is stale, flush it. */ if (nmp->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKINVALID(vp, np, ND_READ)) { do { error = nqnfs_getlease(vp, ND_READ, ap->a_cred, ap->a_p); } while (error == NQNFS_EXPIRED); if (error) { return (error); } if (np->n_lrev != np->n_brev || (np->n_flag & NQNFSNONCACHE)) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_brev = np->n_lrev; } } } else { if (np->n_flag & NMODIFIED) { if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p); if (error) { return (error); } if (np->n_mtime != vattr.va_mtime.tv_sec) { if (vp->v_type == VDIR) np->n_direofoffset = 0; if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) { return (error); } np->n_mtime = vattr.va_mtime.tv_sec; } } } if ((nmp->nm_flag & NFSMNT_NQNFS) == 0) np->n_attrstamp = 0; /* For Open/Close consistency */ return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to nfs_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. * for NQNFS - do nothing now, since 2 is dealt with via leases and * 1 should be dealt with via an fsync() system call for * cases where write errors are important. 
*/ /* ARGSUSED */ static int nfs_close(ap) struct vop_close_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); int error = 0; if (vp->v_type == VREG) { if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0 && (np->n_flag & NMODIFIED)) { if (NFS_ISV3(vp)) { error = nfs_flush(vp, ap->a_cred, MNT_WAIT, ap->a_p, 0); np->n_flag &= ~NMODIFIED; } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); np->n_attrstamp = 0; } if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } } return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register caddr_t cp; register u_long *tl; register int t1, t2; caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); /* * Update local times for special files. */ if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; /* * First look in the cache. */ if (nfs_getattrcache(vp, ap->a_vap) == 0) return (0); nfsstats.rpccnt[NFSPROC_GETATTR]++; nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred); if (!error) nfsm_loadattr(vp, ap->a_vap); nfsm_reqdone; return (error); } /* * nfs setattr call. */ static int nfs_setattr(ap) struct vop_setattr_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct vattr *vap = ap->a_vap; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (u_short)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (np->n_flag & NMODIFIED) { if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, ap->a_cred, ap->a_p, 1); else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1); if (error) return (error); } tsize = np->n_size; np->n_size = np->n_vattr.va_size = vap->va_size; vnode_pager_setsize(vp, (u_long)np->n_size); }; } else if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG && (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1)) == EINTR) return (error); error = nfs_setattrrpc(vp, vap, ap->a_cred, ap->a_p); if (error && vap->va_size != VNOVAL) { np->n_size = np->n_vattr.va_size = tsize; vnode_pager_setsize(vp, (u_long)np->n_size); } return (error); } /* * Do an nfs setattr rpc. 
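When truncating, nfs_setattr() above resizes the local node (np->n_size plus vnode_pager_setsize()) before the RPC goes out, then rolls back to the saved tsize if the RPC fails. The save/attempt/restore pattern in isolation, with a hypothetical always-failing RPC stub:

#include <stdio.h>

static unsigned long n_size = 8192;	/* cached file size */

/* Hypothetical RPC: 0 on success, an errno on failure. */
static int
setattr_rpc(unsigned long newsize)
{
	(void)newsize;
	return (5);		/* EIO, say */
}

static int
set_size(unsigned long newsize)
{
	unsigned long tsize = n_size;	/* remember for rollback */
	int error;

	n_size = newsize;		/* optimistic local update */
	error = setattr_rpc(newsize);
	if (error)
		n_size = tsize;		/* server refused: undo */
	return (error);
}

int
main(void)
{
	int error = set_size(0);

	printf("error=%d size=%lu\n", error, n_size);
	return (0);
}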
*/ static int nfs_setattrrpc(vp, vap, cred, procp) register struct vnode *vp; register struct vattr *vap; struct ucred *cred; struct proc *procp; { register struct nfsv2_sattr *sp; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; u_long *tl; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_SETATTR]++; nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); nfsm_fhtom(vp, v3); if (v3) { if (vap->va_mode != (u_short)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_mode); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_uid != (uid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_uid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_gid != (gid_t)VNOVAL) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = nfs_true; *tl = txdr_unsigned(vap->va_gid); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_size != VNOVAL) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = nfs_true; txdr_hyper(&vap->va_size, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } if (vap->va_atime.tv_sec != VNOVAL) { if (vap->va_atime.tv_sec != time.tv_sec) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_atime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } if (vap->va_mtime.tv_sec != VNOVAL) { if (vap->va_mtime.tv_sec != time.tv_sec) { nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(NFSV3SATTRTIME_TOCLIENT); txdr_nfsv3time(&vap->va_mtime, tl); } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_TOSERVER); } } else { nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = txdr_unsigned(NFSV3SATTRTIME_DONTCHANGE); } nfsm_build(tl, u_long *, NFSX_UNSIGNED); *tl = nfs_false; } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (u_short)VNOVAL) sp->sa_mode = VNOVAL; else sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = VNOVAL; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = VNOVAL; else sp->sa_gid = txdr_unsigned(vap->va_gid); sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(vp, NFSPROC_SETATTR, procp, cred); if (v3) { nfsm_wcc_data(vp, wccflag); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_reqdone; return (error); } /* * nfs lookup call, one step at a time... 
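The v3 branch of nfs_setattrrpc() above encodes every settable attribute as an XDR discriminated option: one boolean word (nfs_true/nfs_false), followed by the value only when the boolean is true. A toy encoder for one such field; the buffer handling and helper are the example's own:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

#define VNOVAL	(-1)			/* "field not set", as above */

static uint32_t *
put_mode(uint32_t *p, long mode)
{
	if (mode != VNOVAL) {
		*p++ = htonl(1);		/* nfs_true */
		*p++ = htonl((uint32_t)mode);	/* txdr_unsigned(mode) */
	} else
		*p++ = htonl(0);	/* nfs_false: no value follows */
	return (p);
}

int
main(void)
{
	uint32_t buf[4], *end;

	end = put_mode(put_mode(buf, 0644), VNOVAL);
	printf("encoded %ld words\n", (long)(end - buf));	/* 3 */
	return (0);
}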
* First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(ap) struct vop_lookup_args /* { struct vnodeop_desc *a_desc; struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { register struct componentname *cnp = ap->a_cnp; register struct vnode *dvp = ap->a_dvp; register struct vnode **vpp = ap->a_vpp; register int flags = cnp->cn_flags; register struct vnode *newvp; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsmount *nmp; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; long len; nfsfh_t *fhp; struct nfsnode *np; int lockparent, wantparent, error = 0, attrflag, fhsize; int v3 = NFS_ISV3(dvp); struct proc *p = cnp->cn_proc; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); *vpp = NULLVP; if (dvp->v_type != VDIR) return (ENOTDIR); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) { struct vattr vattr; int vpid; if (error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, p)) { *vpp = NULLVP; return (error); } newvp = *vpp; vpid = newvp->v_id; /* * See the comment starting `Step through' in ufs/ufs_lookup.c * for an explanation of the locking protocol */ if (dvp == newvp) { VREF(newvp); error = 0; } else if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = vget(newvp, LK_EXCLUSIVE, p); if (!error && lockparent && (flags & ISLASTCN)) error = vn_lock(dvp, LK_EXCLUSIVE, p); } else { error = vget(newvp, LK_EXCLUSIVE, p); if (!lockparent || error || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } if (!error) { if (vpid == newvp->v_id) { if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred, p) && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { nfsstats.lookupcache_hits++; if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); } vput(newvp); if (lockparent && dvp != newvp && (flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); } error = vn_lock(dvp, LK_EXCLUSIVE, p); *vpp = NULLVP; if (error) return (error); } error = 0; newvp = NULLVP; nfsstats.lookupcache_misses++; nfsstats.rpccnt[NFSPROC_LOOKUP]++; len = cnp->cn_namelen; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred); if (error) { nfsm_postop_attr(dvp, attrflag); m_freem(mrep); goto nfsmout; } nfsm_getfh(fhp, fhsize, v3); /* * Handle RENAME case... 
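The cache path in nfs_lookup() above captures newvp->v_id before the blocking vget()/vn_lock() calls and trusts the hit only if that generation number is unchanged afterward (and the cached n_ctime still matches freshly fetched attributes). A compact model of the generation check, under the assumption that v_id is bumped whenever a vnode gets recycled:

#include <stdio.h>

struct vn { int v_id; };	/* generation, bumped on vnode reuse */

/* Hypothetical blocking step that may recycle the vnode. */
static void
maybe_recycle(struct vn *vp, int recycle)
{
	if (recycle)
		vp->v_id++;
}

static int
hit_still_valid(struct vn *vp, int recycle)
{
	int vpid = vp->v_id;		/* capture before sleeping */

	maybe_recycle(vp, recycle);	/* vget()/vn_lock() may block */
	return (vpid == vp->v_id);	/* stale if identity changed */
}

int
main(void)
{
	struct vn v = { 42 };

	printf("%d %d\n", hit_still_valid(&v, 0), hit_still_valid(&v, 1));
	return (0);
}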
*/ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) { if (NFS_CMPFH(np, fhp, fhsize)) { m_freem(mrep); return (EISDIR); } if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } newvp = NFSTOV(np); if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); *vpp = newvp; m_freem(mrep); cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(dvp, 0, p); return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0, p); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np); if (error) { vn_lock(dvp, LK_EXCLUSIVE + LK_RETRY, p); return (error); } newvp = NFSTOV(np); if (lockparent && (flags & ISLASTCN) && (error = vn_lock(dvp, LK_EXCLUSIVE, p))) { vput(newvp); return (error); } } else if (NFS_CMPFH(np, fhp, fhsize)) { VREF(dvp); newvp = dvp; } else { if (error = nfs_nget(dvp->v_mount, fhp, fhsize, &np)) { m_freem(mrep); return (error); } if (!lockparent || !(flags & ISLASTCN)) VOP_UNLOCK(dvp, 0, p); newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, (struct vattr *)0); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(dvp, newvp, cnp); } *vpp = newvp; nfsm_reqdone; if (error) { if (newvp != NULLVP) { vrele(newvp); *vpp = NULLVP; } if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN) && error == ENOENT) { if (!lockparent) VOP_UNLOCK(dvp, 0, p); if (dvp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else error = EJUSTRETURN; } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; } return (error); } /* * nfs read call. * Just call nfs_bioread() to do the work. */ static int nfs_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VREG) return (EPERM); return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred, 0)); } /* * nfs readlink call */ static int nfs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EPERM); return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred, 0)); } /* * Do a readlink rpc. * Called by nfs_doio() from below the buffer cache. 
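nfs_readrpc() below splits a request into nm_rsize pieces and stops early at end of file: v3 replies carry an explicit eof flag, while v2 infers eof from a short read (retlen < len). The loop skeleton with the RPC stubbed out and arbitrary sizes:

#include <stdio.h>

#define RSIZE	8192		/* stand-in for nmp->nm_rsize */

/* Stub RPC: read up to len bytes at off against a 20000-byte file. */
static int
read_rpc(long off, int len, int *retlen, int *eof)
{
	long filesize = 20000;

	*retlen = (off + len > filesize) ? (int)(filesize - off) : len;
	*eof = (off + *retlen >= filesize);
	return (0);
}

int
main(void)
{
	long off = 0;
	int tsiz = 65536, len, retlen, eof, error;

	while (tsiz > 0) {
		len = (tsiz > RSIZE) ? RSIZE : tsiz;
		if ((error = read_rpc(off, len, &retlen, &eof)) != 0)
			return (error);
		printf("got %d at %ld\n", retlen, off);
		off += retlen;
		tsiz -= retlen;
		if (eof || retlen == 0)	/* the v3 termination test */
			tsiz = 0;
	}
	return (0);
}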
*/ int nfs_readlinkrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, len, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_READLINK]++; nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred); if (v3) nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); nfsm_mtouio(uiop, len); } nfsm_reqdone; return (error); } /* * nfs read rpc call * Ditto above */ int nfs_readrpc(vp, uiop, cred) register struct vnode *vp; struct uio *uiop; struct ucred *cred; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz; nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); nfsm_fhtom(vp, v3); nfsm_build(tl, u_long *, NFSX_UNSIGNED * 3); if (v3) { txdr_hyper(&uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } else { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else nfsm_loadattr(vp, (struct vattr *)0); nfsm_strsiz(retlen, nmp->nm_rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); tsiz -= retlen; if (v3) { if (eof || retlen == 0) tsiz = 0; } else if (retlen < len) tsiz = 0; } nfsmout: return (error); } /* * nfs write call */ int nfs_writerpc(vp, uiop, cred, iomode, must_commit) register struct vnode *vp; register struct uio *uiop; struct ucred *cred; int *iomode, *must_commit; { register u_long *tl; register caddr_t cp; register int t1, t2, backup; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; if (uiop->uio_offset + tsiz > 0xffffffff && !v3) return (EFBIG); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > nmp->nm_wsize) ? 
nmp->nm_wsize : tsiz; nfsm_reqhead(vp, NFSPROC_WRITE, NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); txdr_hyper(&uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); } else { nfsm_build(tl, u_long *, 4 * NFSX_UNSIGNED); *++tl = txdr_unsigned(uiop->uio_offset); tl += 2; } *tl = txdr_unsigned(len); nfsm_uiotom(uiop, len); nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred); if (v3) { wccflag = NFSV3_WCCCHK; nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; break; } else if (rlen < len) { backup = len - rlen; uiop->uio_iov->iov_base -= backup; uiop->uio_iov->iov_len += backup; uiop->uio_offset -= backup; uiop->uio_resid += backup; len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest committment level * obtained by any of the RPCs. */ if (committed == NFSV3WRITE_FILESYNC) committed = commit; else if (committed == NFSV3WRITE_DATASYNC && commit == NFSV3WRITE_UNSTABLE) committed = commit; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); nmp->nm_flag |= NFSMNT_HASWRITEVERF; } else if (bcmp((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { *must_commit = 1; bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); } } } else nfsm_loadattr(vp, (struct vattr *)0); if (wccflag) VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.tv_sec; m_freem(mrep); tsiz -= len; } nfsmout: if (vp->v_mount->mnt_flag & MNT_ASYNC) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) uiop->uio_resid = tsiz; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. 
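nfs_writerpc() above also watches the server's write verifier: when a reply carries a verifier different from the one cached in nm_verf, the server has rebooted since earlier UNSTABLE writes, so *must_commit is raised and nfs_clearcommit() forces those buffers to be written again. The comparison in miniature, with the verifier size and helper names chosen for the example:

#include <stdio.h>
#include <string.h>

#define VERFSIZE	8	/* NFSX_V3WRITEVERF is 8 bytes */

static unsigned char saved[VERFSIZE];
static int have_verf;

/* Return 1 when earlier unstable writes must be re-sent. */
static int
check_verf(const unsigned char *reply)
{
	int must_commit = 0;

	if (!have_verf)
		have_verf = 1;			/* first reply: adopt it */
	else if (memcmp(reply, saved, VERFSIZE) != 0)
		must_commit = 1;		/* server reboot detected */
	memcpy(saved, reply, VERFSIZE);
	return (must_commit);
}

int
main(void)
{
	printf("%d\n", check_verf((const unsigned char *)"bootgen1"));
	printf("%d\n", check_verf((const unsigned char *)"bootgen1"));
	printf("%d\n", check_verf((const unsigned char *)"bootgen2"));
	return (0);
}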
*/ static int nfs_mknodrpc(dvp, vpp, cnp, vap) register struct vnode *dvp; register struct vnode **vpp; register struct componentname *cnp; register struct vattr *vap; { register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np = (struct nfsnode *)0; struct vattr vattr; char *cp2; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; u_long rdev; int v3 = NFS_ISV3(dvp); if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = txdr_unsigned(vap->va_rdev); else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = 0xffffffff; else { VOP_ABORTOP(dvp, cnp); vput(dvp); return (EOPNOTSUPP); } if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } nfsstats.rpccnt[NFSPROC_MKNOD]++; nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED + NFSX_V3SRVSATTR); *tl++ = vtonfsv3_type(vap->va_type); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); if (vap->va_type == VCHR || vap->va_type == VBLK) { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = txdr_unsigned(major(vap->va_rdev)); *tl = txdr_unsigned(minor(vap->va_rdev)); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = rdev; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (newvp) vput(newvp); } else { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. 
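nfs_create() below implements NFSv3 exclusive create: the client ships an 8-byte verifier (here the primary interface address plus a bumped counter) so that a retransmitted CREATE of the same file can be told apart from a collision, and it drops back to UNCHECKED mode if the server answers NFSERR_NOTSUPP. Building such a verifier, sketched with a stand-in address:

#include <stdio.h>
#include <stdint.h>

static uint32_t create_verf;	/* bumped on every exclusive create */

/* Fill an 8-byte create verifier: host identity plus a counter. */
static void
make_verf(uint32_t host_ip, uint32_t verf[2])
{
	verf[0] = host_ip ? host_ip : create_verf;
	verf[1] = ++create_verf;
}

int
main(void)
{
	uint32_t v[2];

	make_verf(0xc0a80101, v);	/* 192.168.1.1 as a stand-in */
	printf("%08x %08x\n", v[0], v[1]);
	return (0);
}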
*/ /* ARGSUSED */ static int nfs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *newvp; int error; error = nfs_mknodrpc(ap->a_dvp, &newvp, ap->a_cnp, ap->a_vap); if (!error) vput(newvp); return (error); } static u_long create_verf; /* * nfs file create call */ static int nfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; again: nfsstats.rpccnt[NFSPROC_CREATE]++; nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(tl, u_long *, NFSX_UNSIGNED); if (fmode & O_EXCL) { *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); nfsm_build(tl, u_long *, NFSX_V3CREATEVERF); #ifdef INET if (!TAILQ_EMPTY(&in_ifaddrhead)) *tl++ = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr.s_addr; else #endif *tl++ = create_verf; *tl = ++create_verf; } else { *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); nfsm_build(tl, u_long *, NFSX_V3SRVSATTR); sp3 = (struct nfsv3_sattr *)tl; nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = 0; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } if (newvp) vput(newvp); } else if (v3 && (fmode & O_EXCL)) error = nfs_setattrrpc(newvp, vap, cnp->cn_cred, cnp->cn_proc); if (!error) { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. 
 * - If v_usecount > 1
 *	  If a rename is not already in the works
 *	     call nfs_sillyrename() to set it up
 *	  else
 *	     do the remove rpc
 */
static int
nfs_remove(ap)
	struct vop_remove_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode * a_dvp;
		struct vnode * a_vp;
		struct componentname * a_cnp;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct vnode *dvp = ap->a_dvp;
	register struct componentname *cnp = ap->a_cnp;
	register struct nfsnode *np = VTONFS(vp);
	int error = 0;
	struct vattr vattr;

#ifdef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("nfs_remove: no name");
	if (vp->v_usecount < 1)
		panic("nfs_remove: bad v_usecount");
#endif
	if (vp->v_usecount == 1 || (np->n_sillyrename &&
	    VOP_GETATTR(vp, &vattr, cnp->cn_cred, cnp->cn_proc) == 0 &&
	    vattr.va_nlink > 1)) {
		/*
		 * Purge the name cache so that the chance of a lookup for
		 * the name succeeding while the remove is in progress is
		 * minimized. Without node locking it can still happen, such
		 * that an I/O op returns ESTALE, but you can get the same
		 * ESTALE if another host removes the file anyway.
		 */
		cache_purge(vp);
		/*
		 * throw away biocache buffers, mainly to avoid
		 * unnecessary delayed writes later.
		 */
		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
		/* Do the rpc */
		if (error != EINTR)
			error = nfs_removerpc(dvp, cnp->cn_nameptr,
				cnp->cn_namelen, cnp->cn_cred, cnp->cn_proc);
		/*
		 * Kludge City: If the first reply to the remove rpc is lost..
		 *   the reply to the retransmitted request will be ENOENT
		 *   since the file was in fact removed
		 *   Therefore, we cheat and return success.
		 */
		if (error == ENOENT)
			error = 0;
	} else if (!np->n_sillyrename)
		error = nfs_sillyrename(dvp, vp, cnp);
	zfree(namei_zone, cnp->cn_pnbuf);
	np->n_attrstamp = 0;
	vput(dvp);
	if (vp == dvp)
		vrele(vp);
	else
		vput(vp);
	return (error);
}

/*
 * nfs file remove rpc called from nfs_inactive
 */
int
nfs_removeit(sp)
	register struct sillyrename *sp;
{

	return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred,
		(struct proc *)0));
}

/*
 * Nfs remove rpc, called from nfs_remove() and nfs_removeit().
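nfs_remove() above, and the ENOENT/EEXIST mappings in nfs_rename(), nfs_link(), nfs_symlink() and nfs_rmdir() further down, all paper over the same problem: these RPCs are not idempotent, so when the first reply is lost the retransmission of an already-applied request fails. Treating the expected retry errno as success, isolated:

#include <errno.h>
#include <stdio.h>

/*
 * err_retry is what a duplicate of an already-applied request returns:
 * ENOENT for REMOVE/RENAME/RMDIR, EEXIST for LINK/SYMLINK/MKDIR.
 */
static int
retry_errno(int error, int err_retry)
{
	return (error == err_retry ? 0 : error);
}

int
main(void)
{
	printf("%d\n", retry_errno(ENOENT, ENOENT));	/* treated as done */
	printf("%d\n", retry_errno(EIO, ENOENT));	/* real errors pass */
	return (0);
}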
*/ static int nfs_removerpc(dvp, name, namelen, cred, proc) register struct vnode *dvp; char *name; int namelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_REMOVE]++; nfsm_reqhead(dvp, NFSPROC_REMOVE, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_REMOVE, proc, cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs file rename call */ static int nfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { register struct vnode *fvp = ap->a_fvp; register struct vnode *tvp = ap->a_tvp; register struct vnode *fdvp = ap->a_fdvp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *tcnp = ap->a_tcnp; register struct componentname *fcnp = ap->a_fcnp; int error; #ifndef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("nfs_rename: no name"); #endif /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && tvp->v_usecount > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_proc); if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(sdvp, scnp, sp) struct vnode *sdvp; struct componentname *scnp; register struct sillyrename *sp; { return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_proc)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). 
*/ static int nfs_renamerpc(fdvp, fnameptr, fnamelen, tdvp, tnameptr, tnamelen, cred, proc) register struct vnode *fdvp; char *fnameptr; int fnamelen; register struct vnode *tdvp; char *tnameptr; int tnamelen; struct ucred *cred; struct proc *proc; { register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(fdvp); nfsstats.rpccnt[NFSPROC_RENAME]++; nfsm_reqhead(fdvp, NFSPROC_RENAME, (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen)); nfsm_fhtom(fdvp, v3); nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); nfsm_fhtom(tdvp, v3); nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); nfsm_request(fdvp, NFSPROC_RENAME, proc, cred); if (v3) { nfsm_wcc_data(fdvp, fwccflag); nfsm_wcc_data(tdvp, twccflag); } nfsm_reqdone; VTONFS(fdvp)->n_flag |= NMODIFIED; VTONFS(tdvp)->n_flag |= NMODIFIED; if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs hard link create call */ static int nfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *tdvp = ap->a_tdvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(vp); if (vp->v_mount != tdvp->v_mount) { VOP_ABORTOP(vp, cnp); if (tdvp == vp) vrele(tdvp); else vput(tdvp); return (EXDEV); } /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! */ VOP_FSYNC(vp, cnp->cn_cred, MNT_WAIT, cnp->cn_proc); nfsstats.rpccnt[NFSPROC_LINK]++; nfsm_reqhead(vp, NFSPROC_LINK, NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(vp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred); if (v3) { nfsm_postop_attr(vp, attrflag); nfsm_wcc_data(tdvp, wccflag); } nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(tdvp)->n_flag |= NMODIFIED; if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) VTONFS(tdvp)->n_attrstamp = 0; vput(tdvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vnode *newvp = (struct vnode *)0; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, cnp->cn_cred->cr_gid); } nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); if (!v3) { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred); if (v3) { if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); nfsm_wcc_data(dvp, wccflag); } nfsm_reqdone; if (newvp) vput(newvp); zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; vput(dvp); /* * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry. 
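* (See the comment in nfs_link() above: a retransmission of a SYMLINK that already succeeded on the server comes back EEXIST.)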
*/ if (error == EEXIST) error = 0; return (error); } /* * nfs make dir call */ static int nfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct nfsv2_sattr *sp; register struct nfsv3_sattr *sp3; register u_long *tl; register caddr_t cp; register long t1, t2; register int len; struct nfsnode *np = (struct nfsnode *)0; struct vnode *newvp = (struct vnode *)0; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; int gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb, *mb2; struct vattr vattr; int v3 = NFS_ISV3(dvp); if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) { VOP_ABORTOP(dvp, cnp); vput(dvp); return (error); } len = cnp->cn_namelen; nfsstats.rpccnt[NFSPROC_MKDIR]++; nfsm_reqhead(dvp, NFSPROC_MKDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); if (v3) { nfsm_build(sp3, struct nfsv3_sattr *, NFSX_V3SRVSATTR); nfsm_v3sattr(sp3, vap, cnp->cn_cred->cr_uid, vattr.va_gid); } else { nfsm_build(sp, struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid); sp->sa_gid = txdr_unsigned(vattr.va_gid); sp->sa_size = -1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred); if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; /* * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry * if we can succeed in looking up the directory. */ if (error == EEXIST || (!error && !gotvp)) { if (newvp) { vrele(newvp); newvp = (struct vnode *)0; } error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, cnp->cn_proc, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vrele(newvp); } else *ap->a_vpp = newvp; zfree(namei_zone, cnp->cn_pnbuf); vput(dvp); return (error); } /* * nfs remove directory call */ static int nfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vnode *dvp = ap->a_dvp; register struct componentname *cnp = ap->a_cnp; register u_long *tl; register caddr_t cp; register long t1, t2; caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_RMDIR]++; nfsm_reqhead(dvp, NFSPROC_RMDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred); if (v3) nfsm_wcc_data(dvp, wccflag); nfsm_reqdone; zfree(namei_zone, cnp->cn_pnbuf); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; cache_purge(dvp); cache_purge(vp); vput(vp); vput(dvp); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
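* (If the first transmission of the RMDIR succeeded but its reply was lost, the retry finds the directory already gone and the server answers ENOENT.)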
*/ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); register struct uio *uio = ap->a_uio; int tresid, error; struct vattr vattr; if (vp->v_type != VDIR) return (EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) { if (NQNFS_CKCACHABLE(vp, ND_READ)) { nfsstats.direofcache_hits++; return (0); } } else if (VOP_GETATTR(vp, &vattr, ap->a_cred, uio->uio_procp) == 0 && np->n_mtime == vattr.va_mtime.tv_sec) { nfsstats.direofcache_hits++; return (0); } } /* * Call nfs_bioread() to do the real work. */ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred, 0); if (!error && uio->uio_resid == tresid) nfsstats.direofcache_misses++; return (error); } /* * Readdir rpc call. * Called from below the buffer cache by nfs_doio(). */ int nfs_readdirrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp); u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag; int v3 = NFS_ISV3(vp); #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (NFS_DIRBLKSIZ - 1)) || (uiop->uio_resid & (NFS_DIRBLKSIZ - 1))) panic("nfs readdirrpc bad uio"); #endif /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. 
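* Each pass sends the cookie (and, for V3, the cookie verifier) saved from the previous reply; the entries that come back are doctored into struct dirent form directly in the caller's uio.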
*/ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIR]++; nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + NFSX_READDIR(v3)); nfsm_fhtom(vp, v3); if (v3) { nfsm_build(tl, u_long *, 5 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; } else { nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmp->nm_readdirsize); nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; } else { m_freem(mrep); goto nfsmout; } } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); fileno = fxdr_unsigned(u_quad_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; /* null terminate */ uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); if (v3) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); } else { nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED); } if (bigenough) { cookie.nfsuquad[0] = *tl++; if (v3) cookie.nfsuquad[1] = *tl++; } else if (v3) tl += 2; else tl++; more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: return (error); } /* * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). 
*/ int nfs_readdirplusrpc(vp, uiop, cred) struct vnode *vp; register struct uio *uiop; struct ucred *cred; { register int len, left; register struct dirent *dp; register u_long *tl; register caddr_t cp; register long t1, t2; register struct vnode *newvp; register nfsuint64 *cookiep; caddr_t bpos, dpos, cp2, dpossav1, dpossav2; struct mbuf *mreq, *mrep, *md, *mb, *mb2, *mdsav1, *mdsav2; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp), *np; nfsfh_t *fhp; u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize; #ifndef nolint dp = (struct dirent *)0; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirplusrpc bad uio"); #endif ndp->ni_dvp = vp; newvp = NULLVP; /* * If there is no cookie, assume directory was stale. */ cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) cookie = *cookiep; else return (NFSERR_BAD_COOKIE); /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; nfsm_reqhead(vp, NFSPROC_READDIRPLUS, NFSX_FH(1) + 6 * NFSX_UNSIGNED); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_procp, cred); nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); fxdr_hyper(tl, &fileno); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination*/ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base += DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; cnp->cn_nameptr = uiop->uio_iov->iov_base; cnp->cn_namelen = len; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; uiop->uio_iov->iov_base += tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED); if (bigenough) { cookie.nfsuquad[0] = *tl++; cookie.nfsuquad[1] = *tl++; } else tl += 2; /* * Since the attributes are before the file handle * (sigh), we must skip over the attributes and then * come back and get 
them. */ attrflag = fxdr_unsigned(int, *tl); if (attrflag) { dpossav1 = dpos; mdsav1 = md; nfsm_adv(NFSX_V3FATTR); nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); doit = fxdr_unsigned(int, *tl); if (doit) { nfsm_getfh(fhp, fhsize, 1); if (NFS_CMPFH(dnp, fhp, fhsize)) { VREF(vp); newvp = vp; np = dnp; } else { if (error = nfs_nget(vp->v_mount, fhp, fhsize, &np)) doit = 0; else newvp = NFSTOV(np); } } if (doit) { dpossav2 = dpos; dpos = dpossav1; mdsav2 = md; md = mdsav1; nfsm_loadattr(newvp, (struct vattr *)0); dpos = dpossav2; md = mdsav2; dp->d_type = IFTODT(VTTOIF(np->n_vattr.va_type)); ndp->ni_vp = newvp; cnp->cn_hash = 0; for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++) cnp->cn_hash += (unsigned char)*cp * i; cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); } } else { /* Just skip over the file handle */ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(i)); } if (newvp != NULLVP) { vrele(newvp); newvp = NULLVP; } nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base += left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) printf("EEK! readdirplusrpc resid > 0\n"); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; } nfsmout: if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } return (error); } /* * Silly rename. To make the NFS filesystem that is stateless look a little * more like the "ufs" a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the nfs_lookitup() fails and the * nfs_rename() completes, but... */ static int nfs_sillyrename(dvp, vp, cnp) struct vnode *dvp, *vp; struct componentname *cnp; { register struct sillyrename *sp; struct nfsnode *np; int error; short pid; cache_purge(dvp); np = VTONFS(vp); #ifndef DIAGNOSTIC if (vp->v_type == VDIR) panic("nfs: sillyrename dir"); #endif MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename), M_NFSREQ, M_WAITOK); sp->s_cred = crdup(cnp->cn_cred); sp->s_dvp = dvp; VREF(dvp); /* Fudge together a funny name */ pid = cnp->cn_proc->p_pid; sp->s_namlen = sprintf(sp->s_name, ".nfsA%04x4.4", pid); /* Try lookitups until we get one that isn't there */ while (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, (struct nfsnode **)0) == 0) { sp->s_name[4]++; if (sp->s_name[4] > 'z') { error = EINVAL; goto bad; } } if (error = nfs_renameit(dvp, cnp, sp)) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_proc, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. 
* npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(dvp, name, len, cred, procp, npp) register struct vnode *dvp; char *name; int len; struct ucred *cred; struct proc *procp; struct nfsnode **npp; { register u_long *tl; register caddr_t cp; register long t1, t2; struct vnode *newvp = (struct vnode *)0; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos, cp2; int error = 0, fhlen, attrflag; struct mbuf *mreq, *mrep, *md, *mb, *mb2; nfsfh_t *nfhp; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_LOOKUP]++; nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); nfsm_fhtom(dvp, v3); nfsm_strtom(name, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, procp, cred); if (npp && !error) { nfsm_getfh(nfhp, fhlen, v3); if (*npp) { np = *npp; if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { free((caddr_t)np->n_fhp, M_NFSBIGFH); np->n_fhp = &np->n_fh; } else if (np->n_fhsize <= NFS_SMALLFH && fhlen>NFS_SMALLFH) np->n_fhp =(nfsfh_t *)malloc(fhlen,M_NFSBIGFH,M_WAITOK); bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } } else nfsm_loadattr(newvp, (struct vattr *)0); } nfsm_reqdone; if (npp && *npp == NULL) { if (error) { if (newvp) if (newvp == dvp) vrele(newvp); else vput(newvp); } else *npp = np; } return (error); } /* * Nfs Version 3 commit rpc */ static int nfs_commit(vp, offset, cnt, cred, procp) register struct vnode *vp; u_quad_t offset; int cnt; struct ucred *cred; struct proc *procp; { register caddr_t cp; register u_long *tl; register int t1, t2; register struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos, cp2; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb, *mb2; if ((nmp->nm_flag & NFSMNT_HASWRITEVERF) == 0) return (0); nfsstats.rpccnt[NFSPROC_COMMIT]++; nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); nfsm_fhtom(vp, 1); nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED); txdr_hyper(&offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nfsm_request(vp, NFSPROC_COMMIT, procp, cred); nfsm_wcc_data(vp, wccflag); if (!error) { nfsm_dissect(tl, u_long *, NFSX_V3WRITEVERF); if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, NFSX_V3WRITEVERF)) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); error = NFSERR_STALEWRITEVERF; } } nfsm_reqdone; return (error); } /* * Kludge City.. * - make nfs_bmap() essentially a no-op that does no translation * - do nfs_strategy() by doing I/O with nfs_readrpc/nfs_writerpc * (Maybe I could use the process's page mapping, but I was concerned that * Kernel Write might not be enabled and also figured copyout() would do * a lot more work than bcopy() and also it currently happens in the * context of the swapper process (2). 
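* nfs_bmap() below therefore just scales the logical block number by the mount's I/O size; there is no on-disk layout to translate against.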
*/ static int nfs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { register struct vnode *vp = ap->a_vp; if (ap->a_vpp != NULL) *ap->a_vpp = vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn * btodb(vp->v_mount->mnt_stat.f_iosize); if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(), otherwise just call nfs_doio() to do the * request. */ static int nfs_strategy(ap) struct vop_strategy_args *ap; { register struct buf *bp = ap->a_bp; struct ucred *cr; struct proc *p; int error = 0; if (bp->b_flags & B_PHYS) panic("nfs physio"); if (bp->b_flags & B_ASYNC) p = (struct proc *)0; else p = curproc; /* XXX */ if (bp->b_flags & B_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting, * queue the request, wake it up and wait for completion; * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(bp, NOCRED)) error = nfs_doio(bp, cr, p); return (error); } /* * Mmap a file * * NB Currently unsupported. */ /* ARGSUSED */ static int nfs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } /* * fsync vnode op. Just call nfs_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(ap) struct vop_fsync_args /* { struct vnodeop_desc *a_desc; struct vnode * a_vp; struct ucred * a_cred; int a_waitfor; struct proc * a_p; } */ *ap; { return (nfs_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_p, 1)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. */ static int nfs_flush(vp, cred, waitfor, p, commit) register struct vnode *vp; struct ucred *cred; int waitfor; struct proc *p; int commit; { register struct nfsnode *np = VTONFS(vp); register struct buf *bp; register int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int s, error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1; u_quad_t off, endoff, toff; struct ucred* wcred = NULL; struct buf **bvec = NULL; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but has not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, nfs_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV3(vp) && commit) { s = splbio(); /* * Count up how many buffers are waiting for a commit. */ bveccount = 0; for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. * If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below.
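* (In that case bvec falls back to the on-stack array and the scan below stops after NFS_COMMITBVECSIZ buffers; any B_NEEDCOMMIT buffers that did not fit are pushed individually through nfs_writebp() on the second pass.)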
*/ if (bveccount > NFS_COMMITBVECSIZ) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bvecpos >= bvecsize) break; if ((bp->b_flags & (B_BUSY | B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) continue; bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; bp->b_flags |= (B_BUSY | B_WRITEINPROG); vfs_busy_pages(bp, 1); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } splx(s); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = nfs_commit(vp, off, (int)(endoff - off), wcred, p); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = nfs_commit(vp, off, (int)size, bp->b_wcred, p); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_WRITEINPROG); if (retv) { vfs_unbusy_pages(bp); brelse(bp); } else { vp->v_numoutput++; bp->b_flags |= B_ASYNC; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) { vfs_bio_need_satisfy(); } } + s = splbio(); /* XXX check this positionning */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); bp->b_dirtyoff = bp->b_dirtyend = 0; reassignbuf(bp, vp); + splx(s); biodone(bp); } } } /* * Start/do any write(s) that are required. 
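* Busy buffers are skipped (or, with MNT_WAIT once past the first pass, slept on and the scan restarted); buffers still needing a commit are left to the commit logic above on the first pass.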
*/ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if (bp->b_flags & B_BUSY) { if (waitfor != MNT_WAIT || passone) continue; bp->b_flags |= B_WANTED; error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); splx(s); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) continue; bremfree(bp); if (passone || !commit) bp->b_flags |= (B_BUSY|B_ASYNC); else bp->b_flags |= (B_BUSY|B_ASYNC|B_WRITEINPROG|B_NEEDCOMMIT); splx(s); VOP_BWRITE(bp); goto loop; } splx(s); if (passone) { passone = 0; goto again; } if (waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; error = tsleep((caddr_t)&vp->v_numoutput, slpflag | (PRIBIO + 1), "nfsfsync", slptimeo); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, p)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } } } if (vp->v_dirtyblkhd.lh_first && commit) { goto loop; } } if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); return (error); } /* * NFS advisory byte-level locks. * Currently unsupported. */ static int nfs_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); /* * The following kludge is to allow diskless support to work * until a real NFS lockd is implemented. Basically, just pretend * that this is a local lock. */ return (lf_advlock(ap, &(np->n_lockf), np->n_size)); } /* * Print out the contents of an nfsnode. */ static int nfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); printf("tag VT_NFS, fileid %ld fsid 0x%lx", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * Just call nfs_writebp() with the force argument set to 1. */ static int nfs_bwrite(ap) struct vop_bwrite_args /* { struct vnode *a_bp; } */ *ap; { return (nfs_writebp(ap->a_bp, 1)); } /* * This is a clone of vn_bwrite(), except that B_WRITEINPROG isn't set unless * the force flag is one and it also handles the B_NEEDCOMMIT flag. */ int nfs_writebp(bp, force) register struct buf *bp; int force; { + int s; register int oldflags = bp->b_flags, retv = 1; off_t off; if(!(bp->b_flags & B_BUSY)) panic("bwrite: buffer is not busy???"); if (bp->b_flags & B_INVAL) bp->b_flags |= B_INVAL | B_NOCACHE; if (bp->b_flags & B_DELWRI) { --numdirtybuffers; if (needsbuffer) vfs_bio_need_satisfy(); } + s = splbio(); /* XXX check if needed */ bp->b_flags &= ~(B_READ|B_DONE|B_ERROR|B_DELWRI); if ((oldflags & (B_ASYNC|B_DELWRI)) == (B_ASYNC|B_DELWRI)) { reassignbuf(bp, bp->b_vp); } bp->b_vp->v_numoutput++; curproc->p_stats->p_ru.ru_oublock++; + splx(s); /* * If B_NEEDCOMMIT is set, a commit rpc may do the trick. If not * an actual write will have to be scheduled via. VOP_STRATEGY(). * If B_WRITEINPROG is already set, then push it with a write anyhow. 
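* (A commit that fails with NFSERR_STALEWRITEVERF means the write verifier changed, i.e. the server rebooted and lost the uncommitted data, so nfs_clearcommit() marks everything to be written again.)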
*/ vfs_busy_pages(bp, 1); if ((oldflags & (B_NEEDCOMMIT | B_WRITEINPROG)) == B_NEEDCOMMIT) { off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; bp->b_flags |= B_WRITEINPROG; retv = nfs_commit(bp->b_vp, off, bp->b_dirtyend-bp->b_dirtyoff, bp->b_wcred, bp->b_proc); bp->b_flags &= ~B_WRITEINPROG; if (!retv) { bp->b_dirtyoff = bp->b_dirtyend = 0; bp->b_flags &= ~B_NEEDCOMMIT; biodone(bp); } else if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(bp->b_vp->v_mount); } if (retv) { if (force) bp->b_flags |= B_WRITEINPROG; VOP_STRATEGY(bp); } if( (oldflags & B_ASYNC) == 0) { int rtval = biowait(bp); if (oldflags & B_DELWRI) { + s = splbio(); reassignbuf(bp, bp->b_vp); + splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vattr *vap; register gid_t *gp; register struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; mode_t mode = ap->a_mode; struct vattr vattr; register int i; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((mode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); } } /* * If you're the super-user, * you always get access. */ if (cred->cr_uid == 0) return (0); vap = &vattr; error = VOP_GETATTR(vp, vap, cred, ap->a_p); if (error) return (error); /* * Access check is based on only one of owner, group, public. * If not owner, then check group. If not a member of the * group, then check public access. */ if (cred->cr_uid != vap->va_uid) { mode >>= 3; gp = cred->cr_groups; for (i = 0; i < cred->cr_ngroups; i++, gp++) if (vap->va_gid == *gp) goto found; mode >>= 3; found: ; } error = (vap->va_mode & mode) == mode ? 0 : EACCES; return (error); } /* * Read wrapper for special devices. */ static int nfsspec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; gettime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for special devices. */ static int nfsspec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; gettime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for special devices. * * Update the times on the nfsnode then do device close. 
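* The times were only accumulated locally (the device I/O itself never reaches the server), so they are pushed out with a SETATTR on the last close of a writable mount.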
*/ static int nfsspec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap)); } /* * Read wrapper for fifos. */ static int nfsfifo_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set access flag. */ np->n_flag |= NACC; gettime(&tv); np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap)); } /* * Write wrapper for fifos. */ static int nfsfifo_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct nfsnode *np = VTONFS(ap->a_vp); struct timeval tv; /* * Set update flag. */ np->n_flag |= NUPD; gettime(&tv); np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct nfsnode *np = VTONFS(vp); struct timeval tv; struct vattr vattr; if (np->n_flag & (NACC | NUPD)) { gettime(&tv); if (np->n_flag & NACC) { np->n_atim.tv_sec = tv.tv_sec; np->n_atim.tv_nsec = tv.tv_usec * 1000; } if (np->n_flag & NUPD) { np->n_mtim.tv_sec = tv.tv_sec; np->n_mtim.tv_nsec = tv.tv_usec * 1000; } np->n_flag |= NCHG; if (vp->v_usecount == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; (void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p); } } return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap)); } Index: head/sys/pci/if_de.c =================================================================== --- head/sys/pci/if_de.c (revision 34265) +++ head/sys/pci/if_de.c (revision 34266) @@ -1,5442 +1,5444 @@ +#undef __FreeBSD__ +#define __FreeBSD__ 3 /* $NetBSD: if_de.c,v 1.56 1997/10/20 14:32:46 matt Exp $ */ -/* $Id: if_de.c,v 1.79 1998/02/06 12:14:08 eivind Exp $ */ +/* $Id: if_de.c,v 1.80 1998/02/20 13:11:50 bde Exp $ */ /*- * Copyright (c) 1994-1997 Matt Thomas (matt@3am-software.com) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Id: if_de.c,v 1.94 1997/07/03 16:55:07 thomas Exp * */ /* * DEC 21040 PCI Ethernet Controller * * Written by Matt Thomas * BPF support code stolen directly from if_ec.c * * This driver supports the DEC DE435 or any other PCI * board which support 21040, 21041, or 21140 (mostly). */ #define TULIP_HDR_DATA #include "opt_inet.h" #include "opt_ipx.h" #include #include #include #include #include #include #include #if defined(__FreeBSD__) #include #elif defined(__bsdi__) || defined(__NetBSD__) #include #endif #if defined(__NetBSD__) #include "rnd.h" #if NRND > 0 #include #endif #endif #include #if defined(SIOCSIFMEDIA) && !defined(TULIP_NOIFMEDIA) #include #endif #include #ifdef TULIP_USE_SOFTINTR #include #endif #if defined(__bsdi__) && _BSDI_VERSION >= 199701 #include #include #endif #include "bpfilter.h" #if NBPFILTER > 0 #include #endif #ifdef INET #include #include #endif #ifdef IPX #include #include #endif #ifdef NS #include #include #endif #include #if defined(__FreeBSD__) #include #include #if NPCI > 0 #include #include #define DEVAR_INCLUDE "pci/if_devar.h" #endif #endif /* __FreeBSD__ */ #if defined(__bsdi__) #include #include #include #include #include #include #include #if _BSDI_VERSION < 199510 #include #else #define NEISA 0 #endif #if NEISA > 0 && _BSDI_VERSION >= 199401 #include #define TULIP_EISA #endif #define DEVAR_INCLUDE "i386/pci/if_devar.h" #endif /* __bsdi__ */ #if defined(__NetBSD__) #include #if defined(INET) #include #endif #include #if defined(__alpha__) #include #endif #include #include #include #define DEVAR_INCLUDE "dev/pci/if_devar.h" #endif /* __NetBSD__ */ /* * Intel CPUs should use I/O mapped access. */ #if defined(__i386__) || defined(TULIP_EISA) #define TULIP_IOMAPPED #endif #if 0 /* * This turns on all sort of debugging stuff and make the * driver much larger. */ #define TULIP_DEBUG #endif #if 0 #define TULIP_PERFSTATS #endif #if 0 #define TULIP_USE_SOFTINTR #endif #define TULIP_HZ 10 #include DEVAR_INCLUDE /* * This module supports * the DEC 21040 PCI Ethernet Controller. * the DEC 21041 PCI Ethernet Controller. * the DEC 21140 PCI Fast Ethernet Controller. 
*/ static void tulip_mii_autonegotiate(tulip_softc_t * const sc, const unsigned phyaddr); static tulip_intrfunc_t tulip_intr_shared(void *arg); static tulip_intrfunc_t tulip_intr_normal(void *arg); static void tulip_init(tulip_softc_t * const sc); static void tulip_reset(tulip_softc_t * const sc); static ifnet_ret_t tulip_ifstart_one(struct ifnet *ifp); static ifnet_ret_t tulip_ifstart(struct ifnet *ifp); static struct mbuf *tulip_txput(tulip_softc_t * const sc, struct mbuf *m); static void tulip_txput_setup(tulip_softc_t * const sc); static void tulip_rx_intr(tulip_softc_t * const sc); static void tulip_addr_filter(tulip_softc_t * const sc); static unsigned tulip_mii_readreg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno); static void tulip_mii_writereg(tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data); static int tulip_mii_map_abilities(tulip_softc_t * const sc, unsigned abilities); static tulip_media_t tulip_mii_phy_readspecific(tulip_softc_t * const sc); static int tulip_srom_decode(tulip_softc_t * const sc); #if defined(IFM_ETHER) static int tulip_ifmedia_change(struct ifnet * const ifp); static void tulip_ifmedia_status(struct ifnet * const ifp, struct ifmediareq *req); #endif /* static void tulip_21140_map_media(tulip_softc_t *sc); */ static void tulip_timeout_callback( void *arg) { tulip_softc_t * const sc = arg; tulip_spl_t s = TULIP_RAISESPL(); TULIP_PERFSTART(timeout) sc->tulip_flags &= ~TULIP_TIMEOUTPENDING; sc->tulip_probe_timeout -= 1000 / TULIP_HZ; (sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_TIMER); TULIP_PERFEND(timeout); TULIP_RESTORESPL(s); } static void tulip_timeout( tulip_softc_t * const sc) { if (sc->tulip_flags & TULIP_TIMEOUTPENDING) return; sc->tulip_flags |= TULIP_TIMEOUTPENDING; timeout(tulip_timeout_callback, sc, (hz + TULIP_HZ / 2) / TULIP_HZ); } #if defined(TULIP_NEED_FASTTIMEOUT) static void tulip_fasttimeout_callback( void *arg) { tulip_softc_t * const sc = arg; tulip_spl_t s = TULIP_RAISESPL(); sc->tulip_flags &= ~TULIP_FASTTIMEOUTPENDING; (sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_FASTTIMER); TULIP_RESTORESPL(s); } static void tulip_fasttimeout( tulip_softc_t * const sc) { if (sc->tulip_flags & TULIP_FASTTIMEOUTPENDING) return; sc->tulip_flags |= TULIP_FASTTIMEOUTPENDING; timeout(tulip_fasttimeout_callback, sc, 1); } #endif static int tulip_txprobe( tulip_softc_t * const sc) { struct mbuf *m; /* * Before we are sure this is the right media we need * to send a small packet to make sure there's carrier. * Strangely, BNC and AUI will "see" receive data if * either is connected so the transmit is the only way * to verify the connectivity. */ MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) return 0; /* * Construct a LLC TEST message which will point to ourselves. */ bcopy(sc->tulip_enaddr, mtod(m, struct ether_header *)->ether_dhost, 6); bcopy(sc->tulip_enaddr, mtod(m, struct ether_header *)->ether_shost, 6); mtod(m, struct ether_header *)->ether_type = htons(3); mtod(m, unsigned char *)[14] = 0; mtod(m, unsigned char *)[15] = 0; mtod(m, unsigned char *)[16] = 0xE3; /* LLC Class1 TEST (no poll) */ m->m_len = m->m_pkthdr.len = sizeof(struct ether_header) + 3; /* * send it! 
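* (The frame is addressed to ourselves: it is the transmit completion, not a reception, that tells us the selected media has carrier.)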
*/ sc->tulip_cmdmode |= TULIP_CMD_TXRUN; sc->tulip_intrmask |= TULIP_STS_TXINTR; sc->tulip_flags |= TULIP_TXPROBE_ACTIVE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); if ((m = tulip_txput(sc, m)) != NULL) m_freem(m); sc->tulip_probe.probe_txprobes++; return 1; } #ifdef BIG_PACKET #define TULIP_SIAGEN_WATCHDOG (sc->tulip_if.if_mtu > ETHERMTU ? TULIP_WATCHDOG_RXDISABLE|TULIP_WATCHDOG_TXDISABLE : 0) #else #define TULIP_SIAGEN_WATCHDOG 0 #endif static void tulip_media_set( tulip_softc_t * const sc, tulip_media_t media) { const tulip_media_info_t *mi = sc->tulip_mediums[media]; if (mi == NULL) return; /* * If we are switching media, make sure we don't think there's * any stale RX activity */ sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); TULIP_CSR_WRITE(sc, csr_sia_tx_rx, mi->mi_sia_tx_rx); if (sc->tulip_features & TULIP_HAVE_SIAGP) { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_control|mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); DELAY(50); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_gp_data|mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); } else { TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_sia_general|TULIP_SIAGEN_WATCHDOG); } TULIP_CSR_WRITE(sc, csr_sia_connectivity, mi->mi_sia_connectivity); } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { #define TULIP_GPR_CMDBITS (TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_SCRAMBLER|TULIP_CMD_TXTHRSHLDCTL) /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. */ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, (u_int8_t) mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { /* * If the cmdmode bits don't match the currently operating mode, * set the cmdmode appropriately and reset the chip. 
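* (TULIP_GPR_CMDBITS is the port select, PCS function, scrambler and transmit threshold bits; the code never changes them without going through a reset.)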
*/ if (((mi->mi_cmdmode ^ TULIP_CSR_READ(sc, csr_command)) & TULIP_GPR_CMDBITS) != 0) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; tulip_reset(sc); } TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpcontrol); TULIP_CSR_WRITE(sc, csr_sia_general, mi->mi_gpdata); } else if (mi->mi_type == TULIP_MEDIAINFO_MII && sc->tulip_probe_state != TULIP_PROBE_INACTIVE) { int idx; if (sc->tulip_features & TULIP_HAVE_SIAGP) { const u_int8_t *dp; dp = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx = 0; idx < mi->mi_reset_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx = 0; idx < mi->mi_gpr_length; idx++, dp += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp[0] + 256 * dp[1]) << 16); } } else { for (idx = 0; idx < mi->mi_reset_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx = 0; idx < mi->mi_gpr_length; idx++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx]); } } if (sc->tulip_flags & TULIP_TRYNWAY) { tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); } else if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { u_int32_t data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_CONTROL); data &= ~(PHYCTL_SELECT_100MB|PHYCTL_FULL_DUPLEX|PHYCTL_AUTONEG_ENABLE); sc->tulip_flags &= ~TULIP_DIDNWAY; if (TULIP_IS_MEDIA_FD(media)) data |= PHYCTL_FULL_DUPLEX; if (TULIP_IS_MEDIA_100MB(media)) data |= PHYCTL_SELECT_100MB; tulip_mii_writereg(sc, sc->tulip_phyaddr, PHYREG_CONTROL, data); } } } static void tulip_linkup( tulip_softc_t * const sc, tulip_media_t media) { if ((sc->tulip_flags & TULIP_LINKUP) == 0) sc->tulip_flags |= TULIP_PRINTLINKUP; sc->tulip_flags |= TULIP_LINKUP; sc->tulip_if.if_flags &= ~IFF_OACTIVE; #if 0 /* XXX how does this work with ifmedia? */ if ((sc->tulip_flags & TULIP_DIDNWAY) == 0) { if (sc->tulip_if.if_flags & IFF_FULLDUPLEX) { if (TULIP_CAN_MEDIA_FD(media) && sc->tulip_mediums[TULIP_FD_MEDIA_OF(media)] != NULL) media = TULIP_FD_MEDIA_OF(media); } else { if (TULIP_IS_MEDIA_FD(media) && sc->tulip_mediums[TULIP_HD_MEDIA_OF(media)] != NULL) media = TULIP_HD_MEDIA_OF(media); } } #endif if (sc->tulip_media != media) { #ifdef TULIP_DEBUG sc->tulip_dbg.dbg_last_media = sc->tulip_media; #endif sc->tulip_media = media; sc->tulip_flags |= TULIP_PRINTMEDIA; if (TULIP_IS_MEDIA_FD(sc->tulip_media)) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; } else if (sc->tulip_chipid != TULIP_21041 || (sc->tulip_flags & TULIP_DIDNWAY) == 0) { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } } /* * We could set probe_timeout to 0 but setting to 3000 puts this * in one central place, and all that matters is that tulip_linkup is * followed by a tulip_timeout. Therefore setting it should not * result in aberrant behaviour. */ sc->tulip_probe_timeout = 3000; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_TRYNWAY); if (sc->tulip_flags & TULIP_INRESET) { tulip_media_set(sc, sc->tulip_media); } else if (sc->tulip_probe_media != sc->tulip_media) { /* * No reason to change media if we have the right media.
*/ tulip_reset(sc); tulip_init(sc); } } static void tulip_media_print( tulip_softc_t * const sc) { if ((sc->tulip_flags & TULIP_LINKUP) == 0) return; if (sc->tulip_flags & TULIP_PRINTMEDIA) { printf(TULIP_PRINTF_FMT ": enabling %s port\n", TULIP_PRINTF_ARGS, tulip_mediums[sc->tulip_media]); sc->tulip_flags &= ~(TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); } else if (sc->tulip_flags & TULIP_PRINTLINKUP) { printf(TULIP_PRINTF_FMT ": link up\n", TULIP_PRINTF_ARGS); sc->tulip_flags &= ~TULIP_PRINTLINKUP; } } #if defined(TULIP_DO_GPR_SENSE) static tulip_media_t tulip_21140_gpr_media_sense( tulip_softc_t * const sc) { tulip_media_t maybe_media = TULIP_MEDIA_UNKNOWN; tulip_media_t last_media = TULIP_MEDIA_UNKNOWN; tulip_media_t media; /* * If one of the media blocks contained a default media flag, * use that. */ for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { const tulip_media_info_t *mi; /* * Media is not supported (or is full-duplex). */ if ((mi = sc->tulip_mediums[media]) == NULL || TULIP_IS_MEDIA_FD(media)) continue; if (mi->mi_type != TULIP_MEDIAINFO_GPR) continue; /* * Remember the media if this is the "default" media. */ if (mi->mi_default && maybe_media == TULIP_MEDIA_UNKNOWN) maybe_media = media; /* * No activity mask? Can't see if it is active if there's no mask. */ if (mi->mi_actmask == 0) continue; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) != mi->mi_actdata) continue; #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": gpr_media_sense: %s: 0x%02x & 0x%02x == 0x%02x\n", TULIP_PRINTF_ARGS, tulip_mediums[media], TULIP_CSR_READ(sc, csr_gp) & 0xFF, mi->mi_actmask, mi->mi_actdata); #endif /* * It does! If this is the first media we detected, then * remember this media. If it isn't the first, then there were * multiple matches, which we equate to no match (since we don't * know which to select, if any). */ if (last_media == TULIP_MEDIA_UNKNOWN) { last_media = media; } else if (last_media != media) { last_media = TULIP_MEDIA_UNKNOWN; } } return (last_media != TULIP_MEDIA_UNKNOWN) ? last_media : maybe_media; } #endif /* TULIP_DO_GPR_SENSE */ static tulip_link_status_t tulip_media_link_monitor( tulip_softc_t * const sc) { const tulip_media_info_t * const mi = sc->tulip_mediums[sc->tulip_media]; tulip_link_status_t linkup = TULIP_LINK_DOWN; if (mi == NULL) { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_link_monitor: %s: botch at line %d\n", tulip_mediums[sc->tulip_media],__LINE__); #endif return TULIP_LINK_UNKNOWN; } /* * Have we seen some packets? If so, the link must be good. */ if ((sc->tulip_flags & (TULIP_RXACT|TULIP_LINKUP)) == (TULIP_RXACT|TULIP_LINKUP)) { sc->tulip_flags &= ~TULIP_RXACT; sc->tulip_probe_timeout = 3000; return TULIP_LINK_UP; } sc->tulip_flags &= ~TULIP_RXACT; if (mi->mi_type == TULIP_MEDIAINFO_MII) { u_int32_t status; /* * Read the PHY status register. */ status = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if (status & PHYSTS_AUTONEG_DONE) { /* * If the PHY has completed autonegotiation, see if the * remote system's abilities have changed. If so, upgrade or * downgrade as appropriate.
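* (The link partner ability bits are shifted left 6 below so that they line up with the corresponding capability bits of the status register before being compared.)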
*/ u_int32_t abilities = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_AUTONEG_ABILITIES); abilities = (abilities << 6) & status; if (abilities != sc->tulip_abilities) { #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation changed: 0x%04x -> 0x%04x\n", TULIP_PRINTF_ARGS, sc->tulip_phyaddr, sc->tulip_abilities, abilities); #endif if (tulip_mii_map_abilities(sc, abilities)) { tulip_linkup(sc, sc->tulip_probe_media); return TULIP_LINK_UP; } /* * if we had selected media because of autonegotiation, * we need to probe for the new media. */ sc->tulip_probe_state = TULIP_PROBE_INACTIVE; if (sc->tulip_flags & TULIP_DIDNWAY) return TULIP_LINK_DOWN; } } /* * The link is now up. If was down, say its back up. */ if ((status & (PHYSTS_LINK_UP|PHYSTS_REMOTE_FAULT)) == PHYSTS_LINK_UP) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR) { /* * No activity sensor? Assume all's well. */ if (mi->mi_actmask == 0) return TULIP_LINK_UNKNOWN; /* * Does the activity data match? */ if ((TULIP_CSR_READ(sc, csr_gp) & mi->mi_actmask) == mi->mi_actdata) linkup = TULIP_LINK_UP; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { /* * Assume non TP ok for now. */ if (!TULIP_IS_MEDIA_TP(sc->tulip_media)) return TULIP_LINK_UNKNOWN; if ((TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) == 0) linkup = TULIP_LINK_UP; #if defined(TULIP_DEBUG) if (sc->tulip_probe_timeout <= 0) printf(TULIP_PRINTF_FMT ": sia status = 0x%08x\n", TULIP_PRINTF_ARGS, TULIP_CSR_READ(sc, csr_sia_status)); #endif } else if (mi->mi_type == TULIP_MEDIAINFO_SYM) { return TULIP_LINK_UNKNOWN; } /* * We will wait for 3 seconds until the link goes into suspect mode. */ if (sc->tulip_flags & TULIP_LINKUP) { if (linkup == TULIP_LINK_UP) sc->tulip_probe_timeout = 3000; if (sc->tulip_probe_timeout > 0) return TULIP_LINK_UP; sc->tulip_flags &= ~TULIP_LINKUP; printf(TULIP_PRINTF_FMT ": link down: cable problem?\n", TULIP_PRINTF_ARGS); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_downed++; #endif return TULIP_LINK_DOWN; } static void tulip_media_poll( tulip_softc_t * const sc, tulip_mediapoll_event_t event) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE && event == TULIP_MEDIAPOLL_TIMER) { switch (tulip_media_link_monitor(sc)) { case TULIP_LINK_DOWN: { /* * Link Monitor failed. Probe for new media. */ event = TULIP_MEDIAPOLL_LINKFAIL; break; } case TULIP_LINK_UP: { /* * Check again soon. */ tulip_timeout(sc); return; } case TULIP_LINK_UNKNOWN: { /* * We can't tell so don't bother. */ return; } } } if (event == TULIP_MEDIAPOLL_LINKFAIL) { if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) { if (TULIP_DO_AUTOSENSE(sc)) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_failures++; #endif sc->tulip_media = TULIP_MEDIA_UNKNOWN; tulip_reset(sc); /* restart probe */ } return; } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_pollintrs++; #endif } if (event == TULIP_MEDIAPOLL_START) { sc->tulip_if.if_flags |= IFF_OACTIVE; if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE) return; sc->tulip_probe_mediamask = 0; sc->tulip_probe_passes = 0; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_media_probes++; #endif /* * If the SROM contained an explicit media to use, use it. */ sc->tulip_cmdmode &= ~(TULIP_CMD_RXRUN|TULIP_CMD_FULLDUPLEX); sc->tulip_flags |= TULIP_TRYNWAY|TULIP_PROBE1STPASS; sc->tulip_flags &= ~(TULIP_DIDNWAY|TULIP_PRINTMEDIA|TULIP_PRINTLINKUP); /* * connidx is defaulted to a media_unknown type. 
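* If the SROM named an explicit connector, that media is selected outright below and the whole probe state machine is skipped.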
*/ sc->tulip_probe_media = tulip_srom_conninfo[sc->tulip_connidx].sc_media; if (sc->tulip_probe_media != TULIP_MEDIA_UNKNOWN) { tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_features & TULIP_HAVE_GPR) { sc->tulip_probe_state = TULIP_PROBE_GPRTEST; sc->tulip_probe_timeout = 2000; } else { sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; } } /* * Ignore txprobe failures or spurious callbacks. */ if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED && sc->tulip_probe_state != TULIP_PROBE_MEDIATEST) { sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * If we really transmitted a packet, then that's the media we'll use. */ if (event == TULIP_MEDIAPOLL_TXPROBE_OK || event == TULIP_MEDIAPOLL_LINKPASS) { if (event == TULIP_MEDIAPOLL_LINKPASS) sc->tulip_probe_media = TULIP_MEDIA_10BASET; #if defined(TULIP_DEBUG) else sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++; #endif tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } if (sc->tulip_probe_state == TULIP_PROBE_GPRTEST) { #if defined(TULIP_DO_GPR_SENSE) /* * Check for media via the general purpose register. * * Try to sense the media via the GPR. If the same value * occurs 3 times in a row then just use that. */ if (sc->tulip_probe_timeout > 0) { tulip_media_t new_probe_media = tulip_21140_gpr_media_sense(sc); #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": media_poll: gpr sensing = %s\n", TULIP_PRINTF_ARGS, tulip_mediums[new_probe_media]); #endif if (new_probe_media != TULIP_MEDIA_UNKNOWN) { if (new_probe_media == sc->tulip_probe_media) { if (--sc->tulip_probe_count == 0) tulip_linkup(sc, sc->tulip_probe_media); } else { sc->tulip_probe_count = 10; } } sc->tulip_probe_media = new_probe_media; tulip_timeout(sc); return; } #endif /* TULIP_DO_GPR_SENSE */ /* * Brute force. We cycle through each of the media types * and try to transmit a packet. */ sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe_media = TULIP_MEDIA_MAX; sc->tulip_probe_timeout = 0; tulip_timeout(sc); return; } if (sc->tulip_probe_state != TULIP_PROBE_MEDIATEST && (sc->tulip_features & TULIP_HAVE_MII)) { tulip_media_t old_media = sc->tulip_probe_media; tulip_mii_autonegotiate(sc, sc->tulip_phyaddr); switch (sc->tulip_probe_state) { case TULIP_PROBE_FAILED: case TULIP_PROBE_MEDIATEST: { /* * Try the next media. */ sc->tulip_probe_mediamask |= sc->tulip_mediums[sc->tulip_probe_media]->mi_mediamask; sc->tulip_probe_timeout = 0; #ifdef notyet if (sc->tulip_probe_state == TULIP_PROBE_FAILED) break; if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) break; sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 300; #endif break; } case TULIP_PROBE_PHYAUTONEG: { return; } case TULIP_PROBE_INACTIVE: { /* * Only probe if we autonegotiated a media that hasn't failed. */ sc->tulip_probe_timeout = 0; if (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) { sc->tulip_probe_media = old_media; break; } tulip_linkup(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } default: { #if defined(DIAGNOSTIC) || defined(TULIP_DEBUG) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } } if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txprobes_failed[sc->tulip_probe_media]++; #endif sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; return; } /* * switch to another media if we tried this one enough. 
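* The candidates are walked downward from TULIP_MEDIA_MAX, skipping full duplex variants and media already marked in probe_mediamask; after three complete passes we give up and blame the cable.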
*/ if (/* event == TULIP_MEDIAPOLL_TXPROBE_FAILED || */ sc->tulip_probe_timeout <= 0) { #if defined(TULIP_DEBUG) if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { printf(TULIP_PRINTF_FMT ": poll media unknown!\n", TULIP_PRINTF_ARGS); sc->tulip_probe_media = TULIP_MEDIA_MAX; } #endif /* * Find the next media type to check for. Full Duplex * types are not allowed. */ do { sc->tulip_probe_media -= 1; if (sc->tulip_probe_media == TULIP_MEDIA_UNKNOWN) { if (++sc->tulip_probe_passes == 3) { printf(TULIP_PRINTF_FMT ": autosense failed: cable problem?\n", TULIP_PRINTF_ARGS); if ((sc->tulip_if.if_flags & IFF_UP) == 0) { sc->tulip_if.if_flags &= ~IFF_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } sc->tulip_flags ^= TULIP_TRYNWAY; /* XXX */ sc->tulip_probe_mediamask = 0; sc->tulip_probe_media = TULIP_MEDIA_MAX - 1; } } while (sc->tulip_mediums[sc->tulip_probe_media] == NULL || (sc->tulip_probe_mediamask & TULIP_BIT(sc->tulip_probe_media)) || TULIP_IS_MEDIA_FD(sc->tulip_probe_media)); #if defined(TULIP_DEBUG) printf(TULIP_PRINTF_FMT ": %s: probing %s\n", TULIP_PRINTF_ARGS, event == TULIP_MEDIAPOLL_TXPROBE_FAILED ? "txprobe failed" : "timeout", tulip_mediums[sc->tulip_probe_media]); #endif sc->tulip_probe_timeout = TULIP_IS_MEDIA_TP(sc->tulip_probe_media) ? 2500 : 1000; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe.probe_txprobes = 0; tulip_reset(sc); tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; } tulip_timeout(sc); /* * If this is hanging off a phy, we know we are doing NWAY and we have * forced the phy to a specific speed. Wait for link up before * sending a packet. */ switch (sc->tulip_mediums[sc->tulip_probe_media]->mi_type) { case TULIP_MEDIAINFO_MII: { if (sc->tulip_probe_media != tulip_mii_phy_readspecific(sc)) return; break; } case TULIP_MEDIAINFO_SIA: { if (TULIP_IS_MEDIA_TP(sc->tulip_probe_media)) { if (TULIP_CSR_READ(sc, csr_sia_status) & TULIP_SIASTS_LINKFAIL) return; tulip_linkup(sc, sc->tulip_probe_media); #ifdef notyet if (sc->tulip_features & TULIP_HAVE_MII) tulip_timeout(sc); #endif return; } break; } case TULIP_MEDIAINFO_RESET: case TULIP_MEDIAINFO_SYM: case TULIP_MEDIAINFO_NONE: case TULIP_MEDIAINFO_GPR: { break; } } /* * Try to send a packet.
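* If the probe transmit completes, tulip_media_poll is re-entered with
* TULIP_MEDIAPOLL_TXPROBE_OK and the current probe media is locked in.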
*/ tulip_txprobe(sc); } static void tulip_media_select( tulip_softc_t * const sc) { if (sc->tulip_features & TULIP_HAVE_GPR) { TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_PINSET|sc->tulip_gpinit); DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpdata); } /* * If this board has no media, just return */ if (sc->tulip_features & TULIP_HAVE_NOMEDIA) return; if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); (*sc->tulip_boardsw->bd_media_poll)(sc, TULIP_MEDIAPOLL_START); } else { tulip_media_set(sc, sc->tulip_media); } } static void tulip_21040_mediainfo_init( tulip_softc_t * const sc, tulip_media_t media) { sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_THRSHLD160 |TULIP_CMD_BACKOFFCTR; sc->tulip_if.if_baudrate = 10000000; if (media == TULIP_MEDIA_10BASET || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[0], 21040, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[1], 21040, 10BASET_FD); } if (media == TULIP_MEDIA_AUIBNC || media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[2], 21040, AUIBNC); } if (media == TULIP_MEDIA_UNKNOWN) { TULIP_MEDIAINFO_SIA_INIT(sc, &sc->tulip_mediainfo[3], 21040, EXTSIA); } } static void tulip_21040_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_UNKNOWN); return; } static void tulip_21040_10baset_only_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_10BASET); tulip_media_set(sc, TULIP_MEDIA_10BASET); sc->tulip_media = TULIP_MEDIA_10BASET; } static void tulip_21040_10baset_only_media_select( tulip_softc_t * const sc) { sc->tulip_flags |= TULIP_LINKUP; if (sc->tulip_media == TULIP_MEDIA_10BASET_FD) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; sc->tulip_flags &= ~TULIP_SQETEST; } else { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_flags |= TULIP_SQETEST; } tulip_media_set(sc, sc->tulip_media); } static void tulip_21040_auibnc_only_media_probe( tulip_softc_t * const sc) { tulip_21040_mediainfo_init(sc, TULIP_MEDIA_AUIBNC); sc->tulip_flags |= TULIP_SQETEST|TULIP_LINKUP; tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_media = TULIP_MEDIA_AUIBNC; } static void tulip_21040_auibnc_only_media_select( tulip_softc_t * const sc) { tulip_media_set(sc, TULIP_MEDIA_AUIBNC); sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; } static const tulip_boardsw_t tulip_21040_boardsw = { TULIP_21040_GENERIC, tulip_21040_media_probe, tulip_media_select, tulip_media_poll, }; static const tulip_boardsw_t tulip_21040_10baset_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_10baset_only_media_probe, tulip_21040_10baset_only_media_select, NULL, }; static const tulip_boardsw_t tulip_21040_auibnc_only_boardsw = { TULIP_21040_GENERIC, tulip_21040_auibnc_only_media_probe, tulip_21040_auibnc_only_media_select, NULL, }; static void tulip_21041_mediainfo_init( tulip_softc_t * const sc) { tulip_media_info_t * const mi = sc->tulip_mediainfo; #ifdef notyet if (sc->tulip_revinfo >= 0x20) { TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041P2, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041P2, BNC); return; } #endif TULIP_MEDIAINFO_SIA_INIT(sc, &mi[0], 21041, 10BASET); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[1], 21041, 10BASET_FD); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[2], 21041, AUI); TULIP_MEDIAINFO_SIA_INIT(sc, &mi[3], 21041, BNC); } static void tulip_21041_media_probe( tulip_softc_t * const sc) { 
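/* * 21041 static setup: 10Mb/s SIA media only, with LINKPASS interrupts * driving the probe state machine. */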
sc->tulip_if.if_baudrate = 10000000; sc->tulip_cmdmode |= TULIP_CMD_CAPTREFFCT|TULIP_CMD_ENHCAPTEFFCT |TULIP_CMD_THRSHLD160|TULIP_CMD_BACKOFFCTR; sc->tulip_intrmask |= TULIP_STS_LINKPASS; tulip_21041_mediainfo_init(sc); } static void tulip_21041_media_poll( tulip_softc_t * const sc, const tulip_mediapoll_event_t event) { u_int32_t sia_status; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif if (event == TULIP_MEDIAPOLL_LINKFAIL) { if (sc->tulip_probe_state != TULIP_PROBE_INACTIVE || !TULIP_DO_AUTOSENSE(sc)) return; sc->tulip_media = TULIP_MEDIA_UNKNOWN; tulip_reset(sc); /* start probe */ return; } /* * If we've been asked to start a poll, or a link change interrupt * arrives, restart the probe (and reset the tulip to a known state). */ if (event == TULIP_MEDIAPOLL_START) { sc->tulip_if.if_flags |= IFF_OACTIVE; sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_RXRUN); #ifdef notyet if (sc->tulip_revinfo >= 0x20) { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX; sc->tulip_flags |= TULIP_DIDNWAY; } #endif TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; sc->tulip_probe_media = TULIP_MEDIA_10BASET; sc->tulip_probe_timeout = TULIP_21041_PROBE_10BASET_TIMEOUT; tulip_media_set(sc, TULIP_MEDIA_10BASET); tulip_timeout(sc); return; } if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) return; if (event == TULIP_MEDIAPOLL_TXPROBE_OK) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_txprobes_ok[sc->tulip_probe_media]++; #endif tulip_linkup(sc, sc->tulip_probe_media); return; } sia_status = TULIP_CSR_READ(sc, csr_sia_status); TULIP_CSR_WRITE(sc, csr_sia_status, sia_status); if ((sia_status & TULIP_SIASTS_LINKFAIL) == 0) { if (sc->tulip_revinfo >= 0x20) { if (sia_status & (PHYSTS_10BASET_FD << (16 - 6))) sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD; } /* * If the link has passed LinkPass, 10baseT is the * proper media to use. */ tulip_linkup(sc, sc->tulip_probe_media); return; } /* * Wait for up to 2.4 seconds for the link to reach pass state. * Only then start scanning the other media for activity, * choosing media with receive activity over those without. */ if (sc->tulip_probe_media == TULIP_MEDIA_10BASET) { if (event != TULIP_MEDIAPOLL_TIMER) return; if (sc->tulip_probe_timeout > 0 && (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) == 0) { tulip_timeout(sc); return; } sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; sc->tulip_flags |= TULIP_WANTRXACT; if (sia_status & TULIP_SIASTS_OTHERRXACTIVITY) { sc->tulip_probe_media = TULIP_MEDIA_BNC; } else { sc->tulip_probe_media = TULIP_MEDIA_AUI; } tulip_media_set(sc, sc->tulip_probe_media); tulip_timeout(sc); return; } /* * If we failed, clear the txprobe active flag. */ if (event == TULIP_MEDIAPOLL_TXPROBE_FAILED) sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; if (event == TULIP_MEDIAPOLL_TIMER) { /* * If we've received something, then that's our link! */ if (sc->tulip_flags & TULIP_RXACT) { tulip_linkup(sc, sc->tulip_probe_media); return; } /* * If no txprobe is active and we either don't require receive * activity or have already seen some, fire off a txprobe. */ if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0 && ((sc->tulip_flags & TULIP_WANTRXACT) == 0 || (sia_status & TULIP_SIASTS_RXACTIVITY))) { sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; tulip_txprobe(sc); tulip_timeout(sc); return; } /* * Take 2 passes through before deciding not to * wait for receive activity. Then take another * two passes before spitting out a warning.
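* The first expiry merely clears TULIP_WANTRXACT and rearms the timeout;
* only the second expiry produces the cable-problem complaint.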
*/ if (sc->tulip_probe_timeout <= 0) { if (sc->tulip_flags & TULIP_WANTRXACT) { sc->tulip_flags &= ~TULIP_WANTRXACT; sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; } else { printf(TULIP_PRINTF_FMT ": autosense failed: cable problem?\n", TULIP_PRINTF_ARGS); if ((sc->tulip_if.if_flags & IFF_UP) == 0) { sc->tulip_if.if_flags &= ~IFF_RUNNING; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return; } } } } /* * Since this media failed to probe, try the other one. */ sc->tulip_probe_timeout = TULIP_21041_PROBE_AUIBNC_TIMEOUT; if (sc->tulip_probe_media == TULIP_MEDIA_AUI) { sc->tulip_probe_media = TULIP_MEDIA_BNC; } else { sc->tulip_probe_media = TULIP_MEDIA_AUI; } tulip_media_set(sc, sc->tulip_probe_media); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; tulip_timeout(sc); } static const tulip_boardsw_t tulip_21041_boardsw = { TULIP_21041_GENERIC, tulip_21041_media_probe, tulip_media_select, tulip_21041_media_poll }; static const tulip_phy_attr_t tulip_mii_phy_attrlist[] = { { 0x20005c00, 0, /* 08-00-17 */ { { 0x19, 0x0040, 0x0040 }, /* 10TX */ { 0x19, 0x0040, 0x0000 }, /* 100TX */ }, #if defined(TULIP_DEBUG) "NS DP83840", #endif }, { 0x0281F400, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Seeq 80C240" #endif }, #if 0 { 0x0015F420, 0, /* 00-A0-7D */ { { 0x12, 0x0010, 0x0000 }, /* 10T */ { }, /* 100TX */ { 0x12, 0x0010, 0x0010 }, /* 100T4 */ { 0x12, 0x0008, 0x0008 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "Broadcom BCM5000" #endif }, #endif { 0x0281F400, 0, /* 00-A0-BE */ { { 0x11, 0x8000, 0x0000 }, /* 10T */ { 0x11, 0x8000, 0x8000 }, /* 100TX */ { }, /* 100T4 */ { 0x11, 0x4000, 0x4000 }, /* FULL_DUPLEX */ }, #if defined(TULIP_DEBUG) "ICS 1890" #endif }, { 0 } }; static tulip_media_t tulip_mii_phy_readspecific( tulip_softc_t * const sc) { const tulip_phy_attr_t *attr; u_int16_t data; u_int32_t id; unsigned idx = 0; static const tulip_media_t table[] = { TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET, TULIP_MEDIA_100BASETX, TULIP_MEDIA_100BASET4, TULIP_MEDIA_UNKNOWN, TULIP_MEDIA_10BASET_FD, TULIP_MEDIA_100BASETX_FD, TULIP_MEDIA_UNKNOWN }; /* * Don't read phy specific registers if link is not up. 
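* (PHYSTS_EXTENDED_REGS must be set as well, since the mode table below
* indexes vendor-specific registers.)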
*/ data = tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_STATUS); if ((data & (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) != (PHYSTS_LINK_UP|PHYSTS_EXTENDED_REGS)) return TULIP_MEDIA_UNKNOWN; id = (tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, sc->tulip_phyaddr, PHYREG_IDHIGH); for (attr = tulip_mii_phy_attrlist;; attr++) { if (attr->attr_id == 0) return TULIP_MEDIA_UNKNOWN; if ((id & ~0x0F) == attr->attr_id) break; } if (attr->attr_modes[PHY_MODE_100TX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100TX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 2; } if (idx == 0 && attr->attr_modes[PHY_MODE_100T4].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_100T4]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 3; } if (idx == 0 && attr->attr_modes[PHY_MODE_10T].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_10T]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); if ((data & pm->pm_mask) == pm->pm_value) idx = 1; } if (idx != 0 && attr->attr_modes[PHY_MODE_FULLDUPLEX].pm_regno) { const tulip_phy_modedata_t * const pm = &attr->attr_modes[PHY_MODE_FULLDUPLEX]; data = tulip_mii_readreg(sc, sc->tulip_phyaddr, pm->pm_regno); idx += ((data & pm->pm_mask) == pm->pm_value ? 4 : 0); } return table[idx]; } static unsigned tulip_mii_get_phyaddr( tulip_softc_t * const sc, unsigned offset) { unsigned phyaddr; for (phyaddr = 1; phyaddr < 32; phyaddr++) { unsigned status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) continue; if (offset == 0) return phyaddr; offset--; } if (offset == 0) { unsigned status = tulip_mii_readreg(sc, 0, PHYREG_STATUS); if (status == 0 || status == 0xFFFF || status < PHYSTS_10BASET) return TULIP_MII_NOPHY; return 0; } return TULIP_MII_NOPHY; } static int tulip_mii_map_abilities( tulip_softc_t * const sc, unsigned abilities) { sc->tulip_abilities = abilities; if (abilities & PHYSTS_100BASETX_FD) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX_FD; } else if (abilities & PHYSTS_100BASET4) { sc->tulip_probe_media = TULIP_MEDIA_100BASET4; } else if (abilities & PHYSTS_100BASETX) { sc->tulip_probe_media = TULIP_MEDIA_100BASETX; } else if (abilities & PHYSTS_10BASET_FD) { sc->tulip_probe_media = TULIP_MEDIA_10BASET_FD; } else if (abilities & PHYSTS_10BASET) { sc->tulip_probe_media = TULIP_MEDIA_10BASET; } else { sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return 0; } sc->tulip_probe_state = TULIP_PROBE_INACTIVE; return 1; } static void tulip_mii_autonegotiate( tulip_softc_t * const sc, const unsigned phyaddr) { switch (sc->tulip_probe_state) { case TULIP_PROBE_MEDIATEST: case TULIP_PROBE_INACTIVE: { sc->tulip_flags |= TULIP_DIDNWAY; tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, PHYCTL_RESET); sc->tulip_probe_timeout = 3000; sc->tulip_intrmask |= TULIP_STS_ABNRMLINTR|TULIP_STS_NORMALINTR; sc->tulip_probe_state = TULIP_PROBE_PHYRESET; /* FALL THROUGH */ } case TULIP_PROBE_PHYRESET: { u_int32_t status; u_int32_t data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); if (data & PHYCTL_RESET) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } printf(TULIP_PRINTF_FMT "(phy%d): error: reset of PHY never completed!\n", TULIP_PRINTF_ARGS, phyaddr); sc->tulip_flags &= ~TULIP_TXPROBE_ACTIVE; sc->tulip_probe_state = TULIP_PROBE_FAILED; 
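/* Mark the interface down so the failure is visible. */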
sc->tulip_if.if_flags &= ~(IFF_UP|IFF_RUNNING); return; } status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); if ((status & PHYSTS_CAN_AUTONEG) == 0) { #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation disabled\n", TULIP_PRINTF_ARGS, phyaddr); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } if (tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT) != ((status >> 6) | 0x01)) tulip_mii_writereg(sc, phyaddr, PHYREG_AUTONEG_ADVERTISEMENT, (status >> 6) | 0x01); tulip_mii_writereg(sc, phyaddr, PHYREG_CONTROL, data|PHYCTL_AUTONEG_RESTART|PHYCTL_AUTONEG_ENABLE); data = tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL); #if defined(TULIP_DEBUG) if ((data & PHYCTL_AUTONEG_ENABLE) == 0) loudprintf(TULIP_PRINTF_FMT "(phy%d): oops: enable autonegotiation failed: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); else loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation restarted: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); sc->tulip_dbg.dbg_nway_starts++; #endif sc->tulip_probe_state = TULIP_PROBE_PHYAUTONEG; sc->tulip_probe_timeout = 3000; /* FALL THROUGH */ } case TULIP_PROBE_PHYAUTONEG: { u_int32_t status = tulip_mii_readreg(sc, phyaddr, PHYREG_STATUS); u_int32_t data; if ((status & PHYSTS_AUTONEG_DONE) == 0) { if (sc->tulip_probe_timeout > 0) { tulip_timeout(sc); return; } #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation timeout: sts=0x%04x, ctl=0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, status, tulip_mii_readreg(sc, phyaddr, PHYREG_CONTROL)); #endif sc->tulip_flags &= ~TULIP_DIDNWAY; sc->tulip_probe_state = TULIP_PROBE_MEDIATEST; return; } data = tulip_mii_readreg(sc, phyaddr, PHYREG_AUTONEG_ABILITIES); #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation complete: 0x%04x\n", TULIP_PRINTF_ARGS, phyaddr, data); #endif data = (data << 6) & status; if (!tulip_mii_map_abilities(sc, data)) sc->tulip_flags &= ~TULIP_DIDNWAY; return; } default: { #if defined(DIAGNOSTIC) panic("tulip_media_poll: botch at line %d\n", __LINE__); #endif break; } } #if defined(TULIP_DEBUG) loudprintf(TULIP_PRINTF_FMT "(phy%d): autonegotiation failure: state = %d\n", TULIP_PRINTF_ARGS, phyaddr, sc->tulip_probe_state); sc->tulip_dbg.dbg_nway_failures++; #endif } static void tulip_2114x_media_preset( tulip_softc_t * const sc) { const tulip_media_info_t *mi = NULL; tulip_media_t media = sc->tulip_media; if (sc->tulip_probe_state == TULIP_PROBE_INACTIVE) media = sc->tulip_media; else media = sc->tulip_probe_media; sc->tulip_cmdmode &= ~TULIP_CMD_PORTSELECT; sc->tulip_flags &= ~TULIP_SQETEST; if (media != TULIP_MEDIA_UNKNOWN && media != TULIP_MEDIA_MAX) { #if defined(TULIP_DEBUG) if (media < TULIP_MEDIA_MAX && sc->tulip_mediums[media] != NULL) { #endif mi = sc->tulip_mediums[media]; if (mi->mi_type == TULIP_MEDIAINFO_MII) { sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; } else if (mi->mi_type == TULIP_MEDIAINFO_GPR || mi->mi_type == TULIP_MEDIAINFO_SYM) { sc->tulip_cmdmode &= ~TULIP_GPR_CMDBITS; sc->tulip_cmdmode |= mi->mi_cmdmode; } else if (mi->mi_type == TULIP_MEDIAINFO_SIA) { TULIP_CSR_WRITE(sc, csr_sia_connectivity, TULIP_SIACONN_RESET); } #if defined(TULIP_DEBUG) } else { printf(TULIP_PRINTF_FMT ": preset: bad media %d!\n", TULIP_PRINTF_ARGS, media); } #endif } switch (media) { case TULIP_MEDIA_BNC: case TULIP_MEDIA_AUI: case TULIP_MEDIA_10BASET: { sc->tulip_cmdmode &= ~TULIP_CMD_FULLDUPLEX; sc->tulip_cmdmode |= TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 10000000; sc->tulip_flags |= 
TULIP_SQETEST; break; } case TULIP_MEDIA_10BASET_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 10000000; break; } case TULIP_MEDIA_100BASEFX: case TULIP_MEDIA_100BASET4: case TULIP_MEDIA_100BASETX: { sc->tulip_cmdmode &= ~(TULIP_CMD_FULLDUPLEX|TULIP_CMD_TXTHRSHLDCTL); sc->tulip_cmdmode |= TULIP_CMD_PORTSELECT; sc->tulip_if.if_baudrate = 100000000; break; } case TULIP_MEDIA_100BASEFX_FD: case TULIP_MEDIA_100BASETX_FD: { sc->tulip_cmdmode |= TULIP_CMD_FULLDUPLEX|TULIP_CMD_PORTSELECT; sc->tulip_cmdmode &= ~TULIP_CMD_TXTHRSHLDCTL; sc->tulip_if.if_baudrate = 100000000; break; } default: { break; } } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } /* ******************************************************************** * Start of 21140/21140A support which does not use the MII interface */ static void tulip_null_media_poll( tulip_softc_t * const sc, tulip_mediapoll_event_t event) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_events[event]++; #endif #if defined(DIAGNOSTIC) printf(TULIP_PRINTF_FMT ": botch(media_poll) at line %d\n", TULIP_PRINTF_ARGS, __LINE__); #endif } __inline__ static void tulip_21140_mediainit( tulip_softc_t * const sc, tulip_media_info_t * const mip, tulip_media_t const media, unsigned gpdata, unsigned cmdmode) { sc->tulip_mediums[media] = mip; mip->mi_type = TULIP_MEDIAINFO_GPR; mip->mi_cmdmode = cmdmode; mip->mi_gpdata = gpdata; } static void tulip_21140_evalboard_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); if ((TULIP_CSR_READ(sc, csr_gp) & TULIP_GP_EB_OK100) != 0) { sc->tulip_media = TULIP_MEDIA_10BASET; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EB_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EB_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_eb_boardsw = { TULIP_21140_DEC_EB, tulip_21140_evalboard_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_accton_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; unsigned gpdata; sc->tulip_gpinit = TULIP_GP_EB_PINS; sc->tulip_gpdata = TULIP_GP_EB_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EB_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(1000000); gpdata = TULIP_CSR_READ(sc, csr_gp); if ((gpdata & TULIP_GP_EN1207_UTP_INIT) == 0) { sc->tulip_media = TULIP_MEDIA_10BASET; } else { if 
((gpdata & TULIP_GP_EN1207_BNC_INIT) == 0) { sc->tulip_media = TULIP_MEDIA_BNC; } else { sc->tulip_media = TULIP_MEDIA_100BASETX; } } tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_BNC, TULIP_GP_EN1207_BNC_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_EN1207_UTP_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EN1207_100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_accton_boardsw = { TULIP_21140_EN1207, tulip_21140_accton_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_smc9332_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int idx, cnt = 0; TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT|TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles, which at 33MHz comes to under two microseconds, but wait a bit longer anyway) */ TULIP_CSR_WRITE(sc, csr_command, TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); sc->tulip_gpinit = TULIP_GP_SMC_9332_PINS; sc->tulip_gpdata = TULIP_GP_SMC_9332_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_SMC_9332_INIT); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) == (TULIP_GP_SMC_9332_OK10|TULIP_GP_SMC_9332_OK100)) { if (++cnt > 100) break; } else if ((csr & TULIP_GP_SMC_9332_OK10) == 0) { break; } else { cnt = 0; } DELAY(1000); } sc->tulip_media = cnt > 100 ?
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_SMC_9332_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_smc9332_boardsw = { TULIP_21140_SMC_9332, tulip_21140_smc9332_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_21140_cogent_em100_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; u_int32_t cmdmode = TULIP_CSR_READ(sc, csr_command); sc->tulip_gpinit = TULIP_GP_EM100_PINS; sc->tulip_gpdata = TULIP_GP_EM100_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_EM100_INIT); cmdmode = TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION|TULIP_CMD_MUSTBEONE; cmdmode &= ~(TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_SCRAMBLER); if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) { TULIP_CSR_WRITE(sc, csr_command, cmdmode); sc->tulip_media = TULIP_MEDIA_100BASEFX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASEFX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_FULLDUPLEX); } else { TULIP_CSR_WRITE(sc, csr_command, cmdmode|TULIP_CMD_SCRAMBLER); sc->tulip_media = TULIP_MEDIA_100BASETX; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_EM100_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } } static const tulip_boardsw_t tulip_21140_cogent_em100_boardsw = { TULIP_21140_COGENT_EM100, tulip_21140_cogent_em100_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset }; static void tulip_21140_znyx_zx34x_media_probe( tulip_softc_t * const sc) { tulip_media_info_t *mip = sc->tulip_mediainfo; int cnt10 = 0, cnt100 = 0, idx; sc->tulip_gpinit = TULIP_GP_ZX34X_PINS; sc->tulip_gpdata = TULIP_GP_ZX34X_INIT; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_PINS); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ZX34X_INIT); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) | TULIP_CMD_PORTSELECT | TULIP_CMD_PCSFUNCTION | TULIP_CMD_SCRAMBLER | TULIP_CMD_MUSTBEONE); TULIP_CSR_WRITE(sc, csr_command, TULIP_CSR_READ(sc, csr_command) & ~TULIP_CMD_TXTHRSHLDCTL); DELAY(200000); for (idx = 1000; idx > 0; idx--) { u_int32_t csr = TULIP_CSR_READ(sc, csr_gp); if ((csr & (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) == (TULIP_GP_ZX34X_LNKFAIL|TULIP_GP_ZX34X_SYMDET|TULIP_GP_ZX34X_SIGDET)) { if (++cnt100 > 100) break; } else if ((csr & TULIP_GP_ZX34X_LNKFAIL) == 0) { if (++cnt10 > 100) break; } else { cnt10 = 0; cnt100 = 0; } DELAY(1000); } sc->tulip_media = cnt100 > 100 ? 
TULIP_MEDIA_100BASETX : TULIP_MEDIA_10BASET; tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET, TULIP_GP_ZX34X_INIT, TULIP_CMD_TXTHRSHLDCTL); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_10BASET_FD, TULIP_GP_ZX34X_INIT, TULIP_CMD_TXTHRSHLDCTL|TULIP_CMD_FULLDUPLEX); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX, TULIP_GP_ZX34X_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER); tulip_21140_mediainit(sc, mip++, TULIP_MEDIA_100BASETX_FD, TULIP_GP_ZX34X_INIT, TULIP_CMD_PORTSELECT|TULIP_CMD_PCSFUNCTION |TULIP_CMD_SCRAMBLER|TULIP_CMD_FULLDUPLEX); } static const tulip_boardsw_t tulip_21140_znyx_zx34x_boardsw = { TULIP_21140_ZNYX_ZX34X, tulip_21140_znyx_zx34x_media_probe, tulip_media_select, tulip_null_media_poll, tulip_2114x_media_preset, }; static void tulip_2114x_media_probe( tulip_softc_t * const sc) { sc->tulip_cmdmode |= TULIP_CMD_MUSTBEONE |TULIP_CMD_BACKOFFCTR|TULIP_CMD_THRSHLD72; } static const tulip_boardsw_t tulip_2114x_isv_boardsw = { TULIP_21140_ISV, tulip_2114x_media_probe, tulip_media_select, tulip_media_poll, tulip_2114x_media_preset, }; /* * ******** END of chip-specific handlers. *********** */ /* * Code to read the SROM and MII bit streams (I2C) */ static void tulip_delay_300ns( tulip_softc_t * const sc) { int idx; for (idx = (300 / 33) + 1; idx > 0; idx--) (void) TULIP_CSR_READ(sc, csr_busmode); } #define EMIT do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); tulip_delay_300ns(sc); } while (0) static void tulip_srom_idle( tulip_softc_t * const sc) { unsigned bit, csr; csr = SROMSEL ; EMIT; csr = SROMSEL | SROMRD; EMIT; csr ^= SROMCS; EMIT; csr ^= SROMCLKON; EMIT; /* * Write 25 cycles of 0 which will force the SROM to be idle. */ for (bit = 3 + SROM_BITWIDTH + 16; bit > 0; bit--) { csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ csr ^= SROMCLKON; EMIT; /* clock high; data valid */ } csr ^= SROMCLKOFF; EMIT; csr ^= SROMCS; EMIT; csr = 0; EMIT; } static void tulip_srom_read( tulip_softc_t * const sc) { unsigned idx; const unsigned bitwidth = SROM_BITWIDTH; const unsigned cmdmask = (SROMCMD_RD << bitwidth); const unsigned msb = 1 << (bitwidth + 3 - 1); unsigned lastidx = (1 << bitwidth) - 1; tulip_srom_idle(sc); for (idx = 0; idx <= lastidx; idx++) { unsigned lastbit, data, bits, bit, csr; csr = SROMSEL ; EMIT; csr = SROMSEL | SROMRD; EMIT; csr ^= SROMCSON; EMIT; csr ^= SROMCLKON; EMIT; lastbit = 0; for (bits = idx|cmdmask, bit = bitwidth + 3; bit > 0; bit--, bits <<= 1) { const unsigned thisbit = bits & msb; csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ if (thisbit != lastbit) { csr ^= SROMDOUT; EMIT; /* clock low; invert data */ } else { EMIT; } csr ^= SROMCLKON; EMIT; /* clock high; data valid */ lastbit = thisbit; } csr ^= SROMCLKOFF; EMIT; for (data = 0, bits = 0; bits < 16; bits++) { data <<= 1; csr ^= SROMCLKON; EMIT; /* clock high; data valid */ data |= TULIP_CSR_READ(sc, csr_srom_mii) & SROMDIN ? 1 : 0; csr ^= SROMCLKOFF; EMIT; /* clock low; data not valid */ } sc->tulip_rombuf[idx*2] = data & 0xFF; sc->tulip_rombuf[idx*2+1] = data >> 8; csr = SROMSEL | SROMRD; EMIT; csr = 0; EMIT; } tulip_srom_idle(sc); } #define MII_EMIT do { TULIP_CSR_WRITE(sc, csr_srom_mii, csr); tulip_delay_300ns(sc); } while (0) static void tulip_mii_writebits( tulip_softc_t * const sc, unsigned data, unsigned bits) { unsigned msb = 1 << (bits - 1); unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); unsigned lastbit = (csr & MII_DOUT) ?
msb : 0; csr |= MII_WR; MII_EMIT; /* clock low; assert write */ for (; bits > 0; bits--, data <<= 1) { const unsigned thisbit = data & msb; if (thisbit != lastbit) { csr ^= MII_DOUT; MII_EMIT; /* clock low; invert data */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ lastbit = thisbit; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } } static void tulip_mii_turnaround( tulip_softc_t * const sc, unsigned cmd) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); if (cmd == MII_WRCMD) { csr |= MII_DOUT; MII_EMIT; /* clock low; change data */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ csr ^= MII_DOUT; MII_EMIT; /* clock low; change data */ } else { csr |= MII_RD; MII_EMIT; /* clock low; switch to read */ } csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } static unsigned tulip_mii_readbits( tulip_softc_t * const sc) { unsigned data; unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); int idx; for (idx = 0, data = 0; idx < 16; idx++) { data <<= 1; /* this is NOOP on the first pass through */ csr ^= MII_CLKON; MII_EMIT; /* clock high; data valid */ if (TULIP_CSR_READ(sc, csr_srom_mii) & MII_DIN) data |= 1; csr ^= MII_CLKOFF; MII_EMIT; /* clock low; data not valid */ } csr ^= MII_RD; MII_EMIT; /* clock low; turn off read */ return data; } static unsigned tulip_mii_readreg( tulip_softc_t * const sc, unsigned devaddr, unsigned regno) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); unsigned data; csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_RDCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_RDCMD); data = tulip_mii_readbits(sc); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][0] = data; sc->tulip_dbg.dbg_phyregs[regno][1]++; #endif return data; } static void tulip_mii_writereg( tulip_softc_t * const sc, unsigned devaddr, unsigned regno, unsigned data) { unsigned csr = TULIP_CSR_READ(sc, csr_srom_mii) & (MII_RD|MII_DOUT|MII_CLK); csr &= ~(MII_RD|MII_CLK); MII_EMIT; tulip_mii_writebits(sc, MII_PREAMBLE, 32); tulip_mii_writebits(sc, MII_WRCMD, 8); tulip_mii_writebits(sc, devaddr, 5); tulip_mii_writebits(sc, regno, 5); tulip_mii_turnaround(sc, MII_WRCMD); tulip_mii_writebits(sc, data, 16); #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_phyregs[regno][2] = data; sc->tulip_dbg.dbg_phyregs[regno][3]++; #endif } #define tulip_mchash(mca) (tulip_crc32(mca, 6) & 0x1FF) #define tulip_srom_crcok(databuf) ( \ ((tulip_crc32(databuf, 126) & 0xFFFFU) ^ 0xFFFFU) == \ ((databuf)[126] | ((databuf)[127] << 8))) static unsigned tulip_crc32( const unsigned char *databuf, size_t datalen) { u_int idx, bit, data, crc = 0xFFFFFFFFUL; for (idx = 0; idx < datalen; idx++) for (data = *databuf++, bit = 0; bit < 8; bit++, data >>= 1) crc = (crc >> 1) ^ (((crc ^ data) & 1) ? 
TULIP_CRC32_POLY : 0); return crc; } static void tulip_identify_dec_nic( tulip_softc_t * const sc) { strcpy(sc->tulip_boardid, "DEC "); #define D0 4 if (sc->tulip_chipid <= TULIP_DE425) return; if (bcmp(sc->tulip_rombuf + 29, "DE500", 5) == 0 || bcmp(sc->tulip_rombuf + 29, "DE450", 5) == 0) { bcopy(sc->tulip_rombuf + 29, &sc->tulip_boardid[D0], 8); sc->tulip_boardid[D0+8] = ' '; } #undef D0 } static void tulip_identify_znyx_nic( tulip_softc_t * const sc) { unsigned id = 0; strcpy(sc->tulip_boardid, "ZNYX ZX3XX "); if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) { unsigned znyx_ptr; sc->tulip_boardid[8] = '4'; znyx_ptr = sc->tulip_rombuf[124] + 256 * sc->tulip_rombuf[125]; if (znyx_ptr < 26 || znyx_ptr > 116) { sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; return; } /* ZX344 = 0010 .. 0013FF */ if (sc->tulip_rombuf[znyx_ptr] == 0x4A && sc->tulip_rombuf[znyx_ptr + 1] == 0x52 && sc->tulip_rombuf[znyx_ptr + 2] == 0x01) { id = sc->tulip_rombuf[znyx_ptr + 5] + 256 * sc->tulip_rombuf[znyx_ptr + 4]; if ((id >> 8) == (TULIP_ZNYX_ID_ZX342 >> 8)) { sc->tulip_boardid[9] = '2'; if (id == TULIP_ZNYX_ID_ZX342B) { sc->tulip_boardid[10] = 'B'; sc->tulip_boardid[11] = ' '; } sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX344) { sc->tulip_boardid[10] = '4'; sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } else if (id == TULIP_ZNYX_ID_ZX345) { sc->tulip_boardid[9] = (sc->tulip_rombuf[19] > 1) ? '8' : '5'; } else if (id == TULIP_ZNYX_ID_ZX346) { sc->tulip_boardid[9] = '6'; } else if (id == TULIP_ZNYX_ID_ZX351) { sc->tulip_boardid[8] = '5'; sc->tulip_boardid[9] = '1'; } } if (id == 0) { /* * Assume it's a ZX342... */ sc->tulip_boardsw = &tulip_21140_znyx_zx34x_boardsw; } return; } sc->tulip_boardid[8] = '1'; if (sc->tulip_chipid == TULIP_21041) { sc->tulip_boardid[10] = '1'; return; } if (sc->tulip_rombuf[32] == 0x4A && sc->tulip_rombuf[33] == 0x52) { id = sc->tulip_rombuf[37] + 256 * sc->tulip_rombuf[36]; if (id == TULIP_ZNYX_ID_ZX312T) { sc->tulip_boardid[9] = '2'; sc->tulip_boardid[10] = 'T'; sc->tulip_boardid[11] = ' '; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; } else if (id == TULIP_ZNYX_ID_ZX314_INTA) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX314) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315_INTA) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if (id == TULIP_ZNYX_ID_ZX315) { sc->tulip_boardid[9] = '5'; sc->tulip_features |= TULIP_HAVE_BASEROM; } else { id = 0; } } if (id == 0) { if ((sc->tulip_enaddr[3] & ~3) == 0xF0 && (sc->tulip_enaddr[5] & 2) == 0) { sc->tulip_boardid[9] = '4'; sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xF4 && (sc->tulip_enaddr[5] & 1) == 0) { sc->tulip_boardid[9] = '5'; sc->tulip_boardsw = &tulip_21040_boardsw; sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } else if ((sc->tulip_enaddr[3] & ~3) == 0xEC) { sc->tulip_boardid[9] = '2'; sc->tulip_boardsw = &tulip_21040_boardsw; } } } static void tulip_identify_smc_nic( tulip_softc_t * const sc) { u_int32_t id1, id2, ei; int auibnc = 0, utp = 0; char *cp; strcpy(sc->tulip_boardid, "SMC "); if (sc->tulip_chipid 
== TULIP_21041) return; if (sc->tulip_chipid != TULIP_21040) { if (sc->tulip_boardsw != &tulip_2114x_isv_boardsw) { strcpy(&sc->tulip_boardid[4], "9332DST "); sc->tulip_boardsw = &tulip_21140_smc9332_boardsw; } else if (sc->tulip_features & (TULIP_HAVE_BASEROM|TULIP_HAVE_SLAVEDROM)) { strcpy(&sc->tulip_boardid[4], "9334BDT "); } else { strcpy(&sc->tulip_boardid[4], "9332BDT "); } return; } id1 = sc->tulip_rombuf[0x60] | (sc->tulip_rombuf[0x61] << 8); id2 = sc->tulip_rombuf[0x62] | (sc->tulip_rombuf[0x63] << 8); ei = sc->tulip_rombuf[0x66] | (sc->tulip_rombuf[0x67] << 8); strcpy(&sc->tulip_boardid[4], "8432"); cp = &sc->tulip_boardid[8]; if ((id1 & 1) == 0) *cp++ = 'B', auibnc = 1; if ((id1 & 0xFF) > 0x32) *cp++ = 'T', utp = 1; if ((id1 & 0x4000) == 0) *cp++ = 'A', auibnc = 1; if (id2 == 0x15) { sc->tulip_boardid[7] = '4'; *cp++ = '-'; *cp++ = 'C'; *cp++ = 'H'; *cp++ = (ei ? '2' : '1'); } *cp++ = ' '; *cp = '\0'; if (utp && !auibnc) sc->tulip_boardsw = &tulip_21040_10baset_only_boardsw; else if (!utp && auibnc) sc->tulip_boardsw = &tulip_21040_auibnc_only_boardsw; } static void tulip_identify_cogent_nic( tulip_softc_t * const sc) { strcpy(sc->tulip_boardid, "Cogent "); if (sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) { if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100TX_ID) { strcat(sc->tulip_boardid, "EM100TX "); sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw; } else if (sc->tulip_rombuf[32] == TULIP_COGENT_EM100FX_ID) { strcat(sc->tulip_boardid, "EM100FX "); sc->tulip_boardsw = &tulip_21140_cogent_em100_boardsw; } /* * Magic number (0x24001109U) is the SubVendor (0x2400) and * SubDevId (0x1109) for the ANA6944TX (EM440TX). */ if (*(u_int32_t *) sc->tulip_rombuf == 0x24001109U && (sc->tulip_features & TULIP_HAVE_BASEROM)) { /* * Cogent (Adaptec) is still mapping all INTs to INTA of * first 21140. Dumb! Dumb! */ strcat(sc->tulip_boardid, "EM440TX "); sc->tulip_features |= TULIP_HAVE_SHAREDINTR; } } else if (sc->tulip_chipid == TULIP_21040) { sc->tulip_features |= TULIP_HAVE_SHAREDINTR|TULIP_HAVE_BASEROM; } } static void tulip_identify_accton_nic( tulip_softc_t * const sc) { strcpy(sc->tulip_boardid, "ACCTON "); switch (sc->tulip_chipid) { case TULIP_21140A: strcat(sc->tulip_boardid, "EN1207 "); sc->tulip_boardsw = &tulip_21140_accton_boardsw; break; case TULIP_21140: strcat(sc->tulip_boardid, "EN1207TX "); sc->tulip_boardsw = &tulip_21140_eb_boardsw; break; case TULIP_21040: strcat(sc->tulip_boardid, "EN1203 "); sc->tulip_boardsw = &tulip_21040_boardsw; break; case TULIP_21041: strcat(sc->tulip_boardid, "EN1203 "); sc->tulip_boardsw = &tulip_21041_boardsw; break; default: sc->tulip_boardsw = &tulip_2114x_isv_boardsw; break; } } static void tulip_identify_asante_nic( tulip_softc_t * const sc) { strcpy(sc->tulip_boardid, "Asante "); if ((sc->tulip_chipid == TULIP_21140 || sc->tulip_chipid == TULIP_21140A) && sc->tulip_boardsw != &tulip_2114x_isv_boardsw) { tulip_media_info_t *mi = sc->tulip_mediainfo; int idx; /* * The Asante Fast Ethernet doesn't always ship with a valid * new format SROM. So if it isn't in the new format, we cheat and * set it up as if it were.
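* That means resetting the PHY through the GP pins and then hunting for
* its MII address by hand, as done below.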
*/ sc->tulip_gpinit = TULIP_GP_ASANTE_PINS; sc->tulip_gpdata = 0; TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PINS|TULIP_GP_PINSET); TULIP_CSR_WRITE(sc, csr_gp, TULIP_GP_ASANTE_PHYRESET); DELAY(100); TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = 0; mi->mi_gpr_offset = 0; mi->mi_reset_length = 0; mi->mi_reset_offset = 0; mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx = 20; idx > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, 0); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy 0\n", TULIP_PRINTF_ARGS); return; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_advertisement = PHYSTS_10BASET|PHYSTS_10BASET_FD|PHYSTS_100BASETX|PHYSTS_100BASETX_FD; mi->mi_full_duplex = PHYSTS_10BASET_FD|PHYSTS_100BASETX_FD; mi->mi_tx_threshold = PHYSTS_10BASET|PHYSTS_10BASET_FD; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); sc->tulip_boardsw = &tulip_2114x_isv_boardsw; } } static int tulip_srom_decode( tulip_softc_t * const sc) { unsigned idx1, idx2, idx3; const tulip_srom_header_t *shp = (tulip_srom_header_t *) &sc->tulip_rombuf[0]; const tulip_srom_adapter_info_t *saip = (tulip_srom_adapter_info_t *) (shp + 1); tulip_srom_media_t srom_media; tulip_media_info_t *mi = sc->tulip_mediainfo; const u_int8_t *dp; u_int32_t leaf_offset, blocks, data; for (idx1 = 0; idx1 < shp->sh_adapter_count; idx1++, saip++) { if (shp->sh_adapter_count == 1) break; if (saip->sai_device == sc->tulip_pci_devno) break; } /* * Didn't find the right media block for this card. */ if (idx1 == shp->sh_adapter_count) return 0; /* * Save the hardware address. */ bcopy((caddr_t) shp->sh_ieee802_address, (caddr_t) sc->tulip_enaddr, 6); /* * If this is a multiple port card, add the adapter index to the last * byte of the hardware address. (if it isn't multiport, adding 0 * won't hurt.)
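* The 16-bit little-endian leaf offset read next points at this adapter's
* media blocks within the SROM.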
*/ sc->tulip_enaddr[5] += idx1; leaf_offset = saip->sai_leaf_offset_lowbyte + saip->sai_leaf_offset_highbyte * 256; dp = sc->tulip_rombuf + leaf_offset; sc->tulip_conntype = (tulip_srom_connection_t) (dp[0] + dp[1] * 256); dp += 2; for (idx2 = 0;; idx2++) { if (tulip_srom_conninfo[idx2].sc_type == sc->tulip_conntype || tulip_srom_conninfo[idx2].sc_type == TULIP_SROM_CONNTYPE_NOT_USED) break; } sc->tulip_connidx = idx2; if (sc->tulip_chipid == TULIP_21041) { blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { tulip_media_t media; data = *dp++; srom_media = (tulip_srom_media_t) (data & 0x3F); for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media != TULIP_MEDIA_UNKNOWN) { if (data & TULIP_SROM_21041_EXTENDED) { mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; mi->mi_sia_connectivity = dp[0] + dp[1] * 256; mi->mi_sia_tx_rx = dp[2] + dp[3] * 256; mi->mi_sia_general = dp[4] + dp[5] * 256; mi++; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; break; } default: { break; } } } } if (data & TULIP_SROM_21041_EXTENDED) dp += 6; } #ifdef notdef if (blocks == 0) { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, BNC); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, AUI); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET); mi++; TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21041, 10BASET_FD); mi++; } #endif } else { unsigned length, type; tulip_media_t gp_media = TULIP_MEDIA_UNKNOWN; if (sc->tulip_features & TULIP_HAVE_GPR) sc->tulip_gpinit = *dp++; blocks = *dp++; for (idx2 = 0; idx2 < blocks; idx2++) { const u_int8_t *ep; if ((*dp & 0x80) == 0) { length = 4; type = 0; } else { length = (*dp++ & 0x7f) - 1; type = *dp++ & 0x3f; } ep = dp + length; switch (type & 0x3f) { case 0: { /* 21140[A] GPR block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_GPR; sc->tulip_mediums[media] = mi; mi->mi_gpdata = dp[1]; if (media > gp_media && !TULIP_IS_MEDIA_FD(media)) { sc->tulip_gpdata = mi->mi_gpdata; gp_media = media; } data = dp[2] + dp[3] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { #if 0 mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; #endif mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } mi++; break; } case 1: { /* 21140[A] MII block */ const unsigned phyno = *dp++; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += mi->mi_reset_length; /* * Before we probe for a PHY, use the GPR information * to select it. If we don't, it may be inaccessible. 
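* The reset and GP byte sequences recorded in the SROM are replayed one
* byte at a time onto csr_gp before the address search.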
*/ TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_gpinit|TULIP_GP_PINSET); for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_reset_offset + idx3]); } sc->tulip_phyaddr = mi->mi_phyaddr; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++) { DELAY(10); TULIP_CSR_WRITE(sc, csr_gp, sc->tulip_rombuf[mi->mi_gpr_offset + idx3]); } /* * At least write something! */ if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_gp, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if (mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy %d\n", TULIP_PRINTF_ARGS, phyno); break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 2: { /* 2114[23] SIA block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SIA; sc->tulip_mediums[media] = mi; if (type & 0x40) { mi->mi_sia_connectivity = dp[0] + dp[1] * 256; mi->mi_sia_tx_rx = dp[2] + dp[3] * 256; mi->mi_sia_general = dp[4] + dp[5] * 256; dp += 6; } else { switch (media) { case TULIP_MEDIA_BNC: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, BNC); break; } case TULIP_MEDIA_AUI: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, AUI); break; } case TULIP_MEDIA_10BASET: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET); break; } case TULIP_MEDIA_10BASET_FD: { TULIP_MEDIAINFO_SIA_INIT(sc, mi, 21142, 10BASET_FD); break; } default: { goto bad_media; } } } mi->mi_sia_gp_control = (dp[0] + dp[1] * 256) << 16; mi->mi_sia_gp_data = (dp[2] + dp[3] * 256) << 16; mi++; bad_media: break; } case 3: { /* 2114[23] MII PHY block */ const unsigned phyno = *dp++; const u_int8_t *dp0; mi->mi_type = TULIP_MEDIAINFO_MII; mi->mi_gpr_length = *dp++; mi->mi_gpr_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_gpr_length; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; dp0 = &sc->tulip_rombuf[mi->mi_reset_offset]; for (idx3 = 0; idx3 < mi->mi_reset_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } sc->tulip_phyaddr = mi->mi_phyaddr; dp0 = &sc->tulip_rombuf[mi->mi_gpr_offset]; for (idx3 = 0; idx3 < mi->mi_gpr_length; idx3++, dp0 += 2) { DELAY(10); TULIP_CSR_WRITE(sc, csr_sia_general, (dp0[0] + 256 * dp0[1]) << 16); } if (mi->mi_reset_length == 0 && mi->mi_gpr_length == 0) TULIP_CSR_WRITE(sc, csr_sia_general, 0); mi->mi_phyaddr = TULIP_MII_NOPHY; for (idx3 = 20; idx3 > 0 && mi->mi_phyaddr == TULIP_MII_NOPHY; idx3--) { DELAY(10000); mi->mi_phyaddr = tulip_mii_get_phyaddr(sc, phyno); } if 
(mi->mi_phyaddr == TULIP_MII_NOPHY) { printf(TULIP_PRINTF_FMT ": can't find phy %d\n", TULIP_PRINTF_ARGS, phyno); break; } sc->tulip_features |= TULIP_HAVE_MII; mi->mi_capabilities = dp[0] + dp[1] * 256; dp += 2; mi->mi_advertisement = dp[0] + dp[1] * 256; dp += 2; mi->mi_full_duplex = dp[0] + dp[1] * 256; dp += 2; mi->mi_tx_threshold = dp[0] + dp[1] * 256; dp += 2; mi->mi_mii_interrupt = dp[0] + dp[1] * 256; dp += 2; TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASETX); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 100BASET4); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET_FD); TULIP_MEDIAINFO_ADD_CAPABILITY(sc, mi, 10BASET); mi->mi_phyid = (tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDLOW) << 16) | tulip_mii_readreg(sc, mi->mi_phyaddr, PHYREG_IDHIGH); mi++; break; } case 4: { /* 21143 SYM block */ tulip_media_t media; srom_media = (tulip_srom_media_t) dp[0]; for (idx3 = 0; tulip_srom_mediums[idx3].sm_type != TULIP_MEDIA_UNKNOWN; idx3++) { if (tulip_srom_mediums[idx3].sm_srom_type == srom_media) break; } media = tulip_srom_mediums[idx3].sm_type; if (media == TULIP_MEDIA_UNKNOWN) break; mi->mi_type = TULIP_MEDIAINFO_SYM; sc->tulip_mediums[media] = mi; mi->mi_gpcontrol = (dp[1] + dp[2] * 256) << 16; mi->mi_gpdata = (dp[3] + dp[4] * 256) << 16; data = dp[5] + dp[6] * 256; mi->mi_cmdmode = TULIP_SROM_2114X_CMDBITS(data); if (data & TULIP_SROM_2114X_NOINDICATOR) { mi->mi_actmask = 0; } else { mi->mi_default = (data & TULIP_SROM_2114X_DEFAULT) != 0; mi->mi_actmask = TULIP_SROM_2114X_BITPOS(data); mi->mi_actdata = (data & TULIP_SROM_2114X_POLARITY) ? 0 : mi->mi_actmask; } mi++; break; } #if 0 case 5: { /* 21143 Reset block */ mi->mi_type = TULIP_MEDIAINFO_RESET; mi->mi_reset_length = *dp++; mi->mi_reset_offset = dp - sc->tulip_rombuf; dp += 2 * mi->mi_reset_length; mi++; break; } #endif default: { } } dp = ep; } } return mi - sc->tulip_mediainfo; } static const struct { void (*vendor_identify_nic)(tulip_softc_t * const sc); unsigned char vendor_oui[3]; } tulip_vendors[] = { { tulip_identify_dec_nic, { 0x08, 0x00, 0x2B } }, { tulip_identify_dec_nic, { 0x00, 0x00, 0xF8 } }, { tulip_identify_smc_nic, { 0x00, 0x00, 0xC0 } }, { tulip_identify_smc_nic, { 0x00, 0xE0, 0x29 } }, { tulip_identify_znyx_nic, { 0x00, 0xC0, 0x95 } }, { tulip_identify_cogent_nic, { 0x00, 0x00, 0x92 } }, { tulip_identify_asante_nic, { 0x00, 0x00, 0x94 } }, { tulip_identify_accton_nic, { 0x00, 0x00, 0xE8 } }, { NULL } }; /* * This deals with the vagaries of the address roms and the * brain-deadness that various vendors commit in using them. 
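* tulip_read_macaddr returns 0 on success and a distinct negative value
* (-1 through -6) identifying which consistency check failed.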
*/ static int tulip_read_macaddr( tulip_softc_t * const sc) { unsigned cksum, rom_cksum, idx; u_int32_t csr; unsigned char tmpbuf[8]; static const u_char testpat[] = { 0xFF, 0, 0x55, 0xAA, 0xFF, 0, 0x55, 0xAA }; sc->tulip_connidx = TULIP_SROM_LASTCONNIDX; if (sc->tulip_chipid == TULIP_21040) { TULIP_CSR_WRITE(sc, csr_enetrom, 1); for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) { int cnt = 0; while (((csr = TULIP_CSR_READ(sc, csr_enetrom)) & 0x80000000L) && cnt < 10000) cnt++; sc->tulip_rombuf[idx] = csr & 0xFF; } sc->tulip_boardsw = &tulip_21040_boardsw; #if defined(TULIP_EISA) } else if (sc->tulip_chipid == TULIP_DE425) { int cnt; for (idx = 0, cnt = 0; idx < sizeof(testpat) && cnt < 32; cnt++) { tmpbuf[idx] = TULIP_CSR_READBYTE(sc, csr_enetrom); if (tmpbuf[idx] == testpat[idx]) ++idx; else idx = 0; } for (idx = 0; idx < 32; idx++) sc->tulip_rombuf[idx] = TULIP_CSR_READBYTE(sc, csr_enetrom); sc->tulip_boardsw = &tulip_21040_boardsw; #endif /* TULIP_EISA */ } else { if (sc->tulip_chipid == TULIP_21041) { /* * Thankfully all 21041's act the same. */ sc->tulip_boardsw = &tulip_21041_boardsw; } else { /* * Assume all 21140 boards are compatible with the * DEC 10/100 evaluation board. Not really valid but * it's the best we can do until every one switches to * the new SROM format. */ sc->tulip_boardsw = &tulip_21140_eb_boardsw; } tulip_srom_read(sc); if (tulip_srom_crcok(sc->tulip_rombuf)) { /* * SROM CRC is valid, therefore it must be in the * new format. */ sc->tulip_features |= TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM; } else if (sc->tulip_rombuf[126] == 0xff && sc->tulip_rombuf[127] == 0xFF) { /* * No checksum is present. See if the SROM id checks out; * the first 18 bytes should be 0 followed by a 1 followed * by the number of adapters (which we don't deal with yet). */ for (idx = 0; idx < 18; idx++) { if (sc->tulip_rombuf[idx] != 0) break; } if (idx == 18 && sc->tulip_rombuf[18] == 1 && sc->tulip_rombuf[19] != 0) sc->tulip_features |= TULIP_HAVE_ISVSROM; } else if (sc->tulip_chipid >= TULIP_21142) { sc->tulip_features |= TULIP_HAVE_ISVSROM; sc->tulip_boardsw = &tulip_2114x_isv_boardsw; } if ((sc->tulip_features & TULIP_HAVE_ISVSROM) && tulip_srom_decode(sc)) { if (sc->tulip_chipid != TULIP_21041) sc->tulip_boardsw = &tulip_2114x_isv_boardsw; /* * If the SROM specifies more than one adapter, tag this as a * BASE rom. */ if (sc->tulip_rombuf[19] > 1) sc->tulip_features |= TULIP_HAVE_BASEROM; if (sc->tulip_boardsw == NULL) return -6; goto check_oui; } } if (bcmp(&sc->tulip_rombuf[0], &sc->tulip_rombuf[16], 8) != 0) { /* * Some folks don't use the standard ethernet rom format * but instead just put the address in the first 6 bytes * of the rom and let the rest be all 0xffs. (Can we say * ZNYX???) (well sometimes they put in a checksum so we'll * start at 8). */ for (idx = 8; idx < 32; idx++) { if (sc->tulip_rombuf[idx] != 0xFF) return -4; } /* * Make sure the address is not multicast or locally assigned and * that the OUI is not 00-00-00. */ if ((sc->tulip_rombuf[0] & 3) != 0) return -4; if (sc->tulip_rombuf[0] == 0 && sc->tulip_rombuf[1] == 0 && sc->tulip_rombuf[2] == 0) return -4; bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6); sc->tulip_features |= TULIP_HAVE_OKROM; goto check_oui; } else { /* * A number of makers of multiport boards (ZNYX and Cogent) * only put one address ROM on their 21040 boards.
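* (The remaining ports are left with blank ROMs.)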
So * if the ROM is all zeros (or all 0xFFs), look at the * previously configured boards (as long as they are on the same * PCI bus and the bus number is non-zero) until we find the * master board with the address ROM. We then use its address ROM * as the base for this board. (we add our relative board number * to the last byte of its address). */ for (idx = 0; idx < sizeof(sc->tulip_rombuf); idx++) { if (sc->tulip_rombuf[idx] != 0 && sc->tulip_rombuf[idx] != 0xFF) break; } if (idx == sizeof(sc->tulip_rombuf)) { int root_unit; tulip_softc_t *root_sc = NULL; for (root_unit = sc->tulip_unit - 1; root_unit >= 0; root_unit--) { root_sc = TULIP_UNIT_TO_SOFTC(root_unit); if (root_sc == NULL || (root_sc->tulip_features & (TULIP_HAVE_OKROM|TULIP_HAVE_SLAVEDROM)) == TULIP_HAVE_OKROM) break; root_sc = NULL; } if (root_sc != NULL && (root_sc->tulip_features & TULIP_HAVE_BASEROM) && root_sc->tulip_chipid == sc->tulip_chipid && root_sc->tulip_pci_busno == sc->tulip_pci_busno) { sc->tulip_features |= TULIP_HAVE_SLAVEDROM; sc->tulip_boardsw = root_sc->tulip_boardsw; strcpy(sc->tulip_boardid, root_sc->tulip_boardid); if (sc->tulip_boardsw->bd_type == TULIP_21140_ISV) { bcopy(root_sc->tulip_rombuf, sc->tulip_rombuf, sizeof(sc->tulip_rombuf)); if (!tulip_srom_decode(sc)) return -5; } else { bcopy(root_sc->tulip_enaddr, sc->tulip_enaddr, 6); sc->tulip_enaddr[5] += sc->tulip_unit - root_sc->tulip_unit; } /* * Now for a truly disgusting kludge: all 4 21040s on * the ZX314 share the same INTA line so the mapping * setup by the BIOS on the PCI bridge is worthless. * Rather than reprogramming the value in the config * register, we will handle this internally. */ if (root_sc->tulip_features & TULIP_HAVE_SHAREDINTR) { sc->tulip_slaves = root_sc->tulip_slaves; root_sc->tulip_slaves = sc; sc->tulip_features |= TULIP_HAVE_SLAVEDINTR; } return 0; } } } /* * This is the standard DEC address ROM test. */ if (bcmp(&sc->tulip_rombuf[24], testpat, 8) != 0) return -3; tmpbuf[0] = sc->tulip_rombuf[15]; tmpbuf[1] = sc->tulip_rombuf[14]; tmpbuf[2] = sc->tulip_rombuf[13]; tmpbuf[3] = sc->tulip_rombuf[12]; tmpbuf[4] = sc->tulip_rombuf[11]; tmpbuf[5] = sc->tulip_rombuf[10]; tmpbuf[6] = sc->tulip_rombuf[9]; tmpbuf[7] = sc->tulip_rombuf[8]; if (bcmp(&sc->tulip_rombuf[0], tmpbuf, 8) != 0) return -2; bcopy(sc->tulip_rombuf, sc->tulip_enaddr, 6); cksum = *(u_int16_t *) &sc->tulip_enaddr[0]; cksum *= 2; if (cksum > 65535) cksum -= 65535; cksum += *(u_int16_t *) &sc->tulip_enaddr[2]; if (cksum > 65535) cksum -= 65535; cksum *= 2; if (cksum > 65535) cksum -= 65535; cksum += *(u_int16_t *) &sc->tulip_enaddr[4]; if (cksum >= 65535) cksum -= 65535; rom_cksum = *(u_int16_t *) &sc->tulip_rombuf[6]; if (cksum != rom_cksum) return -1; check_oui: /* * Check for various boards based on OUI. Did I say braindead?
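* The first three bytes of the station address (the vendor OUI) select
* the handler from tulip_vendors[] below.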
*/ for (idx = 0; tulip_vendors[idx].vendor_identify_nic != NULL; idx++) { if (bcmp((caddr_t) sc->tulip_enaddr, (caddr_t) tulip_vendors[idx].vendor_oui, 3) == 0) { (*tulip_vendors[idx].vendor_identify_nic)(sc); break; } } sc->tulip_features |= TULIP_HAVE_OKROM; return 0; } #if defined(IFM_ETHER) static void tulip_ifmedia_add( tulip_softc_t * const sc) { tulip_media_t media; int medias = 0; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL) { ifmedia_add(&sc->tulip_ifmedia, tulip_media_to_ifmedia[media], 0, 0); medias++; } } if (medias == 0) { sc->tulip_features |= TULIP_HAVE_NOMEDIA; ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_NONE); } else if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) { ifmedia_add(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO, 0, 0); ifmedia_set(&sc->tulip_ifmedia, IFM_ETHER | IFM_AUTO); } else { ifmedia_set(&sc->tulip_ifmedia, tulip_media_to_ifmedia[sc->tulip_media]); sc->tulip_flags |= TULIP_PRINTMEDIA; tulip_linkup(sc, sc->tulip_media); } } static int tulip_ifmedia_change( struct ifnet * const ifp) { tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_media = TULIP_MEDIA_UNKNOWN; if (IFM_SUBTYPE(sc->tulip_ifmedia.ifm_media) != IFM_AUTO) { tulip_media_t media; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL && sc->tulip_ifmedia.ifm_media == tulip_media_to_ifmedia[media]) { sc->tulip_flags |= TULIP_PRINTMEDIA; sc->tulip_flags &= ~TULIP_DIDNWAY; tulip_linkup(sc, media); return 0; } } } sc->tulip_flags &= ~(TULIP_TXPROBE_ACTIVE|TULIP_WANTRXACT); tulip_reset(sc); tulip_init(sc); return 0; } /* * Media status callback */ static void tulip_ifmedia_status( struct ifnet * const ifp, struct ifmediareq *req) { tulip_softc_t *sc = TULIP_IFP_TO_SOFTC(ifp); #if defined(__bsdi__) if (sc->tulip_mii.mii_instance != 0) { mii_pollstat(&sc->tulip_mii); req->ifm_active = sc->tulip_mii.mii_media_active; req->ifm_status = sc->tulip_mii.mii_media_status; return; } #endif if (sc->tulip_media == TULIP_MEDIA_UNKNOWN) return; req->ifm_status = IFM_AVALID; if (sc->tulip_flags & TULIP_LINKUP) req->ifm_status |= IFM_ACTIVE; req->ifm_active = tulip_media_to_ifmedia[sc->tulip_media]; } #endif static void tulip_addr_filter( tulip_softc_t * const sc) { #if defined(__FreeBSD__) && __FreeBSD__ >= 3 struct ifmultiaddr *ifma; u_char *addrp; #else struct ether_multistep step; struct ether_multi *enm; #endif int multicnt; sc->tulip_flags &= ~(TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY|TULIP_ALLMULTI); sc->tulip_flags |= TULIP_WANTSETUP|TULIP_WANTTXSTART; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; #if defined(IFF_ALLMULTI) sc->tulip_if.if_flags &= ~IFF_ALLMULTI; #endif #if defined(__FreeBSD__) && __FreeBSD__ >= 3 multicnt = 0; for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family == AF_LINK) multicnt++; } #else multicnt = sc->tulip_multicnt; #endif sc->tulip_if.if_start = tulip_ifstart; /* so the setup packet gets queued */ if (multicnt > 14) { u_int32_t *sp = sc->tulip_setupdata; unsigned hash; /* * Some early passes of the 21140 have broken implementations of * hash-perfect mode. 
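 * (In hash mode the setup packet carries a 512-bit hash table rather than
 * 16 perfect addresses; as the code below shows, tulip_mchash() apparently
 * reduces a multicast address to a 9-bit index, with sp[hash >> 4] picking
 * one of 32 setup words and (hash & 0xF) the bit within its low 16 bits.)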
When we get too many multicasts for perfect * filtering with these chips, we need to switch into hash-only * mode (this is better than all-multicast on a network with lots * of multicast traffic). */ if (sc->tulip_features & TULIP_HAVE_BROKEN_HASH) sc->tulip_flags |= TULIP_WANTHASHONLY; else sc->tulip_flags |= TULIP_WANTHASHPERFECT; /* * If we have more than 14 multicasts, we have to * go into hash-perfect mode (a 512-bit multicast * hash and one perfect hardware address). */ bzero(sc->tulip_setupdata, sizeof(sc->tulip_setupdata)); #if defined(__FreeBSD__) && __FreeBSD__ >= 3 for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; hash = tulip_mchash(LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); sp[hash >> 4] |= 1 << (hash & 0xF); } #else ETHER_FIRST_MULTI(step, TULIP_ETHERCOM(sc), enm); while (enm != NULL) { if (bcmp(enm->enm_addrlo, enm->enm_addrhi, 6) == 0) { hash = tulip_mchash(enm->enm_addrlo); sp[hash >> 4] |= 1 << (hash & 0xF); } else { sc->tulip_flags |= TULIP_ALLMULTI; sc->tulip_flags &= ~(TULIP_WANTHASHONLY|TULIP_WANTHASHPERFECT); break; } ETHER_NEXT_MULTI(step, enm); } #endif /* * No reason to use a hash if we are going to be * receiving every multicast. */ if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { hash = tulip_mchash(etherbroadcastaddr); sp[hash >> 4] |= 1 << (hash & 0xF); if (sc->tulip_flags & TULIP_WANTHASHONLY) { hash = tulip_mchash(sc->tulip_enaddr); sp[hash >> 4] |= 1 << (hash & 0xF); } else { sp[39] = ((u_int16_t *) sc->tulip_enaddr)[0]; sp[40] = ((u_int16_t *) sc->tulip_enaddr)[1]; sp[41] = ((u_int16_t *) sc->tulip_enaddr)[2]; } } } if ((sc->tulip_flags & (TULIP_WANTHASHPERFECT|TULIP_WANTHASHONLY)) == 0) { u_int32_t *sp = sc->tulip_setupdata; int idx = 0; if ((sc->tulip_flags & TULIP_ALLMULTI) == 0) { /* * Otherwise we can get perfect filtering for 16 addresses. */ #if defined(__FreeBSD__) && __FreeBSD__ >= 3 for (ifma = sc->tulip_if.if_multiaddrs.lh_first; ifma != NULL; ifma = ifma->ifma_link.le_next) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; addrp = LLADDR((struct sockaddr_dl *)ifma->ifma_addr); *sp++ = ((u_int16_t *) addrp)[0]; *sp++ = ((u_int16_t *) addrp)[1]; *sp++ = ((u_int16_t *) addrp)[2]; idx++; } #else ETHER_FIRST_MULTI(step, TULIP_ETHERCOM(sc), enm); for (; enm != NULL; idx++) { if (bcmp(enm->enm_addrlo, enm->enm_addrhi, 6) == 0) { *sp++ = ((u_int16_t *) enm->enm_addrlo)[0]; *sp++ = ((u_int16_t *) enm->enm_addrlo)[1]; *sp++ = ((u_int16_t *) enm->enm_addrlo)[2]; } else { sc->tulip_flags |= TULIP_ALLMULTI; break; } ETHER_NEXT_MULTI(step, enm); } #endif /* * Add the broadcast address. */ idx++; *sp++ = 0xFFFF; *sp++ = 0xFFFF; *sp++ = 0xFFFF; } /* * Pad the rest with our hardware address */ for (; idx < 16; idx++) { *sp++ = ((u_int16_t *) sc->tulip_enaddr)[0]; *sp++ = ((u_int16_t *) sc->tulip_enaddr)[1]; *sp++ = ((u_int16_t *) sc->tulip_enaddr)[2]; } } #if defined(IFF_ALLMULTI) if (sc->tulip_flags & TULIP_ALLMULTI) sc->tulip_if.if_flags |= IFF_ALLMULTI; #endif } static void tulip_reset( tulip_softc_t * const sc) { tulip_ringinfo_t *ri; tulip_desc_t *di; u_int32_t inreset = (sc->tulip_flags & TULIP_INRESET); /* * Brilliant. Simply brilliant. When switching modes/speeds * on a 2114*, you need to set the appropriate MII/PCS/SCL/PS * bits in CSR6 and then do a software reset to get the 21140 * to properly reset its internal pathways to the right places. * Grrrr.
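 *
 * (Roughly, the sequence below is: let the board-specific preset hook
 * touch CSR6 first, write TULIP_BUSMODE_SWRESET to the bus-mode CSR,
 * wait out the reset, then reload the transmit/receive descriptor list
 * bases and reprogram the bus-mode register before anything else.)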
*/ if (sc->tulip_boardsw->bd_media_preset != NULL) (*sc->tulip_boardsw->bd_media_preset)(sc); TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyway) */ if (!inreset) { sc->tulip_flags |= TULIP_INRESET; sc->tulip_flags &= ~(TULIP_NEEDRESET|TULIP_RXBUFSLOW); sc->tulip_if.if_flags &= ~IFF_OACTIVE; } TULIP_CSR_WRITE(sc, csr_txlist, TULIP_KVATOPHYS(sc, &sc->tulip_txinfo.ri_first[0])); TULIP_CSR_WRITE(sc, csr_rxlist, TULIP_KVATOPHYS(sc, &sc->tulip_rxinfo.ri_first[0])); TULIP_CSR_WRITE(sc, csr_busmode, (1 << (TULIP_BURSTSIZE(sc->tulip_unit) + 8)) |TULIP_BUSMODE_CACHE_ALIGN8 |TULIP_BUSMODE_READMULTIPLE |(BYTE_ORDER != LITTLE_ENDIAN ? TULIP_BUSMODE_BIGENDIAN : 0)); sc->tulip_txtimer = 0; sc->tulip_txq.ifq_maxlen = TULIP_TXDESCS; /* * Free all the mbufs that were on the transmit ring. */ for (;;) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_txq, m); if (m == NULL) break; m_freem(m); } ri = &sc->tulip_txinfo; ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; for (di = ri->ri_first; di < ri->ri_last; di++) di->d_status = 0; /* * We need to collect all the mbufs that were on the * receive ring before we reinit it either to put * them back on or to know if we have to allocate * more. */ ri = &sc->tulip_rxinfo; ri->ri_nextin = ri->ri_nextout = ri->ri_first; ri->ri_free = ri->ri_max; for (di = ri->ri_first; di < ri->ri_last; di++) { di->d_status = 0; di->d_length1 = 0; di->d_addr1 = 0; di->d_length2 = 0; di->d_addr2 = 0; } for (;;) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_rxq, m); if (m == NULL) break; m_freem(m); } /* * If tulip_reset is being called recursively, exit quickly knowing * that when the outer tulip_reset returns all the right stuff will * have happened.
*/ if (inreset) return; sc->tulip_intrmask |= TULIP_STS_NORMALINTR|TULIP_STS_RXINTR|TULIP_STS_TXINTR |TULIP_STS_ABNRMLINTR|TULIP_STS_SYSERROR|TULIP_STS_TXSTOPPED |TULIP_STS_TXUNDERFLOW|TULIP_STS_TXBABBLE|TULIP_STS_LINKFAIL |TULIP_STS_RXSTOPPED; if ((sc->tulip_flags & TULIP_DEVICEPROBE) == 0) (*sc->tulip_boardsw->bd_media_select)(sc); #if defined(TULIP_DEBUG) if ((sc->tulip_flags & TULIP_NEEDRESET) == TULIP_NEEDRESET) printf(TULIP_PRINTF_FMT ": tulip_reset: additional reset needed?!?\n", TULIP_PRINTF_ARGS); #endif tulip_media_print(sc); if (sc->tulip_features & TULIP_HAVE_DUALSENSE) TULIP_CSR_WRITE(sc, csr_sia_status, TULIP_CSR_READ(sc, csr_sia_status)); sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_WANTSETUP|TULIP_INRESET |TULIP_RXACT); tulip_addr_filter(sc); } static void tulip_init( tulip_softc_t * const sc) { if (sc->tulip_if.if_flags & IFF_UP) { if ((sc->tulip_if.if_flags & IFF_RUNNING) == 0) { /* initialize the media */ tulip_reset(sc); } sc->tulip_if.if_flags |= IFF_RUNNING; if (sc->tulip_if.if_flags & IFF_PROMISC) { sc->tulip_flags |= TULIP_PROMISC; sc->tulip_cmdmode |= TULIP_CMD_PROMISCUOUS; sc->tulip_intrmask |= TULIP_STS_TXINTR; } else { sc->tulip_flags &= ~TULIP_PROMISC; sc->tulip_cmdmode &= ~TULIP_CMD_PROMISCUOUS; if (sc->tulip_flags & TULIP_ALLMULTI) { sc->tulip_cmdmode |= TULIP_CMD_ALLMULTI; } else { sc->tulip_cmdmode &= ~TULIP_CMD_ALLMULTI; } } sc->tulip_cmdmode |= TULIP_CMD_TXRUN; if ((sc->tulip_flags & (TULIP_TXPROBE_ACTIVE|TULIP_WANTSETUP)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; } else { sc->tulip_if.if_flags |= IFF_OACTIVE; sc->tulip_cmdmode &= ~TULIP_CMD_RXRUN; sc->tulip_intrmask &= ~TULIP_STS_RXSTOPPED; } TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == TULIP_WANTSETUP) tulip_txput_setup(sc); } else { sc->tulip_if.if_flags &= ~IFF_RUNNING; tulip_reset(sc); } } static void tulip_rx_intr( tulip_softc_t * const sc) { TULIP_PERFSTART(rxintr) tulip_ringinfo_t * const ri = &sc->tulip_rxinfo; struct ifnet * const ifp = &sc->tulip_if; int fillok = 1; #if defined(TULIP_DEBUG) int cnt = 0; #endif for (;;) { TULIP_PERFSTART(rxget) struct ether_header eh; tulip_desc_t *eop = ri->ri_nextin; int total_len = 0, last_offset = 0; struct mbuf *ms = NULL, *me = NULL; int accept = 0; if (fillok && sc->tulip_rxq.ifq_len < TULIP_RXQ_TARGET) goto queue_mbuf; #if defined(TULIP_DEBUG) if (cnt == ri->ri_max) break; #endif /* * If the TULIP has no descriptors, there can't be any receive * descriptors to process. */ if (eop == ri->ri_nextout) break; /* * 90% of the packets will fit in one descriptor. So we optimize * for that case. */ if ((((volatile tulip_desc_t *) eop)->d_status & (TULIP_DSTS_OWNER|TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) == (TULIP_DSTS_RxFIRSTDESC|TULIP_DSTS_RxLASTDESC)) { IF_DEQUEUE(&sc->tulip_rxq, ms); me = ms; } else { /* * If still owned by the TULIP, don't touch it. */ if (((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_OWNER) break; /* * It is possible (though improbable unless the BIG_PACKET support * is enabled or MCLBYTES < 1518) for a received packet to cross * more than one receive descriptor. 
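 *
 * (The loop below walks forward from the first descriptor until it
 * finds one with TULIP_DSTS_RxLASTDESC set, counting the extra buffers
 * in total_len; if it runs into a descriptor the chip still owns, the
 * whole packet hasn't arrived yet and we bail out until the next
 * interrupt.)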
*/ while ((((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_RxLASTDESC) == 0) { if (++eop == ri->ri_last) eop = ri->ri_first; if (eop == ri->ri_nextout || ((((volatile tulip_desc_t *) eop)->d_status & TULIP_DSTS_OWNER))) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxget); TULIP_PERFEND(rxintr); return; } total_len++; } /* * Dequeue the first buffer for the start of the packet. Hopefully * this will be the only one we need to dequeue. However, if the * packet consumed multiple descriptors, then we need to dequeue * those buffers and chain to the starting mbuf. All buffers but * the last buffer have the same length so we can set that now. * (we add to last_offset instead of multiplying since we normally * won't go into the loop and thereby save ourselves from * doing a multiplication by 0 in the normal case). */ IF_DEQUEUE(&sc->tulip_rxq, ms); for (me = ms; total_len > 0; total_len--) { me->m_len = TULIP_RX_BUFLEN; last_offset += TULIP_RX_BUFLEN; IF_DEQUEUE(&sc->tulip_rxq, me->m_next); me = me->m_next; } } /* * Now get the size of the received packet (minus the CRC). */ total_len = ((eop->d_status >> 16) & 0x7FFF) - 4; if ((sc->tulip_flags & TULIP_RXIGNORE) == 0 && ((eop->d_status & TULIP_DSTS_ERRSUM) == 0 #ifdef BIG_PACKET || (total_len <= sc->tulip_if.if_mtu + sizeof(struct ether_header) && (eop->d_status & (TULIP_DSTS_RxBADLENGTH|TULIP_DSTS_RxRUNT| TULIP_DSTS_RxCOLLSEEN|TULIP_DSTS_RxBADCRC| TULIP_DSTS_RxOVERFLOW)) == 0) #endif )) { me->m_len = total_len - last_offset; eh = *mtod(ms, struct ether_header *); #if NBPFILTER > 0 if (sc->tulip_bpf != NULL) if (me == ms) TULIP_BPF_TAP(sc, mtod(ms, caddr_t), total_len); else TULIP_BPF_MTAP(sc, ms); #endif sc->tulip_flags |= TULIP_RXACT; if ((sc->tulip_flags & (TULIP_PROMISC|TULIP_HASHONLY)) && (eh.ether_dhost[0] & 1) == 0 && !TULIP_ADDREQUAL(eh.ether_dhost, sc->tulip_enaddr)) goto next; accept = 1; total_len -= sizeof(struct ether_header); } else { ifp->if_ierrors++; if (eop->d_status & (TULIP_DSTS_RxBADLENGTH|TULIP_DSTS_RxOVERFLOW|TULIP_DSTS_RxWATCHDOG)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; } else { const char *error = NULL; if (eop->d_status & TULIP_DSTS_RxTOOLONG) { sc->tulip_dot3stats.dot3StatsFrameTooLongs++; error = "frame too long"; } if (eop->d_status & TULIP_DSTS_RxBADCRC) { if (eop->d_status & TULIP_DSTS_RxDRBBLBIT) { sc->tulip_dot3stats.dot3StatsAlignmentErrors++; error = "alignment error"; } else { sc->tulip_dot3stats.dot3StatsFCSErrors++; error = "bad crc"; } } if (error != NULL && (sc->tulip_flags & TULIP_NOMESSAGES) == 0) { printf(TULIP_PRINTF_FMT ": receive: " TULIP_EADDR_FMT ": %s\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(mtod(ms, u_char *) + 6), error); sc->tulip_flags |= TULIP_NOMESSAGES; } } } next: #if defined(TULIP_DEBUG) cnt++; #endif ifp->if_ipackets++; if (++eop == ri->ri_last) eop = ri->ri_first; ri->ri_nextin = eop; queue_mbuf: /* * Either we are priming the TULIP with mbufs (ms == NULL) * or we are about to accept an mbuf for the upper layers * so we need to allocate an mbuf to replace it. If we * can't replace it, send it up anyway. This may cause * us to drop packets in the future but that's better than * being caught in livelock. * * Note that if this packet crossed multiple descriptors * we don't even try to reallocate all the mbufs here. * Instead we rely on the test at the beginning of * the loop to refill for the extra consumed mbufs.
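 *
 * (The replacement path below typically grabs an mbuf header with
 * MGETHDR and then a cluster with MCLGET; if the cluster can't be had
 * the header is freed and ms ends up NULL, which trips the
 * TULIP_RXBUFSLOW handling further down.)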
*/ if (accept || ms == NULL) { struct mbuf *m0; MGETHDR(m0, M_DONTWAIT, MT_DATA); if (m0 != NULL) { #if defined(TULIP_COPY_RXDATA) if (!accept || total_len >= MHLEN) { #endif MCLGET(m0, M_DONTWAIT); if ((m0->m_flags & M_EXT) == 0) { m_freem(m0); m0 = NULL; } #if defined(TULIP_COPY_RXDATA) } #endif } if (accept #if defined(TULIP_COPY_RXDATA) && m0 != NULL #endif ) { #if defined(__bsdi__) eh.ether_type = ntohs(eh.ether_type); #endif #if !defined(TULIP_COPY_RXDATA) ms->m_data += sizeof(struct ether_header); ms->m_len -= sizeof(struct ether_header); ms->m_pkthdr.len = total_len; ms->m_pkthdr.rcvif = ifp; ether_input(ifp, &eh, ms); #else #ifdef BIG_PACKET #error BIG_PACKET is incompatible with TULIP_COPY_RXDATA #endif if (ms == me) bcopy(mtod(ms, caddr_t) + sizeof(struct ether_header), mtod(m0, caddr_t), total_len); else m_copydata(ms, 0, total_len, mtod(m0, caddr_t)); m0->m_len = m0->m_pkthdr.len = total_len; m0->m_pkthdr.rcvif = ifp; ether_input(ifp, &eh, m0); m0 = ms; #endif } ms = m0; } if (ms == NULL) { /* * Couldn't allocate a new buffer. Don't bother * trying to replenish the receive queue. */ fillok = 0; sc->tulip_flags |= TULIP_RXBUFSLOW; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxlowbufs++; #endif TULIP_PERFEND(rxget); continue; } /* * Now give the buffer(s) to the TULIP and save in our * receive queue. */ do { ri->ri_nextout->d_length1 = TULIP_RX_BUFLEN; ri->ri_nextout->d_addr1 = TULIP_KVATOPHYS(sc, mtod(ms, caddr_t)); ri->ri_nextout->d_status = TULIP_DSTS_OWNER; if (++ri->ri_nextout == ri->ri_last) ri->ri_nextout = ri->ri_first; me = ms->m_next; ms->m_next = NULL; IF_ENQUEUE(&sc->tulip_rxq, ms); } while ((ms = me) != NULL); if (sc->tulip_rxq.ifq_len >= TULIP_RXQ_TARGET) sc->tulip_flags &= ~TULIP_RXBUFSLOW; TULIP_PERFEND(rxget); } #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_rxintrs++; sc->tulip_dbg.dbg_rxpktsperintr[cnt]++; #endif TULIP_PERFEND(rxintr); } static int tulip_tx_intr( tulip_softc_t * const sc) { TULIP_PERFSTART(txintr) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; struct mbuf *m; int xmits = 0; int descs = 0; while (ri->ri_free < ri->ri_max) { u_int32_t d_flag; if (((volatile tulip_desc_t *) ri->ri_nextin)->d_status & TULIP_DSTS_OWNER) break; d_flag = ri->ri_nextin->d_flag; if (d_flag & TULIP_DFLAG_TxLASTSEG) { if (d_flag & TULIP_DFLAG_TxSETUPPKT) { /* * We've just finished processing a setup packet. * Mark that we finished it. If there's not * another one pending, start up the TULIP receiver. * Make sure we ack the RXSTOPPED so we won't get * an abnormal interrupt indication.
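 *
 * (Whether the filter ended up hash-only is recovered from the
 * descriptor itself: the TULIP_DFLAG_TxINVRSFILT test below re-sets
 * TULIP_HASHONLY after both setup bits have been cleared.)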
*/ sc->tulip_flags &= ~(TULIP_DOINGSETUP|TULIP_HASHONLY); if (ri->ri_nextin->d_flag & TULIP_DFLAG_TxINVRSFILT) sc->tulip_flags |= TULIP_HASHONLY; if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == 0) { tulip_rx_intr(sc); sc->tulip_cmdmode |= TULIP_CMD_RXRUN; sc->tulip_intrmask |= TULIP_STS_RXSTOPPED; TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } else { const u_int32_t d_status = ri->ri_nextin->d_status; IF_DEQUEUE(&sc->tulip_txq, m); if (m != NULL) { #if NBPFILTER > 0 if (sc->tulip_bpf != NULL) TULIP_BPF_MTAP(sc, m); #endif m_freem(m); #if defined(TULIP_DEBUG) } else { printf(TULIP_PRINTF_FMT ": tx_intr: failed to dequeue mbuf?!?\n", TULIP_PRINTF_ARGS); #endif } if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { tulip_mediapoll_event_t event = TULIP_MEDIAPOLL_TXPROBE_OK; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxEXCCOLL)) { #if defined(TULIP_DEBUG) if (d_status & TULIP_DSTS_TxNOCARR) sc->tulip_dbg.dbg_txprobe_nocarr++; if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dbg.dbg_txprobe_exccoll++; #endif event = TULIP_MEDIAPOLL_TXPROBE_FAILED; } (*sc->tulip_boardsw->bd_media_poll)(sc, event); /* * Escape from the loop before media poll has reset the TULIP! */ break; } else { xmits++; if (d_status & TULIP_DSTS_ERRSUM) { sc->tulip_if.if_oerrors++; if (d_status & TULIP_DSTS_TxEXCCOLL) sc->tulip_dot3stats.dot3StatsExcessiveCollisions++; if (d_status & TULIP_DSTS_TxLATECOLL) sc->tulip_dot3stats.dot3StatsLateCollisions++; if (d_status & (TULIP_DSTS_TxNOCARR|TULIP_DSTS_TxCARRLOSS)) sc->tulip_dot3stats.dot3StatsCarrierSenseErrors++; if (d_status & (TULIP_DSTS_TxUNDERFLOW|TULIP_DSTS_TxBABBLE)) sc->tulip_dot3stats.dot3StatsInternalMacTransmitErrors++; if (d_status & TULIP_DSTS_TxUNDERFLOW) sc->tulip_dot3stats.dot3StatsInternalTransmitUnderflows++; if (d_status & TULIP_DSTS_TxBABBLE) sc->tulip_dot3stats.dot3StatsInternalTransmitBabbles++; } else { u_int32_t collisions = (d_status & TULIP_DSTS_TxCOLLMASK) >> TULIP_DSTS_V_TxCOLLCNT; sc->tulip_if.if_collisions += collisions; if (collisions == 1) sc->tulip_dot3stats.dot3StatsSingleCollisionFrames++; else if (collisions > 1) sc->tulip_dot3stats.dot3StatsMultipleCollisionFrames++; else if (d_status & TULIP_DSTS_TxDEFERRED) sc->tulip_dot3stats.dot3StatsDeferredTransmissions++; /* * SQE is only valid for 10baseT/BNC/AUI when not * running in full-duplex. In order to speed up the * test, the corresponding bit in tulip_flags needs to * be set as well to get us to count SQE Test Errors. */ if (d_status & TULIP_DSTS_TxNOHRTBT & sc->tulip_flags) sc->tulip_dot3stats.dot3StatsSQETestErrors++; } } } if (++ri->ri_nextin == ri->ri_last) ri->ri_nextin = ri->ri_first; ri->ri_free++; descs++; if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) sc->tulip_if.if_flags &= ~IFF_OACTIVE; } /* * If nothing is left to transmit, disable the timer. * Else, if we made progress, reset the timer back to 2 ticks.
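 *
 * (The timer itself is serviced in tulip_ifwatchdog: once armed it is
 * decremented every tick, and if it hits zero before a transmit
 * completes, the driver prints a transmission timeout and resets the
 * chip; TULIP_TXTIMER is presumably the 2-tick value this refers to.)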
*/ if (ri->ri_free == ri->ri_max || (sc->tulip_flags & TULIP_TXPROBE_ACTIVE)) sc->tulip_txtimer = 0; else if (xmits > 0) sc->tulip_txtimer = TULIP_TXTIMER; sc->tulip_if.if_opackets += xmits; TULIP_PERFEND(txintr); return descs; } static void tulip_print_abnormal_interrupt( tulip_softc_t * const sc, u_int32_t csr) { const char * const *msgp = tulip_status_bits; const char *sep; u_int32_t mask; const char thrsh[] = "72|128\0\0\096|256\0\0\0128|512\0\0160|1024\0"; csr &= (1 << (sizeof(tulip_status_bits)/sizeof(tulip_status_bits[0]))) - 1; printf(TULIP_PRINTF_FMT ": abnormal interrupt:", TULIP_PRINTF_ARGS); for (sep = " ", mask = 1; mask <= csr; mask <<= 1, msgp++) { if ((csr & mask) && *msgp != NULL) { printf("%s%s", sep, *msgp); if (mask == TULIP_STS_TXUNDERFLOW && (sc->tulip_flags & TULIP_NEWTXTHRESH)) { sc->tulip_flags &= ~TULIP_NEWTXTHRESH; if (sc->tulip_cmdmode & TULIP_CMD_STOREFWD) { printf(" (switching to store-and-forward mode)"); } else { printf(" (raising TX threshold to %s)", &thrsh[9 * ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) >> 14)]); } } sep = ", "; } } printf("\n"); } static void tulip_intr_handler( tulip_softc_t * const sc, int *progress_p) { TULIP_PERFSTART(intr) u_int32_t csr; #if defined(__NetBSD__) && !defined(TULIP_USE_SOFTINTR) int only_once; only_once = 1; #endif while ((csr = TULIP_CSR_READ(sc, csr_status)) & sc->tulip_intrmask) { #if defined(__NetBSD__) && !defined(TULIP_USE_SOFTINTR) if (only_once == 1) { #if NRND > 0 rnd_add_uint32(&sc->tulip_rndsource, csr); #endif only_once = 0; } #endif *progress_p = 1; TULIP_CSR_WRITE(sc, csr_status, csr); if (csr & TULIP_STS_SYSERROR) { sc->tulip_last_system_error = (csr & TULIP_STS_ERRORMASK) >> TULIP_STS_ERR_SHIFT; if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_flags |= TULIP_SYSTEMERROR; } else { printf(TULIP_PRINTF_FMT ": system error: %s\n", TULIP_PRINTF_ARGS, tulip_system_errors[sc->tulip_last_system_error]); } sc->tulip_flags |= TULIP_NEEDRESET; sc->tulip_system_errors++; break; } if (csr & (TULIP_STS_LINKPASS|TULIP_STS_LINKFAIL)) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_link_intrs++; #endif if (sc->tulip_boardsw->bd_media_poll != NULL) { (*sc->tulip_boardsw->bd_media_poll)(sc, csr & TULIP_STS_LINKFAIL ? TULIP_MEDIAPOLL_LINKFAIL : TULIP_MEDIAPOLL_LINKPASS); csr &= ~TULIP_STS_ABNRMLINTR; } tulip_media_print(sc); } if (csr & (TULIP_STS_RXINTR|TULIP_STS_RXNOBUF)) { u_int32_t misses = TULIP_CSR_READ(sc, csr_missed_frames); if (csr & TULIP_STS_RXNOBUF) sc->tulip_dot3stats.dot3StatsMissedFrames += misses & 0xFFFF; /* * Pass 2.[012] of the 21140A-A[CDE] may hang and/or corrupt data * on receive overflows. */ if ((misses & 0x0FFE0000) && (sc->tulip_features & TULIP_HAVE_RXBADOVRFLW)) { sc->tulip_dot3stats.dot3StatsInternalMacReceiveErrors++; /* * Stop the receiver process and spin until it's stopped. * Tell rx_intr to drop the packets it dequeues. */ TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode & ~TULIP_CMD_RXRUN); while ((TULIP_CSR_READ(sc, csr_status) & TULIP_STS_RXSTOPPED) == 0) ; TULIP_CSR_WRITE(sc, csr_status, TULIP_STS_RXSTOPPED); sc->tulip_flags |= TULIP_RXIGNORE; } tulip_rx_intr(sc); if (sc->tulip_flags & TULIP_RXIGNORE) { /* * Restart the receiver. 
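 *
 * (Restarting is just a matter of clearing TULIP_RXIGNORE and rewriting
 * CSR6 with tulip_cmdmode, which still has TULIP_CMD_RXRUN set; the stop
 * above only masked RXRUN out of the value written to the chip.)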
*/ sc->tulip_flags &= ~TULIP_RXIGNORE; TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } } if (csr & TULIP_STS_ABNRMLINTR) { u_int32_t tmp = csr & sc->tulip_intrmask & ~(TULIP_STS_NORMALINTR|TULIP_STS_ABNRMLINTR); if (csr & TULIP_STS_TXUNDERFLOW) { if ((sc->tulip_cmdmode & TULIP_CMD_THRESHOLDCTL) != TULIP_CMD_THRSHLD160) { sc->tulip_cmdmode += TULIP_CMD_THRSHLD96; sc->tulip_flags |= TULIP_NEWTXTHRESH; } else if (sc->tulip_features & TULIP_HAVE_STOREFWD) { sc->tulip_cmdmode |= TULIP_CMD_STOREFWD; sc->tulip_flags |= TULIP_NEWTXTHRESH; } } if (sc->tulip_flags & TULIP_NOMESSAGES) { sc->tulip_statusbits |= tmp; } else { tulip_print_abnormal_interrupt(sc, tmp); sc->tulip_flags |= TULIP_NOMESSAGES; } TULIP_CSR_WRITE(sc, csr_command, sc->tulip_cmdmode); } if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_TXPROBE_ACTIVE|TULIP_DOINGSETUP|TULIP_PROMISC)) { tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_TXPROBE_ACTIVE) == 0) tulip_ifstart(&sc->tulip_if); } } if (sc->tulip_flags & TULIP_NEEDRESET) { tulip_reset(sc); tulip_init(sc); } TULIP_PERFEND(intr); } #if defined(TULIP_USE_SOFTINTR) /* * This is an experimental idea to alleviate problems due to interrupt * livelock. What is interrupt livelock? It's when you spend all your * time servicing device interrupts and never drop below device ipl * to do "useful" work. * * So what we do here is see if the device needs service and if so, * disable interrupts (dismiss the interrupt), place it in a list of devices * needing service, and issue a network software interrupt. * * When our network software interrupt routine gets called, we simply * walk down the list of devices that we have created and deal with them * at splnet/splsoftnet. * */ static void tulip_hardintr_handler( tulip_softc_t * const sc, int *progress_p) { if ((TULIP_CSR_READ(sc, csr_status) & (TULIP_STS_NORMALINTR|TULIP_STS_ABNRMLINTR)) == 0) return; *progress_p = 1; /* * disable interrupts */ TULIP_CSR_WRITE(sc, csr_intr, 0); /* * mark it as needing a software interrupt */ tulip_softintr_mask |= (1U << sc->tulip_unit); #if defined(__NetBSD__) && NRND > 0 /* * This isn't all that random (the value we feed in) but it is * better than a constant probably. It isn't used in entropy * calculation anyway, just to add something to the pool. */ rnd_add_uint32(&sc->tulip_rndsource, sc->tulip_flags); #endif } static void tulip_softintr( void) { u_int32_t softintr_mask, mask; int progress = 0; int unit; tulip_spl_t s; /* * Copy the mask to a local copy and reset the global one to 0. */ s = TULIP_RAISESPL(); softintr_mask = tulip_softintr_mask; tulip_softintr_mask = 0; TULIP_RESTORESPL(s); /* * Optimize for the single unit case. */ if (tulip_softintr_max_unit == 0) { if (softintr_mask & 1) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(0); /* * Handle the "interrupt" and then reenable interrupts */ softintr_mask = 0; tulip_intr_handler(sc, &progress); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } return; } /* * Handle all "queued" interrupts in a round robin fashion. * This is done so as not to favor a particular interface. */ unit = tulip_softintr_last_unit; mask = (1U << unit); while (softintr_mask != 0) { if (tulip_softintr_max_unit == unit) { unit = 0; mask = 1; } else { unit += 1; mask <<= 1; } if (softintr_mask & mask) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(unit); /* * Handle the "interrupt" and then reenable interrupts */ softintr_mask ^= mask; tulip_intr_handler(sc, &progress); TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } /* * Save where we ended up.
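 *
 * (Resuming the scan at the unit after the last one serviced is what
 * keeps the round-robin fair; a fixed starting point would always favor
 * the lowest-numbered interface under load.)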
*/ tulip_softintr_last_unit = unit; } #endif /* TULIP_USE_SOFTINTR */ static tulip_intrfunc_t tulip_intr_shared( void *arg) { tulip_softc_t * sc = arg; int progress = 0; for (; sc != NULL; sc = sc->tulip_slaves) { #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif #if defined(TULIP_USE_SOFTINTR) tulip_hardintr_handler(sc, &progress); #else tulip_intr_handler(sc, &progress); #endif } #if defined(TULIP_USE_SOFTINTR) if (progress) schednetisr(NETISR_DE); #endif #if !defined(TULIP_VOID_INTRFUNC) return progress; #endif } static tulip_intrfunc_t tulip_intr_normal( void *arg) { tulip_softc_t * sc = (tulip_softc_t *) arg; int progress = 0; #if defined(TULIP_DEBUG) sc->tulip_dbg.dbg_intrs++; #endif #if defined(TULIP_USE_SOFTINTR) tulip_hardintr_handler(sc, &progress); if (progress) schednetisr(NETISR_DE); #else tulip_intr_handler(sc, &progress); #endif #if !defined(TULIP_VOID_INTRFUNC) return progress; #endif } static struct mbuf * tulip_mbuf_compress( struct mbuf *m) { struct mbuf *m0; #if MCLBYTES >= ETHERMTU + 18 && !defined(BIG_PACKET) MGETHDR(m0, M_DONTWAIT, MT_DATA); if (m0 != NULL) { if (m->m_pkthdr.len > MHLEN) { MCLGET(m0, M_DONTWAIT); if ((m0->m_flags & M_EXT) == 0) { m_freem(m); m_freem(m0); return NULL; } } m_copydata(m, 0, m->m_pkthdr.len, mtod(m0, caddr_t)); m0->m_pkthdr.len = m0->m_len = m->m_pkthdr.len; } #else int mlen = MHLEN; int len = m->m_pkthdr.len; struct mbuf **mp = &m0; while (len > 0) { if (mlen == MHLEN) { MGETHDR(*mp, M_DONTWAIT, MT_DATA); } else { MGET(*mp, M_DONTWAIT, MT_DATA); } if (*mp == NULL) { m_freem(m0); m0 = NULL; break; } if (len > MLEN) { MCLGET(*mp, M_DONTWAIT); if (((*mp)->m_flags & M_EXT) == 0) { m_freem(m0); m0 = NULL; break; } (*mp)->m_len = len <= MCLBYTES ? len : MCLBYTES; } else { (*mp)->m_len = len <= mlen ? len : mlen; } m_copydata(m, m->m_pkthdr.len - len, (*mp)->m_len, mtod((*mp), caddr_t)); len -= (*mp)->m_len; mp = &(*mp)->m_next; mlen = MLEN; } #endif m_freem(m); return m0; } static struct mbuf * tulip_txput( tulip_softc_t * const sc, struct mbuf *m) { TULIP_PERFSTART(txput) tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_desc_t *eop, *nextout; int segcnt, free; u_int32_t d_status; struct mbuf *m0; #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { printf(TULIP_PRINTF_FMT ": txput%s: tx not running\n", TULIP_PRINTF_ARGS, (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) ? "(probe)" : ""); sc->tulip_flags |= TULIP_WANTTXSTART; goto finish; } #endif /* * Now we try to fill in our transmit descriptors. This is * a bit reminiscent of going on the Ark two by two * since each descriptor for the TULIP can describe * two buffers. So we advance through the packet, filling * each of the two entries at a time, to fill each * descriptor. Clear the first and last segment bits * in each descriptor (actually just clear everything * but the end-of-ring or chain bits) to make sure * we don't get messed up by previously sent packets. * * We may fail to put the entire packet on the ring if * there are either not enough ring entries free or if the * packet has more than MAX_TXSEG segments. In the former * case we will just wait for the ring to empty. In the * latter case we have to recopy.
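 *
 * (For example, a 3-mbuf chain that stays within cluster boundaries
 * consumes 2 descriptors: mbuf 0 fills d_addr1/d_length1 and mbuf 1
 * fills d_addr2/d_length2 of the first descriptor, while mbuf 2 lands
 * in d_addr1 of the second. The CLBYTES arithmetic below splits any
 * segment that crosses a cluster boundary into two pieces first.)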
*/ again: d_status = 0; eop = nextout = ri->ri_nextout; m0 = m; segcnt = 0; free = ri->ri_free; do { int len = m0->m_len; caddr_t addr = mtod(m0, caddr_t); unsigned clsize = CLBYTES - (((u_long) addr) & (CLBYTES-1)); while (len > 0) { unsigned slen = min(len, clsize); #ifdef BIG_PACKET int partial = 0; if (slen >= 2048) slen = 2040, partial = 1; #endif segcnt++; if (segcnt > TULIP_MAX_TXSEG) { /* * The packet exceeds the number of transmit buffer * entries that we can use for one packet, so we have to * recopy it into one mbuf and then try again. */ m = tulip_mbuf_compress(m); if (m == NULL) goto finish; goto again; } if (segcnt & 1) { if (--free == 0) { /* * See if there's any unclaimed space in the * transmit ring. */ if ((free += tulip_tx_intr(sc)) == 0) { /* * There's no more room but since nothing * has been committed at this point, just * show output is active, put back the * mbuf and return. */ sc->tulip_flags |= TULIP_WANTTXSTART; goto finish; } } eop = nextout; if (++nextout == ri->ri_last) nextout = ri->ri_first; eop->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; eop->d_status = d_status; eop->d_addr1 = TULIP_KVATOPHYS(sc, addr); eop->d_length1 = slen; } else { /* * Fill in second half of descriptor */ eop->d_addr2 = TULIP_KVATOPHYS(sc, addr); eop->d_length2 = slen; } d_status = TULIP_DSTS_OWNER; len -= slen; addr += slen; #ifdef BIG_PACKET if (partial) continue; #endif clsize = CLBYTES; } } while ((m0 = m0->m_next) != NULL); /* * The descriptors have been filled in. Now get ready * to transmit. */ IF_ENQUEUE(&sc->tulip_txq, m); m = NULL; /* * Make sure the next descriptor after this packet is owned * by us since it may have been set up above if we ran out * of room in the ring. */ nextout->d_status = 0; /* * If we only used the first segment of the last descriptor, * make sure the second segment will not be used. */ if (segcnt & 1) { eop->d_addr2 = 0; eop->d_length2 = 0; } /* * Mark the last and first segments, indicate we want a transmit * complete interrupt, and tell it to transmit! */ eop->d_flag |= TULIP_DFLAG_TxLASTSEG|TULIP_DFLAG_TxWANTINTR; /* * Note that ri->ri_nextout is still the start of the packet * and until we set the OWNER bit, we can still back out of * everything we have done. */ ri->ri_nextout->d_flag |= TULIP_DFLAG_TxFIRSTSEG; ri->ri_nextout->d_status = TULIP_DSTS_OWNER; TULIP_CSR_WRITE(sc, csr_txpoll, 1); /* * This advances the ring for us. */ ri->ri_nextout = nextout; ri->ri_free = free; TULIP_PERFEND(txput); if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) { sc->tulip_if.if_flags |= IFF_OACTIVE; TULIP_PERFEND(txput); return NULL; } /* * Switch back to the single-queueing ifstart. */ sc->tulip_flags &= ~TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart_one; if (sc->tulip_txtimer == 0) sc->tulip_txtimer = TULIP_TXTIMER; /* * If we want a txstart, there must not be enough space in the * transmit ring. So we want to enable transmit done interrupts * so we can immediately reclaim some space. When the transmit * interrupt is posted, the interrupt handler will call tx_intr * to reclaim space and then txstart (since WANTTXSTART is set). * txstart will move the packet into the transmit ring and clear * WANTTXSTART thereby causing TXINTR to be cleared.
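 *
 * (Conversely, when nothing is pending and we're not in promiscuous
 * mode, the finish path below turns TULIP_STS_TXINTR back off so routine
 * transmit completions don't interrupt at all and are reaped lazily.)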
*/ finish: if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_DOINGSETUP)) { sc->tulip_if.if_flags |= IFF_OACTIVE; sc->tulip_if.if_start = tulip_ifstart; if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } else if ((sc->tulip_flags & TULIP_PROMISC) == 0) { if (sc->tulip_intrmask & TULIP_STS_TXINTR) { sc->tulip_intrmask &= ~TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } TULIP_PERFEND(txput); return m; } static void tulip_txput_setup( tulip_softc_t * const sc) { tulip_ringinfo_t * const ri = &sc->tulip_txinfo; tulip_desc_t *nextout; /* * We will transmit, at most, one setup packet per call to ifstart. */ #if defined(TULIP_DEBUG) if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) { printf(TULIP_PRINTF_FMT ": txput_setup: tx not running\n", TULIP_PRINTF_ARGS); sc->tulip_flags |= TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart; return; } #endif /* * Try to reclaim some free descriptors. */ if (ri->ri_free < 2) tulip_tx_intr(sc); if ((sc->tulip_flags & TULIP_DOINGSETUP) || ri->ri_free == 1) { sc->tulip_flags |= TULIP_WANTTXSTART; sc->tulip_if.if_start = tulip_ifstart; return; } bcopy(sc->tulip_setupdata, sc->tulip_setupbuf, sizeof(sc->tulip_setupbuf)); /* * Clear WANTSETUP and set DOINGSETUP. Since we know that WANTSETUP is * set and DOINGSETUP is clear, doing an XOR of the two will DTRT. */ sc->tulip_flags ^= TULIP_WANTSETUP|TULIP_DOINGSETUP; ri->ri_free--; nextout = ri->ri_nextout; nextout->d_flag &= TULIP_DFLAG_ENDRING|TULIP_DFLAG_CHAIN; nextout->d_flag |= TULIP_DFLAG_TxFIRSTSEG|TULIP_DFLAG_TxLASTSEG |TULIP_DFLAG_TxSETUPPKT|TULIP_DFLAG_TxWANTINTR; if (sc->tulip_flags & TULIP_WANTHASHPERFECT) nextout->d_flag |= TULIP_DFLAG_TxHASHFILT; else if (sc->tulip_flags & TULIP_WANTHASHONLY) nextout->d_flag |= TULIP_DFLAG_TxHASHFILT|TULIP_DFLAG_TxINVRSFILT; nextout->d_length1 = sizeof(sc->tulip_setupbuf); nextout->d_addr1 = TULIP_KVATOPHYS(sc, sc->tulip_setupbuf); nextout->d_length2 = 0; nextout->d_addr2 = 0; /* * Advance the ring for the next transmit packet. */ if (++ri->ri_nextout == ri->ri_last) ri->ri_nextout = ri->ri_first; /* * Make sure the next descriptor is owned by us since it * may have been set up above if we ran out of room in the * ring. */ ri->ri_nextout->d_status = 0; nextout->d_status = TULIP_DSTS_OWNER; TULIP_CSR_WRITE(sc, csr_txpoll, 1); if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) { sc->tulip_intrmask |= TULIP_STS_TXINTR; TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask); } } /* * This routine is entered at splnet() (splsoftnet() on NetBSD) * and thereby poses no problems whether TULIP_USE_SOFTINTR is * defined or not.
*/ static int tulip_ifioctl( struct ifnet * ifp, ioctl_cmd_t cmd, caddr_t data) { TULIP_PERFSTART(ifioctl) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); struct ifaddr *ifa = (struct ifaddr *)data; struct ifreq *ifr = (struct ifreq *) data; tulip_spl_t s; int error = 0; #if defined(TULIP_USE_SOFTINTR) s = TULIP_RAISESOFTSPL(); #else s = TULIP_RAISESPL(); #endif switch (cmd) { case SIOCSIFADDR: { ifp->if_flags |= IFF_UP; switch(ifa->ifa_addr->sa_family) { #ifdef INET case AF_INET: { tulip_init(sc); TULIP_ARP_IFINIT(sc, ifa); break; } #endif /* INET */ #ifdef IPX case AF_IPX: { struct ipx_addr *ina = &(IA_SIPX(ifa)->sipx_addr); if (ipx_nullhost(*ina)) { ina->x_host = *(union ipx_host *)(sc->tulip_enaddr); } else { ifp->if_flags &= ~IFF_RUNNING; bcopy((caddr_t)ina->x_host.c_host, (caddr_t)sc->tulip_enaddr, sizeof(sc->tulip_enaddr)); } tulip_init(sc); break; } #endif /* IPX */ #ifdef NS /* * This magic was copied from if_is.c; I don't use XNS, * so I have no way of telling if this actually * works or not. */ case AF_NS: { struct ns_addr *ina = &(IA_SNS(ifa)->sns_addr); if (ns_nullhost(*ina)) { ina->x_host = *(union ns_host *)(sc->tulip_enaddr); } else { ifp->if_flags &= ~IFF_RUNNING; bcopy((caddr_t)ina->x_host.c_host, (caddr_t)sc->tulip_enaddr, sizeof(sc->tulip_enaddr)); } tulip_init(sc); break; } #endif /* NS */ default: { tulip_init(sc); break; } } break; } case SIOCGIFADDR: { bcopy((caddr_t) sc->tulip_enaddr, (caddr_t) ((struct sockaddr *)&ifr->ifr_data)->sa_data, 6); break; } case SIOCSIFFLAGS: { #if !defined(IFM_ETHER) int flags = 0; if (ifp->if_flags & IFF_LINK0) flags |= 1; if (ifp->if_flags & IFF_LINK1) flags |= 2; if (ifp->if_flags & IFF_LINK2) flags |= 4; if (flags == 7) { ifp->if_flags &= ~(IFF_LINK0|IFF_LINK1|IFF_LINK2); sc->tulip_media = TULIP_MEDIA_UNKNOWN; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_WANTRXACT|TULIP_LINKUP|TULIP_NOAUTOSENSE); tulip_reset(sc); } else if (flags) { tulip_media_t media; for (media = TULIP_MEDIA_UNKNOWN; media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL && --flags == 0) { sc->tulip_flags |= TULIP_NOAUTOSENSE; if (sc->tulip_media != media || (sc->tulip_flags & TULIP_DIDNWAY)) { sc->tulip_flags &= ~TULIP_DIDNWAY; tulip_linkup(sc, media); } break; } } if (flags) printf(TULIP_PRINTF_FMT ": ignored invalid media request\n", TULIP_PRINTF_ARGS); } #endif tulip_init(sc); break; } #if defined(SIOCSIFMEDIA) case SIOCSIFMEDIA: case SIOCGIFMEDIA: { error = ifmedia_ioctl(ifp, ifr, &sc->tulip_ifmedia, cmd); break; } #endif case SIOCADDMULTI: case SIOCDELMULTI: { /* * Update multicast listeners */ #if defined(__FreeBSD__) && __FreeBSD__ >= 3 tulip_addr_filter(sc); /* reset multicast filtering */ tulip_init(sc); error = 0; #else if (cmd == SIOCADDMULTI) error = ether_addmulti(ifr, TULIP_ETHERCOM(sc)); else error = ether_delmulti(ifr, TULIP_ETHERCOM(sc)); if (error == ENETRESET) { tulip_addr_filter(sc); /* reset multicast filtering */ tulip_init(sc); error = 0; } #endif break; } #if defined(SIOCSIFMTU) #if !defined(ifr_mtu) #define ifr_mtu ifr_metric #endif case SIOCSIFMTU: /* * Set the interface MTU.
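 *
 * (Anything above ETHERMTU is rejected with EINVAL unless BIG_PACKET is
 * compiled in and the chip is one of the 21140/21140A/21041 variants the
 * test below exempts; a successful change with BIG_PACKET forces a
 * reset/init cycle so the new size takes effect.)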
*/ if (ifr->ifr_mtu > ETHERMTU #ifdef BIG_PACKET && sc->tulip_chipid != TULIP_21140 && sc->tulip_chipid != TULIP_21140A && sc->tulip_chipid != TULIP_21041 #endif ) { error = EINVAL; break; } ifp->if_mtu = ifr->ifr_mtu; #ifdef BIG_PACKET tulip_reset(sc); tulip_init(sc); #endif break; #endif /* SIOCSIFMTU */ #ifdef SIOCGADDRROM case SIOCGADDRROM: { error = copyout(sc->tulip_rombuf, ifr->ifr_data, sizeof(sc->tulip_rombuf)); break; } #endif #ifdef SIOCGCHIPID case SIOCGCHIPID: { ifr->ifr_metric = (int) sc->tulip_chipid; break; } #endif default: { error = EINVAL; break; } } TULIP_RESTORESPL(s); TULIP_PERFEND(ifioctl); return error; } /* * These routines get called at device spl (from ether_output). This might * pose a problem for TULIP_USE_SOFTINTR if ether_output is called at * device spl from another driver. */ static ifnet_ret_t tulip_ifstart( struct ifnet * const ifp) { TULIP_PERFSTART(ifstart) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); if (sc->tulip_if.if_flags & IFF_RUNNING) { if ((sc->tulip_flags & (TULIP_WANTSETUP|TULIP_TXPROBE_ACTIVE)) == TULIP_WANTSETUP) tulip_txput_setup(sc); while (sc->tulip_if.if_snd.ifq_head != NULL) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_if.if_snd, m); if ((m = tulip_txput(sc, m)) != NULL) { IF_PREPEND(&sc->tulip_if.if_snd, m); break; } } } TULIP_PERFEND(ifstart); } static ifnet_ret_t tulip_ifstart_one( struct ifnet * const ifp) { TULIP_PERFSTART(ifstart_one) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); if ((sc->tulip_if.if_flags & IFF_RUNNING) && sc->tulip_if.if_snd.ifq_head != NULL) { struct mbuf *m; IF_DEQUEUE(&sc->tulip_if.if_snd, m); if ((m = tulip_txput(sc, m)) != NULL) IF_PREPEND(&sc->tulip_if.if_snd, m); } TULIP_PERFEND(ifstart_one); } /* * Even though this routine runs at device spl, it does not break * our use of splnet (splsoftnet under NetBSD) for the majority * of this driver (if TULIP_USE_SOFTINTR defined) since * tulip_ifwatchdog is called from if_watchdog, which is called from * splsoftclock, which is below spl[soft]net. */ static void tulip_ifwatchdog( struct ifnet *ifp) { TULIP_PERFSTART(ifwatchdog) tulip_softc_t * const sc = TULIP_IFP_TO_SOFTC(ifp); #if defined(TULIP_DEBUG) u_int32_t rxintrs = sc->tulip_dbg.dbg_rxintrs - sc->tulip_dbg.dbg_last_rxintrs; if (rxintrs > sc->tulip_dbg.dbg_high_rxintrs_hz) sc->tulip_dbg.dbg_high_rxintrs_hz = rxintrs; sc->tulip_dbg.dbg_last_rxintrs = sc->tulip_dbg.dbg_rxintrs; #endif /* TULIP_DEBUG */ sc->tulip_if.if_timer = 1; /* * These should be rare so do a bulk test up front so we can just skip * them if needed.
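 *
 * (The single mask test below covers the three deferred-work cases:
 * refill the receive ring if TULIP_RXBUFSLOW is set, report accumulated
 * system errors, and flush any abnormal-interrupt status that was muted
 * by TULIP_NOMESSAGES.)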
*/ if (sc->tulip_flags & (TULIP_SYSTEMERROR|TULIP_RXBUFSLOW|TULIP_NOMESSAGES)) { /* * If the number of receive buffers is low, try to refill */ if (sc->tulip_flags & TULIP_RXBUFSLOW) tulip_rx_intr(sc); if (sc->tulip_flags & TULIP_SYSTEMERROR) { printf(TULIP_PRINTF_FMT ": %d system errors: last was %s\n", TULIP_PRINTF_ARGS, sc->tulip_system_errors, tulip_system_errors[sc->tulip_last_system_error]); } if (sc->tulip_statusbits) { tulip_print_abnormal_interrupt(sc, sc->tulip_statusbits); sc->tulip_statusbits = 0; } sc->tulip_flags &= ~(TULIP_NOMESSAGES|TULIP_SYSTEMERROR); } if (sc->tulip_txtimer) tulip_tx_intr(sc); if (sc->tulip_txtimer && --sc->tulip_txtimer == 0) { printf(TULIP_PRINTF_FMT ": transmission timeout\n", TULIP_PRINTF_ARGS); if (TULIP_DO_AUTOSENSE(sc)) { sc->tulip_media = TULIP_MEDIA_UNKNOWN; sc->tulip_probe_state = TULIP_PROBE_INACTIVE; sc->tulip_flags &= ~(TULIP_WANTRXACT|TULIP_LINKUP); } tulip_reset(sc); tulip_init(sc); } TULIP_PERFEND(ifwatchdog); TULIP_PERFMERGE(sc, perf_intr_cycles); TULIP_PERFMERGE(sc, perf_ifstart_cycles); TULIP_PERFMERGE(sc, perf_ifioctl_cycles); TULIP_PERFMERGE(sc, perf_ifwatchdog_cycles); TULIP_PERFMERGE(sc, perf_timeout_cycles); TULIP_PERFMERGE(sc, perf_ifstart_one_cycles); TULIP_PERFMERGE(sc, perf_txput_cycles); TULIP_PERFMERGE(sc, perf_txintr_cycles); TULIP_PERFMERGE(sc, perf_rxintr_cycles); TULIP_PERFMERGE(sc, perf_rxget_cycles); TULIP_PERFMERGE(sc, perf_intr); TULIP_PERFMERGE(sc, perf_ifstart); TULIP_PERFMERGE(sc, perf_ifioctl); TULIP_PERFMERGE(sc, perf_ifwatchdog); TULIP_PERFMERGE(sc, perf_timeout); TULIP_PERFMERGE(sc, perf_ifstart_one); TULIP_PERFMERGE(sc, perf_txput); TULIP_PERFMERGE(sc, perf_txintr); TULIP_PERFMERGE(sc, perf_rxintr); TULIP_PERFMERGE(sc, perf_rxget); } #if defined(__bsdi__) || (defined(__FreeBSD__) && BSD < 199506) static ifnet_ret_t tulip_ifwatchdog_wrapper( int unit) { tulip_ifwatchdog(&TULIP_UNIT_TO_SOFTC(unit)->tulip_if); } #define tulip_ifwatchdog tulip_ifwatchdog_wrapper #endif /* * All printf's are real as of now! */ #ifdef printf #undef printf #endif #if !defined(IFF_NOTRAILERS) #define IFF_NOTRAILERS 0 #endif static void tulip_attach( tulip_softc_t * const sc) { struct ifnet * const ifp = &sc->tulip_if; ifp->if_flags = IFF_BROADCAST|IFF_SIMPLEX|IFF_NOTRAILERS|IFF_MULTICAST; ifp->if_ioctl = tulip_ifioctl; ifp->if_start = tulip_ifstart; ifp->if_watchdog = tulip_ifwatchdog; ifp->if_timer = 1; #if !defined(__bsdi__) || _BSDI_VERSION < 199401 ifp->if_output = ether_output; #endif #if defined(__bsdi__) && _BSDI_VERSION < 199401 ifp->if_mtu = ETHERMTU; #endif #if defined(__bsdi__) && _BSDI_VERSION >= 199510 aprint_naive(": DEC Ethernet"); aprint_normal(": %s%s", sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid]); aprint_verbose(" pass %d.%d", (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F); printf("\n"); sc->tulip_pf = aprint_normal; aprint_normal(TULIP_PRINTF_FMT ": address " TULIP_EADDR_FMT "\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(sc->tulip_enaddr)); #else printf( #if defined(__bsdi__) "\n" #endif TULIP_PRINTF_FMT ": %s%s pass %d.%d%s\n", TULIP_PRINTF_ARGS, sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid], (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F, (sc->tulip_features & (TULIP_HAVE_ISVSROM|TULIP_HAVE_OKSROM)) == TULIP_HAVE_ISVSROM ?
" (invalid EEPROM checksum)" : ""); printf(TULIP_PRINTF_FMT ": address " TULIP_EADDR_FMT "\n", TULIP_PRINTF_ARGS, TULIP_EADDR_ARGS(sc->tulip_enaddr)); #endif #if defined(__alpha__) /* * In case the SRM console told us about bogus media, * we need to check to be safe. */ if (sc->tulip_mediums[sc->tulip_media] == NULL) sc->tulip_media = TULIP_MEDIA_UNKNOWN; #endif (*sc->tulip_boardsw->bd_media_probe)(sc); #if defined(IFM_ETHER) ifmedia_init(&sc->tulip_ifmedia, 0, tulip_ifmedia_change, tulip_ifmedia_status); #else { tulip_media_t media; int cnt; printf(TULIP_PRINTF_FMT ": media:", TULIP_PRINTF_ARGS); for (media = TULIP_MEDIA_UNKNOWN, cnt = 1; cnt < 7 && media < TULIP_MEDIA_MAX; media++) { if (sc->tulip_mediums[media] != NULL) { printf(" %d=\"%s\"", cnt, tulip_mediums[media]); cnt++; } } if (cnt == 1) { sc->tulip_features |= TULIP_HAVE_NOMEDIA; printf(" none\n"); } else { printf("\n"); } } #endif sc->tulip_flags &= ~TULIP_DEVICEPROBE; #if defined(IFM_ETHER) tulip_ifmedia_add(sc); #endif tulip_reset(sc); #if defined(__bsdi__) && _BSDI_VERSION >= 199510 sc->tulip_pf = printf; TULIP_ETHER_IFATTACH(sc); #else if_attach(ifp); #if defined(__NetBSD__) || (defined(__FreeBSD__) && BSD >= 199506) TULIP_ETHER_IFATTACH(sc); #endif #endif /* __bsdi__ */ #if NBPFILTER > 0 TULIP_BPF_ATTACH(sc); #endif #if defined(__NetBSD__) && NRND > 0 rnd_attach_source(&sc->tulip_rndsource, sc->tulip_dev.dv_xname, RND_TYPE_NET); #endif } static void tulip_initcsrs( tulip_softc_t * const sc, tulip_csrptr_t csr_base, size_t csr_size) { sc->tulip_csrs.csr_busmode = csr_base + 0 * csr_size; sc->tulip_csrs.csr_txpoll = csr_base + 1 * csr_size; sc->tulip_csrs.csr_rxpoll = csr_base + 2 * csr_size; sc->tulip_csrs.csr_rxlist = csr_base + 3 * csr_size; sc->tulip_csrs.csr_txlist = csr_base + 4 * csr_size; sc->tulip_csrs.csr_status = csr_base + 5 * csr_size; sc->tulip_csrs.csr_command = csr_base + 6 * csr_size; sc->tulip_csrs.csr_intr = csr_base + 7 * csr_size; sc->tulip_csrs.csr_missed_frames = csr_base + 8 * csr_size; sc->tulip_csrs.csr_9 = csr_base + 9 * csr_size; sc->tulip_csrs.csr_10 = csr_base + 10 * csr_size; sc->tulip_csrs.csr_11 = csr_base + 11 * csr_size; sc->tulip_csrs.csr_12 = csr_base + 12 * csr_size; sc->tulip_csrs.csr_13 = csr_base + 13 * csr_size; sc->tulip_csrs.csr_14 = csr_base + 14 * csr_size; sc->tulip_csrs.csr_15 = csr_base + 15 * csr_size; #if defined(TULIP_EISA) sc->tulip_csrs.csr_enetrom = csr_base + DE425_ENETROM_OFFSET; #endif } static void tulip_initring( tulip_softc_t * const sc, tulip_ringinfo_t * const ri, tulip_desc_t *descs, int ndescs) { ri->ri_max = ndescs; ri->ri_first = descs; ri->ri_last = ri->ri_first + ri->ri_max; bzero((caddr_t) ri->ri_first, sizeof(ri->ri_first[0]) * ri->ri_max); ri->ri_last[-1].d_flag = TULIP_DFLAG_ENDRING; } /* * This is the PCI configuration support. Since the 21040 is available * on both EISA and PCI boards, one must be careful in how one defines the * 21040 in the config file.
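 *
 * (The offsets defined below are the standard PCI configuration-space
 * registers: CFID at 0x00 holds the vendor/device ID, CBIO and CBMA at
 * 0x10/0x14 are the I/O and memory base address registers, and CFDA at
 * 0x40 is the 21x4x-specific driver area used later for the power
 * management SLEEP/SNOOZE bits.)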
*/ #define PCI_CFID 0x00 /* Configuration ID */ #define PCI_CFCS 0x04 /* Configuration Command/Status */ #define PCI_CFRV 0x08 /* Configuration Revision */ #define PCI_CFLT 0x0c /* Configuration Latency Timer */ #define PCI_CBIO 0x10 /* Configuration Base IO Address */ #define PCI_CBMA 0x14 /* Configuration Base Memory Address */ #define PCI_CFIT 0x3c /* Configuration Interrupt */ #define PCI_CFDA 0x40 /* Configuration Driver Area */ #if defined(TULIP_EISA) static const int tulip_eisa_irqs[4] = { IRQ5, IRQ9, IRQ10, IRQ11 }; #endif #if defined(__FreeBSD__) #define TULIP_PCI_ATTACH_ARGS pcici_t config_id, int unit #define TULIP_SHUTDOWN_ARGS int howto, void * arg #if defined(TULIP_DEVCONF) static void tulip_shutdown(TULIP_SHUTDOWN_ARGS); static int tulip_pci_shutdown( struct kern_devconf * const kdc, int force) { if (kdc->kdc_unit < TULIP_MAX_DEVICES) { tulip_softc_t * const sc = TULIP_UNIT_TO_SOFTC(kdc->kdc_unit); if (sc != NULL) tulip_shutdown(0, sc); } (void) dev_detach(kdc); return 0; } #endif static char* tulip_pci_probe( pcici_t config_id, pcidi_t device_id) { if (PCI_VENDORID(device_id) != DEC_VENDORID) return NULL; if (PCI_CHIPID(device_id) == CHIPID_21040) return "Digital 21040 Ethernet"; if (PCI_CHIPID(device_id) == CHIPID_21041) return "Digital 21041 Ethernet"; if (PCI_CHIPID(device_id) == CHIPID_21140) { u_int32_t revinfo = pci_conf_read(config_id, PCI_CFRV) & 0xFF; if (revinfo >= 0x20) return "Digital 21140A Fast Ethernet"; else return "Digital 21140 Fast Ethernet"; } if (PCI_CHIPID(device_id) == CHIPID_21142) { u_int32_t revinfo = pci_conf_read(config_id, PCI_CFRV) & 0xFF; if (revinfo >= 0x20) return "Digital 21143 Fast Ethernet"; else return "Digital 21142 Fast Ethernet"; } return NULL; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); static u_long tulip_pci_count; static struct pci_device dedevice = { "de", tulip_pci_probe, tulip_pci_attach, &tulip_pci_count, #if defined(TULIP_DEVCONF) tulip_pci_shutdown, #endif }; DATA_SET (pcidevice_set, dedevice); #endif /* __FreeBSD__ */ #if defined(__bsdi__) #define TULIP_PCI_ATTACH_ARGS struct device * const parent, struct device * const self, void * const aux #define TULIP_SHUTDOWN_ARGS void *arg static int tulip_pci_match( pci_devaddr_t *pa) { int irq; unsigned id; id = pci_inl(pa, PCI_VENDOR_ID); if (PCI_VENDORID(id) != DEC_VENDORID) return 0; id = PCI_CHIPID(id); if (id != CHIPID_21040 && id != CHIPID_21041 && id != CHIPID_21140 && id != CHIPID_21142) return 0; irq = pci_inl(pa, PCI_I_LINE) & 0xFF; if (irq == 0 || irq >= 16) { printf("de?: invalid IRQ %d; skipping\n", irq); return 0; } return 1; } static int tulip_probe( struct device *parent, struct cfdata *cf, void *aux) { struct isa_attach_args * const ia = (struct isa_attach_args *) aux; unsigned irq, slot; pci_devaddr_t *pa; #if _BSDI_VERSION >= 199401 switch (ia->ia_bustype) { case BUS_PCI: #endif pa = pci_scan(tulip_pci_match); if (pa == NULL) return 0; irq = (1 << (pci_inl(pa, PCI_I_LINE) & 0xFF)); /* Get the base address; assume the BIOS set it up correctly */ #if defined(TULIP_IOMAPPED) ia->ia_maddr = NULL; ia->ia_msize = 0; ia->ia_iobase = pci_inl(pa, PCI_CBIO) & ~7; pci_outl(pa, PCI_CBIO, 0xFFFFFFFF); ia->ia_iosize = ((~pci_inl(pa, PCI_CBIO)) | 7) + 1; pci_outl(pa, PCI_CBIO, (int) ia->ia_iobase); /* Disable memory space access */ pci_outl(pa, PCI_COMMAND, pci_inl(pa, PCI_COMMAND) & ~2); #else ia->ia_maddr = (caddr_t) (pci_inl(pa, PCI_CBMA) & ~7); pci_outl(pa, PCI_CBMA, 0xFFFFFFFF); ia->ia_msize = ((~pci_inl(pa, PCI_CBMA)) | 7) + 1; pci_outl(pa, PCI_CBMA, (int)
ia->ia_maddr); ia->ia_iobase = 0; ia->ia_iosize = 0; /* Disable I/O space access */ pci_outl(pa, PCI_COMMAND, pci_inl(pa, PCI_COMMAND) & ~1); #endif /* TULIP_IOMAPPED */ ia->ia_aux = (void *) pa; #if _BSDI_VERSION >= 199401 break; #if defined(TULIP_EISA) case BUS_EISA: { unsigned tmp; if ((slot = eisa_match(cf, ia)) == 0) return 0; ia->ia_iobase = slot << 12; ia->ia_iosize = EISA_NPORT; eisa_slotalloc(slot); tmp = inb(ia->ia_iobase + DE425_CFG0); irq = tulip_eisa_irqs[(tmp >> 1) & 0x03]; /* * Until BSD/OS likes level interrupts, force * the DE425 into edge-triggered mode. */ if ((tmp & 1) == 0) outb(ia->ia_iobase + DE425_CFG0, tmp | 1); /* * CBIO needs to map to the EISA slot; * enable I/O access and bus mastering. */ outl(ia->ia_iobase + DE425_CBIO, ia->ia_iobase); outl(ia->ia_iobase + DE425_CFCS, 5 | inl(ia->ia_iobase + DE425_CFCS)); ia->ia_aux = NULL; break; } #endif /* TULIP_EISA */ default: return 0; } #endif /* PCI bus masters don't use host DMA channels */ ia->ia_drq = DRQNONE; if (ia->ia_irq != IRQUNK && irq != ia->ia_irq) { printf("de%d: error: desired IRQ of %d does not match device's " "actual IRQ of %d,\n", cf->cf_unit, ffs(ia->ia_irq) - 1, ffs(irq) - 1); return 0; } if (ia->ia_irq == IRQUNK) ia->ia_irq = irq; #ifdef IRQSHARE ia->ia_irq |= IRQSHARE; #endif return 1; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); #if defined(TULIP_EISA) static char *tulip_eisa_ids[] = { "DEC4250", NULL }; #endif struct cfdriver decd = { 0, "de", tulip_probe, tulip_pci_attach, #if _BSDI_VERSION >= 199401 DV_IFNET, #endif sizeof(tulip_softc_t), #if defined(TULIP_EISA) tulip_eisa_ids #endif }; #endif /* __bsdi__ */ #if defined(__NetBSD__) #define TULIP_PCI_ATTACH_ARGS struct device * const parent, struct device * const self, void * const aux #define TULIP_SHUTDOWN_ARGS void *arg static int tulip_pci_probe( struct device *parent, #ifdef __BROKEN_INDIRECT_CONFIG void *match, #else struct cfdata *match, #endif void *aux) { struct pci_attach_args *pa = (struct pci_attach_args *) aux; if (PCI_VENDORID(pa->pa_id) != DEC_VENDORID) return 0; if (PCI_CHIPID(pa->pa_id) == CHIPID_21040 || PCI_CHIPID(pa->pa_id) == CHIPID_21041 || PCI_CHIPID(pa->pa_id) == CHIPID_21140 || PCI_CHIPID(pa->pa_id) == CHIPID_21142) return 1; return 0; } static void tulip_pci_attach(TULIP_PCI_ATTACH_ARGS); struct cfattach de_ca = { sizeof(tulip_softc_t), tulip_pci_probe, tulip_pci_attach }; struct cfdriver de_cd = { 0, "de", DV_IFNET }; #endif /* __NetBSD__ */ static void tulip_shutdown( TULIP_SHUTDOWN_ARGS) { tulip_softc_t * const sc = arg; TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(10); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyway) */ } static void tulip_pci_attach( TULIP_PCI_ATTACH_ARGS) { #if defined(__FreeBSD__) tulip_softc_t *sc; #define PCI_CONF_WRITE(r, v) pci_conf_write(config_id, (r), (v)) #define PCI_CONF_READ(r) pci_conf_read(config_id, (r)) #if __FreeBSD__ >= 3 #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = (config_id->bus), /* XXX */ \ (sc)->tulip_pci_devno = (config_id->slot))) /* XXX */ #else #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = ((config_id.cfg1 >> 16) & 0xFF), /* XXX */ \ (sc)->tulip_pci_devno = ((config_id.cfg1 >> 11) & 0x1F))) /* XXX */ #endif #endif #if defined(__bsdi__) tulip_softc_t * const sc = (tulip_softc_t *) self; struct isa_attach_args * const ia = (struct isa_attach_args *) aux; pci_devaddr_t *pa = (pci_devaddr_t *) ia->ia_aux; const int unit = sc->tulip_dev.dv_unit;
#define PCI_CONF_WRITE(r, v) pci_outl(pa, (r), (v)) #define PCI_CONF_READ(r) pci_inl(pa, (r)) #define PCI_GETBUSDEVINFO(sc) ((void)((sc)->tulip_pci_busno = pa->d_bus, \ (sc)->tulip_pci_devno = pa->d_agent)) #endif #if defined(__NetBSD__) tulip_softc_t * const sc = (tulip_softc_t *) self; struct pci_attach_args * const pa = (struct pci_attach_args *) aux; const int unit = sc->tulip_dev.dv_unit; #define PCI_CONF_WRITE(r, v) pci_conf_write(pa->pa_pc, pa->pa_tag, (r), (v)) #define PCI_CONF_READ(r) pci_conf_read(pa->pa_pc, pa->pa_tag, (r)) #define PCI_GETBUSDEVINFO(sc) do { \ (sc)->tulip_pci_busno = parent; \ (sc)->tulip_pci_devno = pa->pa_device; \ } while (0) #endif /* __NetBSD__ */ #if defined(__alpha__) tulip_media_t media = TULIP_MEDIA_UNKNOWN; #endif int retval, idx; u_int32_t revinfo, cfdainfo, id; #if !defined(TULIP_IOMAPPED) && defined(__FreeBSD__) vm_offset_t pa_csrs; #endif unsigned csroffset = TULIP_PCI_CSROFFSET; unsigned csrsize = TULIP_PCI_CSRSIZE; tulip_csrptr_t csr_base; tulip_chipid_t chipid = TULIP_CHIPID_UNKNOWN; if (unit >= TULIP_MAX_DEVICES) { #ifdef __FreeBSD__ printf("de%d", unit); #endif printf(": not configured; limit of %d reached or exceeded\n", TULIP_MAX_DEVICES); return; } #if defined(__bsdi__) if (pa != NULL) { revinfo = pci_inl(pa, PCI_CFRV) & 0xFF; id = pci_inl(pa, PCI_CFID); cfdainfo = pci_inl(pa, PCI_CFDA); #if defined(TULIP_EISA) } else { revinfo = inl(ia->ia_iobase + DE425_CFRV) & 0xFF; csroffset = TULIP_EISA_CSROFFSET; csrsize = TULIP_EISA_CSRSIZE; chipid = TULIP_DE425; cfdainfo = 0; #endif /* TULIP_EISA */ } #else /* __bsdi__ */ revinfo = PCI_CONF_READ(PCI_CFRV) & 0xFF; id = PCI_CONF_READ(PCI_CFID); cfdainfo = PCI_CONF_READ(PCI_CFDA); #endif /* __bsdi__ */ if (PCI_VENDORID(id) == DEC_VENDORID) { if (PCI_CHIPID(id) == CHIPID_21040) chipid = TULIP_21040; else if (PCI_CHIPID(id) == CHIPID_21140) { chipid = (revinfo >= 0x20) ? TULIP_21140A : TULIP_21140; } else if (PCI_CHIPID(id) == CHIPID_21142) { chipid = (revinfo >= 0x20) ? 
	    TULIP_21143 : TULIP_21142;
	} else if (PCI_CHIPID(id) == CHIPID_21041)
	    chipid = TULIP_21041;
    }
    if (chipid == TULIP_CHIPID_UNKNOWN)
	return;
    if ((chipid == TULIP_21040 || chipid == TULIP_DE425) && revinfo < 0x20) {
#ifdef __FreeBSD__
	printf("de%d", unit);
#endif
	printf(": not configured; 21040 pass 2.0 required (%d.%d found)\n",
	       revinfo >> 4, revinfo & 0x0f);
	return;
    } else if (chipid == TULIP_21140 && revinfo < 0x11) {
#ifndef __FreeBSD__
	printf("\n");
#endif
	printf("de%d: not configured; 21140 pass 1.1 required (%d.%d found)\n",
	       unit, revinfo >> 4, revinfo & 0x0f);
	return;
    }
#if defined(__FreeBSD__)
    sc = (tulip_softc_t *) malloc(sizeof(*sc), M_DEVBUF, M_NOWAIT);
    if (sc == NULL)
	return;
    bzero(sc, sizeof(*sc));			/* Zero out the softc */
    sc->tulip_rxdescs = (tulip_desc_t *) malloc(sizeof(tulip_desc_t) * TULIP_RXDESCS, M_DEVBUF, M_NOWAIT);
    sc->tulip_txdescs = (tulip_desc_t *) malloc(sizeof(tulip_desc_t) * TULIP_TXDESCS, M_DEVBUF, M_NOWAIT);
    if (sc->tulip_rxdescs == NULL || sc->tulip_txdescs == NULL) {
	if (sc->tulip_rxdescs)
	    free((caddr_t) sc->tulip_rxdescs, M_DEVBUF);
	if (sc->tulip_txdescs)
	    free((caddr_t) sc->tulip_txdescs, M_DEVBUF);
	free((caddr_t) sc, M_DEVBUF);
	return;
    }
#endif

    PCI_GETBUSDEVINFO(sc);
    sc->tulip_chipid = chipid;
    sc->tulip_flags |= TULIP_DEVICEPROBE;
    if (chipid == TULIP_21140 || chipid == TULIP_21140A)
	sc->tulip_features |= TULIP_HAVE_GPR|TULIP_HAVE_STOREFWD;
    if (chipid == TULIP_21140A && revinfo <= 0x22)
	sc->tulip_features |= TULIP_HAVE_RXBADOVRFLW;
    if (chipid == TULIP_21140)
	sc->tulip_features |= TULIP_HAVE_BROKEN_HASH;
    if (chipid != TULIP_21040 && chipid != TULIP_DE425 && chipid != TULIP_21140)
	sc->tulip_features |= TULIP_HAVE_POWERMGMT;
    if (chipid == TULIP_21041 || chipid == TULIP_21142 || chipid == TULIP_21143) {
	sc->tulip_features |= TULIP_HAVE_DUALSENSE;
	/* sc->tulip_revinfo is not filled in until later; test the probed value */
	if (chipid != TULIP_21041 || revinfo >= 0x20)
	    sc->tulip_features |= TULIP_HAVE_SIANWAY;
	if (chipid != TULIP_21041)
	    sc->tulip_features |= TULIP_HAVE_SIAGP|TULIP_HAVE_RXBADOVRFLW|TULIP_HAVE_STOREFWD;
	if (chipid != TULIP_21041 && revinfo >= 0x20)
	    sc->tulip_features |= TULIP_HAVE_SIA100;
    }
    if (sc->tulip_features & TULIP_HAVE_POWERMGMT
	    && (cfdainfo & (TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE))) {
	cfdainfo &= ~(TULIP_CFDA_SLEEP|TULIP_CFDA_SNOOZE);
	PCI_CONF_WRITE(PCI_CFDA, cfdainfo);
	DELAY(11*1000);
    }
#if defined(__alpha__) && defined(__NetBSD__)
    /*
     * The Alpha SRM console encodes a console set media in the driver
     * part of the CFDA register.  Note that the Multia presents a
     * problem in that its BNC mode is really EXTSIA.  So in that case
     * force a probe.
     */
    switch ((cfdainfo >> 8) & 0xff) {
    case 1: media = chipid > TULIP_DE425 ?
	TULIP_MEDIA_AUI : TULIP_MEDIA_AUIBNC; break;
    case 2: media = chipid > TULIP_DE425 ?
TULIP_MEDIA_BNC : TULIP_MEDIA_UNKNOWN; break; case 3: media = TULIP_MEDIA_10BASET; break; case 4: media = TULIP_MEDIA_10BASET_FD; break; case 5: media = TULIP_MEDIA_100BASETX; break; case 6: media = TULIP_MEDIA_100BASETX_FD; break; } #endif #if defined(__NetBSD__) bcopy(self->dv_xname, sc->tulip_if.if_xname, IFNAMSIZ); sc->tulip_if.if_softc = sc; sc->tulip_pc = pa->pa_pc; #else sc->tulip_unit = unit; sc->tulip_name = "de"; #endif sc->tulip_revinfo = revinfo; #if defined(__FreeBSD__) #if BSD >= 199506 sc->tulip_if.if_softc = sc; #endif #if defined(TULIP_IOMAPPED) retval = pci_map_port(config_id, PCI_CBIO, &csr_base); #else retval = pci_map_mem(config_id, PCI_CBMA, (vm_offset_t *) &csr_base, &pa_csrs); #endif if (!retval) { free((caddr_t) sc->tulip_rxdescs, M_DEVBUF); free((caddr_t) sc->tulip_txdescs, M_DEVBUF); free((caddr_t) sc, M_DEVBUF); return; } tulips[unit] = sc; #endif /* __FreeBSD__ */ #if defined(__bsdi__) sc->tulip_pf = printf; #if defined(TULIP_IOMAPPED) csr_base = ia->ia_iobase; #else csr_base = (vm_offset_t) mapphys((vm_offset_t) ia->ia_maddr, ia->ia_msize); #endif #endif /* __bsdi__ */ #if defined(__NetBSD__) csr_base = 0; { bus_space_tag_t iot, memt; bus_space_handle_t ioh, memh; int ioh_valid, memh_valid; ioh_valid = (pci_mapreg_map(pa, PCI_CBIO, PCI_MAPREG_TYPE_IO, 0, &iot, &ioh, NULL, NULL) == 0); memh_valid = (pci_mapreg_map(pa, PCI_CBMA, PCI_MAPREG_TYPE_MEM | PCI_MAPREG_MEM_TYPE_32BIT, 0, &memt, &memh, NULL, NULL) == 0); if (memh_valid) { sc->tulip_bustag = memt; sc->tulip_bushandle = memh; } else if (ioh_valid) { sc->tulip_bustag = iot; sc->tulip_bushandle = ioh; } else { printf(": unable to map device registers\n"); return; } } #endif /* __NetBSD__ */ tulip_initcsrs(sc, csr_base + csroffset, csrsize); tulip_initring(sc, &sc->tulip_rxinfo, sc->tulip_rxdescs, TULIP_RXDESCS); tulip_initring(sc, &sc->tulip_txinfo, sc->tulip_txdescs, TULIP_TXDESCS); /* * Make sure there won't be any interrupts or such... 
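 *
 * (the software reset that follows is what guarantees this: writing
 * TULIP_BUSMODE_SWRESET into the busmode CSR stops the chip's receive
 * and transmit engines before the freshly initialized rings are
 * handed to it.  The same idiom appears in tulip_shutdown():
 *
 *	TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET);
 *	DELAY(100);
 *
 * per the existing comments the reset takes 50 PCI cycles, roughly
 * two microseconds at 33 MHz, so the DELAY only adds margin.)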
*/ TULIP_CSR_WRITE(sc, csr_busmode, TULIP_BUSMODE_SWRESET); DELAY(100); /* Wait 10 microseconds (actually 50 PCI cycles but at 33MHz that comes to two microseconds but wait a bit longer anyways) */ if ((retval = tulip_read_macaddr(sc)) < 0) { #if defined(__FreeBSD__) printf(TULIP_PRINTF_FMT, TULIP_PRINTF_ARGS); #endif printf(": can't read ENET ROM (why=%d) (", retval); for (idx = 0; idx < 32; idx++) printf("%02x", sc->tulip_rombuf[idx]); printf("\n"); printf(TULIP_PRINTF_FMT ": %s%s pass %d.%d\n", TULIP_PRINTF_ARGS, sc->tulip_boardid, tulip_chipdescs[sc->tulip_chipid], (sc->tulip_revinfo & 0xF0) >> 4, sc->tulip_revinfo & 0x0F); printf(TULIP_PRINTF_FMT ": address unknown\n", TULIP_PRINTF_ARGS); } else { tulip_spl_t s; tulip_intrfunc_t (*intr_rtn)(void *) = tulip_intr_normal; if (sc->tulip_features & TULIP_HAVE_SHAREDINTR) intr_rtn = tulip_intr_shared; #if defined(__NetBSD__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { pci_intr_handle_t intrhandle; const char *intrstr; if (pci_intr_map(pa->pa_pc, pa->pa_intrtag, pa->pa_intrpin, pa->pa_intrline, &intrhandle)) { printf(": couldn't map interrupt\n"); return; } intrstr = pci_intr_string(pa->pa_pc, intrhandle); sc->tulip_ih = pci_intr_establish(pa->pa_pc, intrhandle, IPL_NET, intr_rtn, sc); if (sc->tulip_ih == NULL) printf(": couldn't establish interrupt"); if (intrstr != NULL) printf(" at %s", intrstr); printf("\n"); if (sc->tulip_ih == NULL) return; } sc->tulip_ats = shutdownhook_establish(tulip_shutdown, sc); if (sc->tulip_ats == NULL) printf("\n%s: warning: couldn't establish shutdown hook\n", sc->tulip_xname); #endif #if defined(__FreeBSD__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { if (!pci_map_int (config_id, intr_rtn, (void*) sc, &net_imask)) { printf(TULIP_PRINTF_FMT ": couldn't map interrupt\n", TULIP_PRINTF_ARGS); return; } } #if !defined(TULIP_DEVCONF) at_shutdown(tulip_shutdown, sc, SHUTDOWN_POST_SYNC); #endif #endif #if defined(__bsdi__) if ((sc->tulip_features & TULIP_HAVE_SLAVEDINTR) == 0) { isa_establish(&sc->tulip_id, &sc->tulip_dev); sc->tulip_ih.ih_fun = intr_rtn; sc->tulip_ih.ih_arg = (void *) sc; intr_establish(ia->ia_irq, &sc->tulip_ih, DV_NET); } sc->tulip_ats.func = tulip_shutdown; sc->tulip_ats.arg = (void *) sc; atshutdown(&sc->tulip_ats, ATSH_ADD); #endif #if defined(TULIP_USE_SOFTINTR) if (sc->tulip_unit > tulip_softintr_max_unit) tulip_softintr_max_unit = sc->tulip_unit; #endif s = TULIP_RAISESPL(); tulip_reset(sc); tulip_attach(sc); #if defined(__alpha__) && defined(__NetBSD__) if (media != TULIP_MEDIA_UNKNOWN) tulip_linkup(sc, media); #endif TULIP_RESTORESPL(s); } } Index: head/sys/sys/bio.h =================================================================== --- head/sys/sys/bio.h (revision 34265) +++ head/sys/sys/bio.h (revision 34266) @@ -1,309 +1,329 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 - * $Id: buf.h,v 1.45 1998/01/22 17:30:10 dyson Exp $ + * $Id: buf.h,v 1.46 1998/03/07 21:36:20 dyson Exp $ */ #ifndef _SYS_BUF_H_ #define _SYS_BUF_H_ #include #define NOLIST ((struct buf *)0x87654321) struct buf; +struct mount; +/* + * To avoid including + */ +LIST_HEAD(workhead, worklist); +/* + * These are currently used only by the soft dependency code, hence + * are stored once in a global variable. If other subsystems wanted + * to use these hooks, a pointer to a set of bio_ops could be added + * to each buffer. + */ +extern struct bio_ops { + void (*io_start) __P((struct buf *)); + void (*io_complete) __P((struct buf *)); + void (*io_deallocate) __P((struct buf *)); + int (*io_sync) __P((struct mount *)); +} bioops; + struct iodone_chain { long ic_prev_flags; void (*ic_prev_iodone) __P((struct buf *)); void *ic_prev_iodone_chain; struct { long ia_long; void *ia_ptr; } ic_args[5]; }; /* * The buffer header describes an I/O operation in the kernel. */ struct buf { LIST_ENTRY(buf) b_hash; /* Hash chain. */ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ TAILQ_ENTRY(buf) b_act; /* Device driver queue when active. *new* */ struct proc *b_proc; /* Associated proc; NULL if kernel. */ long b_flags; /* B_* flags. */ unsigned short b_qindex; /* buffer queue index */ unsigned char b_usecount; /* buffer use count */ int b_error; /* Errno value. */ long b_bufsize; /* Allocated buffer size. */ long b_bcount; /* Valid bytes in buffer. */ long b_resid; /* Remaining I/O. */ dev_t b_dev; /* Device associated with buffer. */ caddr_t b_data; /* Memory, superblocks, indirect etc. */ caddr_t b_kvabase; /* base kva for buffer */ int b_kvasize; /* size of kva for buffer */ daddr_t b_lblkno; /* Logical block number. */ daddr_t b_blkno; /* Underlying physical block number. */ /* Function to call upon completion. */ void (*b_iodone) __P((struct buf *)); /* For nested b_iodone's. */ struct iodone_chain *b_iodone_chain; struct vnode *b_vp; /* Device vnode. */ int b_dirtyoff; /* Offset in buffer of dirty region. 
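 *
 * (An aside on the bio_ops table introduced above: the buffer cache
 * fires these hooks at I/O start, completion, deallocation and
 * mount-sync time, and only the soft dependency code is expected to
 * fill them in.  A minimal sketch of a client, with hypothetical
 * myfs_* names that are not part of this commit:
 *
 *	static void myfs_io_start(struct buf *bp)      { (queue work) }
 *	static void myfs_io_complete(struct buf *bp)   { (resolve deps) }
 *	static void myfs_io_deallocate(struct buf *bp) { (drop b_dep) }
 *	static int  myfs_io_sync(struct mount *mp)     { return (0); }
 *
 *	void
 *	myfs_init(void)
 *	{
 *		bioops.io_start      = myfs_io_start;
 *		bioops.io_complete   = myfs_io_complete;
 *		bioops.io_deallocate = myfs_io_deallocate;
 *		bioops.io_sync       = myfs_io_sync;
 *	}
 *
 * call sites must check each pointer for NULL before dispatching,
 * since nothing registers on kernels built without soft updates.)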
*/ int b_dirtyend; /* Offset of end of dirty region. */ int b_generation; /* Generation count of buffer */ struct ucred *b_rcred; /* Read credentials reference. */ struct ucred *b_wcred; /* Write credentials reference. */ int b_validoff; /* Offset in buffer of valid region. */ int b_validend; /* Offset of end of valid region. */ daddr_t b_pblkno; /* physical block number */ void *b_saveaddr; /* Original b_addr for physio. */ caddr_t b_savekva; /* saved kva for transfer while bouncing */ void *b_driver1; /* for private use by the driver */ void *b_driver2; /* for private use by the driver */ void *b_spc; union cluster_info { TAILQ_HEAD(cluster_list_head, buf) cluster_head; TAILQ_ENTRY(buf) cluster_entry; } b_cluster; struct vm_page *b_pages[btoc(MAXPHYS)]; int b_npages; + struct workhead b_dep; /* List of filesystem dependencies. */ }; /* * These flags are kept in b_flags. */ #define B_AGE 0x00000001 /* Move to age queue when I/O done. */ #define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ #define B_BAD 0x00000008 /* Bad block revectoring in progress. */ #define B_BUSY 0x00000010 /* I/O in progress. */ #define B_CACHE 0x00000020 /* Bread found us in the cache. */ #define B_CALL 0x00000040 /* Call b_iodone from biodone. */ #define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ #define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */ #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ #define B_ERROR 0x00000800 /* I/O error occurred. */ #define B_GATHERED 0x00001000 /* LFS: already in a segment. */ #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. */ #define B_MALLOC 0x00010000 /* malloced b_data */ #define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */ #define B_PHYS 0x00040000 /* I/O to user memory. */ #define B_RAW 0x00080000 /* Set by physio for raw transfers. */ #define B_READ 0x00100000 /* Read buffer. */ #define B_TAPE 0x00200000 /* Magnetic tape I/O. */ #define B_RELBUF 0x00400000 /* Release VMIO buffer. */ #define B_WANTED 0x00800000 /* Process wants this buffer. */ #define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ #define B_WRITEINPROG 0x01000000 /* Write in progress. */ #define B_XXX 0x02000000 /* Debugging flag. 
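 *
 * (b_flags is manipulated with ordinary bit operations.  The standard
 * completion check, sketched against the routines declared later in
 * this header:
 *
 *	if (bp->b_flags & B_ERROR) {
 *		int error = bp->b_error ? bp->b_error : EIO;
 *		brelse(bp);
 *		return (error);
 *	}
 *
 * the fallback to EIO covers drivers that set B_ERROR without
 * filling in an errno value.)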
*/ #define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */ #define B_ORDERED 0x08000000 /* Must guarantee I/O ordering */ #define B_RAM 0x10000000 /* Read ahead mark (flag) */ #define B_VMIO 0x20000000 /* VMIO flag */ #define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */ #define B_BOUNCE 0x80000000 /* bounce buffer flag */ typedef struct buf_queue_head { TAILQ_HEAD(, buf) queue; struct buf *insert_point; struct buf *switch_point; } buf_queue_head, *buf_queue_head_t; static __inline void bufq_init __P((buf_queue_head *head)); static __inline void bufq_insert_tail __P((buf_queue_head *head, struct buf *bp)); static __inline void bufq_remove __P((buf_queue_head *head, struct buf *bp)); static __inline struct buf *bufq_first __P((buf_queue_head *head)); static __inline void bufq_init(buf_queue_head *head) { TAILQ_INIT(&head->queue); head->insert_point = NULL; head->switch_point = NULL; } static __inline void bufq_insert_tail(buf_queue_head *head, struct buf *bp) { if ((bp->b_flags & B_ORDERED) != 0) { head->insert_point = bp; head->switch_point = NULL; } TAILQ_INSERT_TAIL(&head->queue, bp, b_act); } static __inline void bufq_remove(buf_queue_head *head, struct buf *bp) { if (bp == TAILQ_FIRST(&head->queue)) { if (bp == head->insert_point) head->insert_point = NULL; if (TAILQ_NEXT(bp, b_act) == head->switch_point) head->switch_point = NULL; } else { if (bp == head->insert_point) { /* * Not 100% correct (we really want the * previous bp), but it will ensure queue * ordering and is less expensive than * using a CIRCLEQ. */ head->insert_point = TAILQ_NEXT(bp, b_act); } if (bp == head->switch_point) { head->switch_point = TAILQ_NEXT(bp, b_act); } } TAILQ_REMOVE(&head->queue, bp, b_act); } static __inline struct buf * bufq_first(buf_queue_head *head) { return (TAILQ_FIRST(&head->queue)); } /* * number of buffer hash entries */ #define BUFHSZ 512 /* * buffer hash table calculation, originally by David Greenman */ #define BUFHASH(vnp, bn) \ (&bufhashtbl[(((unsigned long)(vnp) >> 7)+(int)(bn)) % BUFHSZ]) /* * Definitions for the buffer free lists. */ #define BUFFER_QUEUES 6 /* number of free buffer queues */ #define QUEUE_NONE 0 /* on no queue */ #define QUEUE_LOCKED 1 /* locked buffers */ #define QUEUE_LRU 2 /* useful buffers */ #define QUEUE_VMIO 3 /* VMIO buffers */ #define QUEUE_AGE 4 /* not-useful buffers */ #define QUEUE_EMPTY 5 /* empty buffer headers*/ /* * Zero out the buffer's data area. */ #define clrbuf(bp) { \ bzero((bp)->b_data, (u_int)(bp)->b_bcount); \ (bp)->b_resid = 0; \ } /* Flags to low-level allocation routines. */ #define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */ #define B_SYNC 0x02 /* Do all allocations synchronously. */ #ifdef KERNEL extern int nbuf; /* The number of buffer headers */ extern struct buf *buf; /* The buffer headers. */ extern char *buffers; /* The buffer contents. */ extern int bufpages; /* Number of memory pages in the buffer pool. */ extern struct buf *swbuf; /* Swap I/O buffer headers. */ extern int nswbuf; /* Number of swap I/O buffer headers. 
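 *
 * (The bufq_* inlines above give drivers a small FIFO whose
 * insert_point tracks B_ORDERED barriers; the disk sort code uses it
 * to keep ordered writes from being reordered around a barrier.  A
 * hypothetical strategy routine, mydev_* names assumed:
 *
 *	static buf_queue_head mydev_bufq;	(bufq_init'ed at attach)
 *
 *	void
 *	mydev_strategy(struct buf *bp)
 *	{
 *		bufq_insert_tail(&mydev_bufq, bp);
 *		while ((bp = bufq_first(&mydev_bufq)) != NULL) {
 *			bufq_remove(&mydev_bufq, bp);
 *			(program the controller for bp)
 *		}
 *	}
 *
 * a real driver would drain the queue from its interrupt handler
 * rather than synchronously, but the queue calls are the same.)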
*/ extern int needsbuffer, numdirtybuffers; extern TAILQ_HEAD(swqueue, buf) bswlist; extern TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES]; void bufinit __P((void)); void bremfree __P((struct buf *)); int bread __P((struct vnode *, daddr_t, int, struct ucred *, struct buf **)); int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, struct ucred *, struct buf **)); int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); +void bdirty __P((struct buf *)); int bowrite __P((struct buf *)); void brelse __P((struct buf *)); void bqrelse __P((struct buf *)); int vfs_bio_awrite __P((struct buf *)); struct buf * getpbuf __P((void)); struct buf *incore __P((struct vnode *, daddr_t)); struct buf *gbincore __P((struct vnode *, daddr_t)); int inmem __P((struct vnode *, daddr_t)); struct buf *getblk __P((struct vnode *, daddr_t, int, int, int)); struct buf *geteblk __P((int)); int allocbuf __P((struct buf *, int)); int biowait __P((struct buf *)); void biodone __P((struct buf *)); void cluster_callback __P((struct buf *)); int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long, struct ucred *, long, int, struct buf **)); int cluster_wbuild __P((struct vnode *, long, daddr_t, int)); void cluster_write __P((struct buf *, u_quad_t)); int physio __P((void (*)(struct buf *), struct buf *, dev_t, int, u_int (*)(struct buf *), struct uio *)); u_int minphys __P((struct buf *)); void vfs_bio_clrbuf __P((struct buf *)); void vfs_busy_pages __P((struct buf *, int clear_modify)); void vfs_unbusy_pages __P((struct buf *)); void vwakeup __P((struct buf *)); void vmapbuf __P((struct buf *)); void vunmapbuf __P((struct buf *)); void relpbuf __P((struct buf *)); void brelvp __P((struct buf *)); void bgetvp __P((struct vnode *, struct buf *)); void pbgetvp __P((struct vnode *, struct buf *)); void pbrelvp __P((struct buf *)); void reassignbuf __P((struct buf *, struct vnode *)); struct buf *trypbuf __P((void)); void vm_bounce_alloc __P((struct buf *)); void vm_bounce_free __P((struct buf *)); vm_offset_t vm_bounce_kva_alloc __P((int)); void vm_bounce_kva_alloc_free __P((vm_offset_t, int)); void vfs_bio_need_satisfy __P((void)); #endif /* KERNEL */ #endif /* !_SYS_BUF_H_ */ Index: head/sys/sys/buf.h =================================================================== --- head/sys/sys/buf.h (revision 34265) +++ head/sys/sys/buf.h (revision 34266) @@ -1,309 +1,329 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 - * $Id: buf.h,v 1.45 1998/01/22 17:30:10 dyson Exp $ + * $Id: buf.h,v 1.46 1998/03/07 21:36:20 dyson Exp $ */ #ifndef _SYS_BUF_H_ #define _SYS_BUF_H_ #include #define NOLIST ((struct buf *)0x87654321) struct buf; +struct mount; +/* + * To avoid including + */ +LIST_HEAD(workhead, worklist); +/* + * These are currently used only by the soft dependency code, hence + * are stored once in a global variable. If other subsystems wanted + * to use these hooks, a pointer to a set of bio_ops could be added + * to each buffer. + */ +extern struct bio_ops { + void (*io_start) __P((struct buf *)); + void (*io_complete) __P((struct buf *)); + void (*io_deallocate) __P((struct buf *)); + int (*io_sync) __P((struct mount *)); +} bioops; + struct iodone_chain { long ic_prev_flags; void (*ic_prev_iodone) __P((struct buf *)); void *ic_prev_iodone_chain; struct { long ia_long; void *ia_ptr; } ic_args[5]; }; /* * The buffer header describes an I/O operation in the kernel. */ struct buf { LIST_ENTRY(buf) b_hash; /* Hash chain. */ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ TAILQ_ENTRY(buf) b_act; /* Device driver queue when active. *new* */ struct proc *b_proc; /* Associated proc; NULL if kernel. */ long b_flags; /* B_* flags. */ unsigned short b_qindex; /* buffer queue index */ unsigned char b_usecount; /* buffer use count */ int b_error; /* Errno value. */ long b_bufsize; /* Allocated buffer size. */ long b_bcount; /* Valid bytes in buffer. */ long b_resid; /* Remaining I/O. */ dev_t b_dev; /* Device associated with buffer. */ caddr_t b_data; /* Memory, superblocks, indirect etc. */ caddr_t b_kvabase; /* base kva for buffer */ int b_kvasize; /* size of kva for buffer */ daddr_t b_lblkno; /* Logical block number. */ daddr_t b_blkno; /* Underlying physical block number. */ /* Function to call upon completion. */ void (*b_iodone) __P((struct buf *)); /* For nested b_iodone's. */ struct iodone_chain *b_iodone_chain; struct vnode *b_vp; /* Device vnode. */ int b_dirtyoff; /* Offset in buffer of dirty region. */ int b_dirtyend; /* Offset of end of dirty region. */ int b_generation; /* Generation count of buffer */ struct ucred *b_rcred; /* Read credentials reference. */ struct ucred *b_wcred; /* Write credentials reference. 
*/ int b_validoff; /* Offset in buffer of valid region. */ int b_validend; /* Offset of end of valid region. */ daddr_t b_pblkno; /* physical block number */ void *b_saveaddr; /* Original b_addr for physio. */ caddr_t b_savekva; /* saved kva for transfer while bouncing */ void *b_driver1; /* for private use by the driver */ void *b_driver2; /* for private use by the driver */ void *b_spc; union cluster_info { TAILQ_HEAD(cluster_list_head, buf) cluster_head; TAILQ_ENTRY(buf) cluster_entry; } b_cluster; struct vm_page *b_pages[btoc(MAXPHYS)]; int b_npages; + struct workhead b_dep; /* List of filesystem dependencies. */ }; /* * These flags are kept in b_flags. */ #define B_AGE 0x00000001 /* Move to age queue when I/O done. */ #define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ #define B_BAD 0x00000008 /* Bad block revectoring in progress. */ #define B_BUSY 0x00000010 /* I/O in progress. */ #define B_CACHE 0x00000020 /* Bread found us in the cache. */ #define B_CALL 0x00000040 /* Call b_iodone from biodone. */ #define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ #define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */ #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ #define B_ERROR 0x00000800 /* I/O error occurred. */ #define B_GATHERED 0x00001000 /* LFS: already in a segment. */ #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. */ #define B_MALLOC 0x00010000 /* malloced b_data */ #define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */ #define B_PHYS 0x00040000 /* I/O to user memory. */ #define B_RAW 0x00080000 /* Set by physio for raw transfers. */ #define B_READ 0x00100000 /* Read buffer. */ #define B_TAPE 0x00200000 /* Magnetic tape I/O. */ #define B_RELBUF 0x00400000 /* Release VMIO buffer. */ #define B_WANTED 0x00800000 /* Process wants this buffer. */ #define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ #define B_WRITEINPROG 0x01000000 /* Write in progress. */ #define B_XXX 0x02000000 /* Debugging flag. 
*/ #define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */ #define B_ORDERED 0x08000000 /* Must guarantee I/O ordering */ #define B_RAM 0x10000000 /* Read ahead mark (flag) */ #define B_VMIO 0x20000000 /* VMIO flag */ #define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */ #define B_BOUNCE 0x80000000 /* bounce buffer flag */ typedef struct buf_queue_head { TAILQ_HEAD(, buf) queue; struct buf *insert_point; struct buf *switch_point; } buf_queue_head, *buf_queue_head_t; static __inline void bufq_init __P((buf_queue_head *head)); static __inline void bufq_insert_tail __P((buf_queue_head *head, struct buf *bp)); static __inline void bufq_remove __P((buf_queue_head *head, struct buf *bp)); static __inline struct buf *bufq_first __P((buf_queue_head *head)); static __inline void bufq_init(buf_queue_head *head) { TAILQ_INIT(&head->queue); head->insert_point = NULL; head->switch_point = NULL; } static __inline void bufq_insert_tail(buf_queue_head *head, struct buf *bp) { if ((bp->b_flags & B_ORDERED) != 0) { head->insert_point = bp; head->switch_point = NULL; } TAILQ_INSERT_TAIL(&head->queue, bp, b_act); } static __inline void bufq_remove(buf_queue_head *head, struct buf *bp) { if (bp == TAILQ_FIRST(&head->queue)) { if (bp == head->insert_point) head->insert_point = NULL; if (TAILQ_NEXT(bp, b_act) == head->switch_point) head->switch_point = NULL; } else { if (bp == head->insert_point) { /* * Not 100% correct (we really want the * previous bp), but it will ensure queue * ordering and is less expensive than * using a CIRCLEQ. */ head->insert_point = TAILQ_NEXT(bp, b_act); } if (bp == head->switch_point) { head->switch_point = TAILQ_NEXT(bp, b_act); } } TAILQ_REMOVE(&head->queue, bp, b_act); } static __inline struct buf * bufq_first(buf_queue_head *head) { return (TAILQ_FIRST(&head->queue)); } /* * number of buffer hash entries */ #define BUFHSZ 512 /* * buffer hash table calculation, originally by David Greenman */ #define BUFHASH(vnp, bn) \ (&bufhashtbl[(((unsigned long)(vnp) >> 7)+(int)(bn)) % BUFHSZ]) /* * Definitions for the buffer free lists. */ #define BUFFER_QUEUES 6 /* number of free buffer queues */ #define QUEUE_NONE 0 /* on no queue */ #define QUEUE_LOCKED 1 /* locked buffers */ #define QUEUE_LRU 2 /* useful buffers */ #define QUEUE_VMIO 3 /* VMIO buffers */ #define QUEUE_AGE 4 /* not-useful buffers */ #define QUEUE_EMPTY 5 /* empty buffer headers*/ /* * Zero out the buffer's data area. */ #define clrbuf(bp) { \ bzero((bp)->b_data, (u_int)(bp)->b_bcount); \ (bp)->b_resid = 0; \ } /* Flags to low-level allocation routines. */ #define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */ #define B_SYNC 0x02 /* Do all allocations synchronously. */ #ifdef KERNEL extern int nbuf; /* The number of buffer headers */ extern struct buf *buf; /* The buffer headers. */ extern char *buffers; /* The buffer contents. */ extern int bufpages; /* Number of memory pages in the buffer pool. */ extern struct buf *swbuf; /* Swap I/O buffer headers. */ extern int nswbuf; /* Number of swap I/O buffer headers. 
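 *
 * (The declarations just below are the consumer interface to the
 * buffer cache.  The canonical read-modify-write cycle, sketched:
 *
 *	struct buf *bp;
 *	int error;
 *
 *	error = bread(vp, lbn, size, NOCRED, &bp);
 *	if (error) {
 *		brelse(bp);
 *		return (error);
 *	}
 *	(modify bp->b_data)
 *	bdwrite(bp);		(delayed write: mark dirty and release)
 *
 * bwrite(bp) would instead write synchronously, sleeping in
 * biowait(); with soft updates, the b_dep list added to struct buf
 * above carries whatever ordering constraints the filesystem attached
 * to bp before either write happens.)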
*/ extern int needsbuffer, numdirtybuffers; extern TAILQ_HEAD(swqueue, buf) bswlist; extern TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES]; void bufinit __P((void)); void bremfree __P((struct buf *)); int bread __P((struct vnode *, daddr_t, int, struct ucred *, struct buf **)); int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, struct ucred *, struct buf **)); int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); +void bdirty __P((struct buf *)); int bowrite __P((struct buf *)); void brelse __P((struct buf *)); void bqrelse __P((struct buf *)); int vfs_bio_awrite __P((struct buf *)); struct buf * getpbuf __P((void)); struct buf *incore __P((struct vnode *, daddr_t)); struct buf *gbincore __P((struct vnode *, daddr_t)); int inmem __P((struct vnode *, daddr_t)); struct buf *getblk __P((struct vnode *, daddr_t, int, int, int)); struct buf *geteblk __P((int)); int allocbuf __P((struct buf *, int)); int biowait __P((struct buf *)); void biodone __P((struct buf *)); void cluster_callback __P((struct buf *)); int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long, struct ucred *, long, int, struct buf **)); int cluster_wbuild __P((struct vnode *, long, daddr_t, int)); void cluster_write __P((struct buf *, u_quad_t)); int physio __P((void (*)(struct buf *), struct buf *, dev_t, int, u_int (*)(struct buf *), struct uio *)); u_int minphys __P((struct buf *)); void vfs_bio_clrbuf __P((struct buf *)); void vfs_busy_pages __P((struct buf *, int clear_modify)); void vfs_unbusy_pages __P((struct buf *)); void vwakeup __P((struct buf *)); void vmapbuf __P((struct buf *)); void vunmapbuf __P((struct buf *)); void relpbuf __P((struct buf *)); void brelvp __P((struct buf *)); void bgetvp __P((struct vnode *, struct buf *)); void pbgetvp __P((struct vnode *, struct buf *)); void pbrelvp __P((struct buf *)); void reassignbuf __P((struct buf *, struct vnode *)); struct buf *trypbuf __P((void)); void vm_bounce_alloc __P((struct buf *)); void vm_bounce_free __P((struct buf *)); vm_offset_t vm_bounce_kva_alloc __P((int)); void vm_bounce_kva_alloc_free __P((vm_offset_t, int)); void vfs_bio_need_satisfy __P((void)); #endif /* KERNEL */ #endif /* !_SYS_BUF_H_ */ Index: head/sys/sys/malloc.h =================================================================== --- head/sys/sys/malloc.h (revision 34265) +++ head/sys/sys/malloc.h (revision 34266) @@ -1,216 +1,218 @@ /* * Copyright (c) 1987, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)malloc.h 8.5 (Berkeley) 5/3/95 - * $Id: malloc.h,v 1.35 1997/12/05 19:14:36 bde Exp $ + * $Id: malloc.h,v 1.36 1997/12/27 09:42:03 bde Exp $ */ #ifndef _SYS_MALLOC_H_ #define _SYS_MALLOC_H_ +#define splmem splhigh + #define KMEMSTATS /* * flags to malloc */ #define M_WAITOK 0x0000 #define M_NOWAIT 0x0001 #define M_KERNEL 0x0002 #define M_MAGIC 877983977 /* time when first defined :-) */ struct malloc_type { struct malloc_type *ks_next; /* next in list */ long ks_memuse; /* total memory held in bytes */ long ks_limit; /* most that are allowed to exist */ long ks_size; /* sizes of this thing that are allocated */ long ks_inuse; /* # of packets of this type currently in use */ long ks_calls; /* total packets of this type ever allocated */ long ks_maxused; /* maximum number ever used */ u_long ks_magic; /* if it's not magic, don't touch it */ const char *ks_shortdesc; /* short description */ u_short ks_limblocks; /* number of times blocked for hitting limit */ u_short ks_mapblocks; /* number of times blocked for kernel map */ }; #define MALLOC_DEFINE(type, shortdesc, longdesc) \ struct malloc_type type[1] = { \ { NULL, 0, 0, 0, 0, 0, 0, M_MAGIC, shortdesc, 0, 0 } \ }; \ struct __hack #define MALLOC_DECLARE(type) \ extern struct malloc_type type[1]; \ struct __hack #ifdef MALLOC_INSTANTIATE #define MALLOC_MAKE_TYPE(type, shortdesc, longdesc) \ MALLOC_DEFINE(type, shortdesc, longdesc) #else #define MALLOC_MAKE_TYPE(type, shortdesc, longdesc) \ MALLOC_DECLARE(type) #endif MALLOC_MAKE_TYPE(M_CACHE, "namecache", "Dynamically allocated cache entries"); MALLOC_MAKE_TYPE(M_DEVBUF, "devbuf", "device driver memory"); MALLOC_MAKE_TYPE(M_TEMP, "temp", "misc temporary data buffers"); /* * Array of descriptors that describe the contents of each page */ struct kmemusage { short ku_indx; /* bucket index */ union { u_short freecnt;/* for small allocations, free pieces in page */ u_short pagecnt;/* for large allocations, pages alloced */ } ku_un; }; #define ku_freecnt ku_un.freecnt #define ku_pagecnt ku_un.pagecnt /* * Set of buckets for each size of memory block that is retained */ struct kmembuckets { caddr_t kb_next; /* list of free blocks */ caddr_t kb_last; /* last free block */ long kb_total; /* total number of blocks allocated */ long kb_elmpercl; /* # of elements in this sized allocation */ long kb_totalfree; /* # of free elements in this bucket */ long kb_calls; /* total calls to allocate this size */ long kb_highwat; /* high water mark */ long kb_couldfree; /* over high water mark and could free */ }; #ifdef KERNEL #define MINALLOCSIZE (1 << MINBUCKET) #define BUCKETINDX(size) \ ((size) <= (MINALLOCSIZE * 128) \ ? (size) <= (MINALLOCSIZE * 8) \ ? (size) <= (MINALLOCSIZE * 2) \ ? (size) <= (MINALLOCSIZE * 1) \ ? 
(MINBUCKET + 0) \ : (MINBUCKET + 1) \ : (size) <= (MINALLOCSIZE * 4) \ ? (MINBUCKET + 2) \ : (MINBUCKET + 3) \ : (size) <= (MINALLOCSIZE* 32) \ ? (size) <= (MINALLOCSIZE * 16) \ ? (MINBUCKET + 4) \ : (MINBUCKET + 5) \ : (size) <= (MINALLOCSIZE * 64) \ ? (MINBUCKET + 6) \ : (MINBUCKET + 7) \ : (size) <= (MINALLOCSIZE * 2048) \ ? (size) <= (MINALLOCSIZE * 512) \ ? (size) <= (MINALLOCSIZE * 256) \ ? (MINBUCKET + 8) \ : (MINBUCKET + 9) \ : (size) <= (MINALLOCSIZE * 1024) \ ? (MINBUCKET + 10) \ : (MINBUCKET + 11) \ : (size) <= (MINALLOCSIZE * 8192) \ ? (size) <= (MINALLOCSIZE * 4096) \ ? (MINBUCKET + 12) \ : (MINBUCKET + 13) \ : (size) <= (MINALLOCSIZE * 16384) \ ? (MINBUCKET + 14) \ : (MINBUCKET + 15)) /* * Turn virtual addresses into kmem map indices */ #define kmemxtob(alloc) (kmembase + (alloc) * PAGE_SIZE) #define btokmemx(addr) (((caddr_t)(addr) - kmembase) / PAGE_SIZE) #define btokup(addr) (&kmemusage[(caddr_t)(addr) - kmembase >> PAGE_SHIFT]) /* * Macro versions for the usual cases of malloc/free */ #if defined(KMEMSTATS) || defined(DIAGNOSTIC) #define MALLOC(space, cast, size, type, flags) \ (space) = (cast)malloc((u_long)(size), type, flags) #define FREE(addr, type) free((addr), type) #else /* do not collect statistics */ #define MALLOC(space, cast, size, type, flags) do { \ register struct kmembuckets *kbp = &bucket[BUCKETINDX(size)]; \ - long s = splimp(); \ + long s = splmem(); \ if (kbp->kb_next == NULL) { \ (space) = (cast)malloc((u_long)(size), type, flags); \ } else { \ (space) = (cast)kbp->kb_next; \ kbp->kb_next = *(caddr_t *)(space); \ } \ splx(s); \ } while (0) #define FREE(addr, type) do { \ register struct kmembuckets *kbp; \ register struct kmemusage *kup = btokup(addr); \ - long s = splimp(); \ + long s = splmem(); \ if (1 << kup->ku_indx > MAXALLOCSAVE) { \ free((addr), type); \ } else { \ kbp = &bucket[kup->ku_indx]; \ if (kbp->kb_next == NULL) \ kbp->kb_next = (caddr_t)(addr); \ else \ *(caddr_t *)(kbp->kb_last) = (caddr_t)(addr); \ *(caddr_t *)(addr) = NULL; \ kbp->kb_last = (caddr_t)(addr); \ } \ splx(s); \ } while (0) extern struct kmemusage *kmemusage; extern char *kmembase; extern struct kmembuckets bucket[]; #endif /* do not collect statistics */ /* * XXX this should be declared in , but that tends to fail * because is included in a header before the source file * has a chance to include to get MALLOC_DECLARE() defined. */ MALLOC_DECLARE(M_IOV); void *contigmalloc __P((unsigned long size, struct malloc_type *type, int flags, unsigned long low, unsigned long high, unsigned long alignment, unsigned long boundary)); void free __P((void *addr, struct malloc_type *type)); void *malloc __P((unsigned long size, struct malloc_type *type, int flags)); #endif /* KERNEL */ #endif /* !_SYS_MALLOC_H_ */ Index: head/sys/sys/mount.h =================================================================== --- head/sys/sys/mount.h (revision 34265) +++ head/sys/sys/mount.h (revision 34266) @@ -1,488 +1,493 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mount.h 8.21 (Berkeley) 5/20/95 - * $Id: mount.h,v 1.56 1998/02/22 01:17:51 jkh Exp $ + * $Id: mount.h,v 1.57 1998/03/01 22:46:36 msmith Exp $ */ #ifndef _SYS_MOUNT_H_ #define _SYS_MOUNT_H_ #ifndef KERNEL #include #endif #include #include #include #include /* XXX for AF_MAX */ typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */ /* * File identifier. * These are unique per filesystem on a single machine. */ #define MAXFIDSZ 16 struct fid { u_short fid_len; /* length of data in bytes */ u_short fid_reserved; /* force longword alignment */ char fid_data[MAXFIDSZ]; /* data (variable length) */ }; /* * file system statistics */ #define MFSNAMELEN 16 /* length of fs type name, including null */ #define MNAMELEN 90 /* length of buffer for returned name */ struct statfs { long f_spare2; /* placeholder */ long f_bsize; /* fundamental file system block size */ long f_iosize; /* optimal transfer block size */ long f_blocks; /* total data blocks in file system */ long f_bfree; /* free blocks in fs */ long f_bavail; /* free blocks avail to non-superuser */ long f_files; /* total file nodes in file system */ long f_ffree; /* free file nodes in fs */ fsid_t f_fsid; /* file system id */ uid_t f_owner; /* user that mounted the filesystem */ int f_type; /* type of filesystem (see below) */ int f_flags; /* copy of mount exported flags */ - long f_spare[2]; /* spare for later */ + long f_syncwrites; /* count of sync writes since mount */ + long f_asyncwrites; /* count of async writes since mount */ char f_fstypename[MFSNAMELEN]; /* fs type name */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ char f_mntfromname[MNAMELEN];/* mounted filesystem */ }; /* * File system types (for backwards compat with 4.4Lite.) 
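 *
 * (The statfs counters above are new in this revision; they replace
 * two spare longs, so the structure's size and layout do not change.
 * A hypothetical consumer, using the statfs() call declared near the
 * end of this header:
 *
 *	struct statfs fs;
 *
 *	if (statfs("/usr", &fs) == 0)
 *		printf("%ld sync / %ld async writes since mount\n",
 *		    fs.f_syncwrites, fs.f_asyncwrites);
 *
 * reporting tools can use the two counts to gauge how well soft
 * updates converts synchronous metadata writes into asynchronous
 * ones.)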
 */
#define	MOUNT_NONE	0
#define	MOUNT_UFS	1	/* Fast Filesystem */
#define	MOUNT_NFS	2	/* Sun-compatible Network Filesystem */
#define	MOUNT_MFS	3	/* Memory-based Filesystem */
#define	MOUNT_MSDOS	4	/* MS/DOS Filesystem */
#define	MOUNT_LFS	5	/* Log-based Filesystem */
#define	MOUNT_LOFS	6	/* Loopback Filesystem */
#define	MOUNT_FDESC	7	/* File Descriptor Filesystem */
#define	MOUNT_PORTAL	8	/* Portal Filesystem */
#define	MOUNT_NULL	9	/* Minimal Filesystem Layer */
#define	MOUNT_UMAP	10	/* User/Group Identifier Remapping Filesystem */
#define	MOUNT_KERNFS	11	/* Kernel Information Filesystem */
#define	MOUNT_PROCFS	12	/* /proc Filesystem */
#define	MOUNT_AFS	13	/* Andrew Filesystem */
#define	MOUNT_CD9660	14	/* ISO9660 (aka CDROM) Filesystem */
#define	MOUNT_UNION	15	/* Union (translucent) Filesystem */
#define	MOUNT_DEVFS	16	/* existing device Filesystem */
#define	MOUNT_EXT2FS	17	/* Linux EXT2FS */
#define	MOUNT_TFS	18	/* Netcon Novell filesystem */
#define	MOUNT_CFS	19	/* Coda filesystem */
#define	MOUNT_MAXTYPE	19

#define INITMOUNTNAMES { \
	"none",		/*  0 MOUNT_NONE */ \
	"ufs",		/*  1 MOUNT_UFS */ \
	"nfs",		/*  2 MOUNT_NFS */ \
	"mfs",		/*  3 MOUNT_MFS */ \
	"msdos",	/*  4 MOUNT_MSDOS */ \
	"lfs",		/*  5 MOUNT_LFS */ \
	"lofs",		/*  6 MOUNT_LOFS */ \
	"fdesc",	/*  7 MOUNT_FDESC */ \
	"portal",	/*  8 MOUNT_PORTAL */ \
	"null",		/*  9 MOUNT_NULL */ \
	"umap",		/* 10 MOUNT_UMAP */ \
	"kernfs",	/* 11 MOUNT_KERNFS */ \
	"procfs",	/* 12 MOUNT_PROCFS */ \
	"afs",		/* 13 MOUNT_AFS */ \
	"cd9660",	/* 14 MOUNT_CD9660 */ \
	"union",	/* 15 MOUNT_UNION */ \
	"devfs",	/* 16 MOUNT_DEVFS */ \
	"ext2fs",	/* 17 MOUNT_EXT2FS */ \
	"tfs",		/* 18 MOUNT_TFS */ \
	"cfs",		/* 19 MOUNT_CFS */ \
	0,		/* 20 MOUNT_SPARE */ \
}

/*
 * Structure per mounted file system.  Each mounted file system has an
 * array of operations and an instance record.  The file systems are
 * put on a doubly linked list.
 */
LIST_HEAD(vnodelst, vnode);

struct mount {
	CIRCLEQ_ENTRY(mount) mnt_list;		/* mount list */
	struct vfsops	*mnt_op;		/* operations on fs */
	struct vfsconf	*mnt_vfc;		/* configuration info */
	struct vnode	*mnt_vnodecovered;	/* vnode we mounted on */
+	struct vnode	*mnt_syncer;		/* syncer vnode */
	struct vnodelst	mnt_vnodelist;		/* list of vnodes this mount */
	struct lock	mnt_lock;		/* mount structure lock */
	int		mnt_flag;		/* flags shared with user */
	int		mnt_kern_flag;		/* kernel only flags */
	int		mnt_maxsymlinklen;	/* max size of short symlink */
	struct statfs	mnt_stat;		/* cache of filesystem stats */
	qaddr_t		mnt_data;		/* private data */
	time_t		mnt_time;		/* last time written */
};

/*
 * User specifiable flags.
 */
#define	MNT_RDONLY	0x00000001	/* read only filesystem */
#define	MNT_SYNCHRONOUS	0x00000002	/* file system written synchronously */
#define	MNT_NOEXEC	0x00000004	/* can't exec from filesystem */
#define	MNT_NOSUID	0x00000008	/* don't honor setuid bits on fs */
#define	MNT_NODEV	0x00000010	/* don't interpret special files */
#define	MNT_UNION	0x00000020	/* union with underlying filesystem */
#define	MNT_ASYNC	0x00000040	/* file system written asynchronously */
#define	MNT_SUIDDIR	0x00100000	/* special handling of SUID on dirs */
+#define	MNT_SOFTDEP	0x00200000	/* soft updates being done */
#define	MNT_NOATIME	0x10000000	/* disable update of file access time */
#define	MNT_NOCLUSTERR	0x40000000	/* disable cluster read */
#define	MNT_NOCLUSTERW	0x80000000	/* disable cluster write */

/*
 * NFS export related mount flags.
*/ #define MNT_EXRDONLY 0x00000080 /* exported read only */ #define MNT_EXPORTED 0x00000100 /* file system is exported */ #define MNT_DEFEXPORTED 0x00000200 /* exported to the world */ #define MNT_EXPORTANON 0x00000400 /* use anon uid mapping for everyone */ #define MNT_EXKERB 0x00000800 /* exported with Kerberos uid mapping */ #define MNT_EXPUBLIC 0x20000000 /* public export (WebNFS) */ /* * Flags set by internal operations, * but visible to the user. * XXX some of these are not quite right.. (I've never seen the root flag set) */ #define MNT_LOCAL 0x00001000 /* filesystem is stored locally */ #define MNT_QUOTA 0x00002000 /* quotas are enabled on filesystem */ #define MNT_ROOTFS 0x00004000 /* identifies the root filesystem */ #define MNT_USER 0x00008000 /* mounted by a user */ /* * Mask of flags that are visible to statfs() * XXX I think that this could now become (~(MNT_CMDFLAGS)) * but the 'mount' program may need changing to handle this. * XXX MNT_EXPUBLIC is presently left out. I don't know why. */ #define MNT_VISFLAGMASK (MNT_RDONLY | MNT_SYNCHRONOUS | MNT_NOEXEC | \ MNT_NOSUID | MNT_NODEV | MNT_UNION | \ MNT_ASYNC | MNT_EXRDONLY | MNT_EXPORTED | \ MNT_DEFEXPORTED | MNT_EXPORTANON| MNT_EXKERB | \ MNT_LOCAL | MNT_USER | MNT_QUOTA | \ MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ - MNT_NOCLUSTERW | MNT_SUIDDIR/* | MNT_EXPUBLIC */) + MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP \ + /* | MNT_EXPUBLIC */) /* * External filesystem command modifier flags. * Unmount can use the MNT_FORCE flag. * XXX These are not STATES and really should be somewhere else. */ #define MNT_UPDATE 0x00010000 /* not a real mount, just an update */ #define MNT_DELEXPORT 0x00020000 /* delete export host lists */ #define MNT_RELOAD 0x00040000 /* reload filesystem data */ #define MNT_FORCE 0x00080000 /* force unmount or readonly change */ #define MNT_CMDFLAGS (MNT_UPDATE|MNT_DELEXPORT|MNT_RELOAD|MNT_FORCE) /* * Internal filesystem control flags stored in mnt_kern_flag. * * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed * past the mount point. This keeps the subtree stable during mounts * and unmounts. */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_WANTRDWR 0x04000000 /* upgrade to read/write requested */ /* * Sysctl CTL_VFS definitions. * * Second level identifier specifies which filesystem. Second level * identifier VFS_VFSCONF returns information about all filesystems. * Second level identifier VFS_GENERIC is non-terminal. */ #define VFS_VFSCONF 0 /* get configured filesystems */ #define VFS_GENERIC 0 /* generic filesystem information */ /* * Third level identifiers for VFS_GENERIC are given below; third * level identifiers for specific filesystems are given in their * mount specific header files. */ #define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ #define VFS_CONF 2 /* struct: vfsconf for filesystem given as next argument */ /* * Flags for various system call interfaces. 
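 *
 * (A sketch of how the command modifiers combine with the visible
 * flags; hypothetical userland fragment, using the mount() and
 * statfs() calls declared at the bottom of this header:
 *
 *	struct statfs fs;
 *
 *	if (mount("ufs", path, MNT_UPDATE, fsargs) == 0 &&
 *	    statfs(path, &fs) == 0 &&
 *	    (fs.f_flags & MNT_SOFTDEP))
 *		printf("%s is running with soft updates\n", path);
 *
 * MNT_SOFTDEP is only reportable this way because it was added to
 * MNT_VISFLAGMASK above; statfs() filters mnt_flag through that
 * mask.)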
* * waitfor flags to vfs_sync() and getfsstat() */ -#define MNT_WAIT 1 -#define MNT_NOWAIT 2 +#define MNT_WAIT 1 /* synchronously wait for I/O to complete */ +#define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ #define MNT_LAZY 3 /* push data not written by filesystem syncer */ /* * Generic file handle */ struct fhandle { fsid_t fh_fsid; /* File system id of mount point */ struct fid fh_fid; /* File sys specific id */ }; typedef struct fhandle fhandle_t; /* * Export arguments for local filesystem mount calls. */ struct export_args { int ex_flags; /* export related flags */ uid_t ex_root; /* mapping for root uid */ struct ucred ex_anon; /* mapping for anonymous user */ struct sockaddr *ex_addr; /* net address to which exported */ int ex_addrlen; /* and the net address length */ struct sockaddr *ex_mask; /* mask of valid bits in saddr */ int ex_masklen; /* and the smask length */ char *ex_indexfile; /* index file for WebNFS URLs */ }; /* * Structure holding information for a publicly exported filesystem * (WebNFS). Currently the specs allow just for one such filesystem. */ struct nfs_public { int np_valid; /* Do we hold valid information */ fhandle_t np_handle; /* Filehandle for pub fs (internal) */ struct mount *np_mount; /* Mountpoint of exported fs */ char *np_index; /* Index file */ }; /* * Filesystem configuration information. One of these exists for each * type of filesystem supported by the kernel. These are searched at * mount time to identify the requested filesystem. */ struct vfsconf { struct vfsops *vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ struct vfsconf *vfc_next; /* next in list */ }; struct ovfsconf { void *vfc_vfsops; char vfc_name[32]; int vfc_index; int vfc_refcount; int vfc_flags; }; /* * NB: these flags refer to IMPLEMENTATION properties, not properties of * any actual mounts; i.e., it does not make sense to change the flags. */ #define VFCF_STATIC 0x00010000 /* statically compiled into kernel */ #define VFCF_NETWORK 0x00020000 /* may get data over the network */ #define VFCF_READONLY 0x00040000 /* writes are not implemented */ #define VFCF_SYNTHETIC 0x00080000 /* data does not represent real files */ #define VFCF_LOOPBACK 0x00100000 /* aliases some other mounted FS */ #define VFCF_UNICODE 0x00200000 /* stores file names as Unicode*/ #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_MOUNT); #endif extern int maxvfsconf; /* highest defined filesystem type */ extern struct vfsconf *vfsconf; /* head of list of filesystem types */ /* * Operations supported on mounted file system. 
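 *
 * (Each filesystem exports one of the vectors defined below, and the
 * kernel always dispatches through the VFS_* macros rather than
 * calling a filesystem's functions by name.  A sketch with
 * hypothetical myfs_* entry points, registered into the spare type
 * slot 20 from INITMOUNTNAMES:
 *
 *	static struct vfsops myfs_vfsops = {
 *		myfs_mount, myfs_start, myfs_unmount, myfs_root,
 *		myfs_quotactl, myfs_statfs, myfs_sync, myfs_vget,
 *		myfs_vrele, myfs_fhtovp, myfs_vptofh, myfs_init,
 *	};
 *	VFS_SET(myfs_vfsops, myfs, 20, 0);
 *
 * after which VFS_SYNC(mp, MNT_WAIT, cred, p) on one of its mounts
 * lands in myfs_sync().)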
*/ #ifdef __STDC__ struct nameidata; struct mbuf; #endif struct vfsops { int (*vfs_mount) __P((struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); int (*vfs_start) __P((struct mount *mp, int flags, struct proc *p)); int (*vfs_unmount) __P((struct mount *mp, int mntflags, struct proc *p)); int (*vfs_root) __P((struct mount *mp, struct vnode **vpp)); int (*vfs_quotactl) __P((struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p)); int (*vfs_statfs) __P((struct mount *mp, struct statfs *sbp, struct proc *p)); int (*vfs_sync) __P((struct mount *mp, int waitfor, struct ucred *cred, struct proc *p)); int (*vfs_vget) __P((struct mount *mp, ino_t ino, struct vnode **vpp)); int (*vfs_vrele) __P((struct mount *mp, struct vnode *vp)); int (*vfs_fhtovp) __P((struct mount *mp, struct fid *fhp, struct sockaddr *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp)); int (*vfs_vptofh) __P((struct vnode *vp, struct fid *fhp)); int (*vfs_init) __P((struct vfsconf *)); }; #define VFS_MOUNT(MP, PATH, DATA, NDP, P) \ (*(MP)->mnt_op->vfs_mount)(MP, PATH, DATA, NDP, P) #define VFS_START(MP, FLAGS, P) (*(MP)->mnt_op->vfs_start)(MP, FLAGS, P) #define VFS_UNMOUNT(MP, FORCE, P) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE, P) #define VFS_ROOT(MP, VPP) (*(MP)->mnt_op->vfs_root)(MP, VPP) #define VFS_QUOTACTL(MP,C,U,A,P) (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A, P) #define VFS_STATFS(MP, SBP, P) (*(MP)->mnt_op->vfs_statfs)(MP, SBP, P) #define VFS_SYNC(MP, WAIT, C, P) (*(MP)->mnt_op->vfs_sync)(MP, WAIT, C, P) #define VFS_VGET(MP, INO, VPP) (*(MP)->mnt_op->vfs_vget)(MP, INO, VPP) #define VFS_VRELE(MP, VP) (*(MP)->mnt_op->vfs_vrele)(MP, VP) #define VFS_FHTOVP(MP, FIDP, NAM, VPP, EXFLG, CRED) \ (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, NAM, VPP, EXFLG, CRED) #define VFS_VPTOFH(VP, FIDP) (*(VP)->v_mount->mnt_op->vfs_vptofh)(VP, FIDP) #ifdef VFS_LKM #include #include #include #include #define VFS_SET(vfsops, fsname, index, flags) \ static struct vfsconf _fs_vfsconf = { \ &vfsops, \ #fsname, \ index, \ 0, \ flags, \ }; \ extern struct linker_set MODVNOPS; \ MOD_VFS(fsname,&MODVNOPS,&_fs_vfsconf); \ extern int \ fsname ## _mod __P((struct lkm_table *, int, int)); \ int \ fsname ## _mod(struct lkm_table *lkmtp, int cmd, int ver) { \ MOD_DISPATCH(fsname, \ lkmtp, cmd, ver, lkm_nullcmd, lkm_nullcmd, lkm_nullcmd); } #else #define VFS_SET(vfsops, fsname, index, flags) \ static struct vfsconf _fs_vfsconf = { \ &vfsops, \ #fsname, \ index, \ 0, \ flags | VFCF_STATIC, \ }; \ DATA_SET(vfs_set,_fs_vfsconf) #endif /* VFS_LKM */ #endif /* KERNEL */ #ifdef KERNEL #include #include /* XXX for AF_MAX */ /* * Network address lookup element */ struct netcred { struct radix_node netc_rnodes[2]; int netc_exflags; struct ucred netc_anon; }; /* * Network export information */ struct netexport { struct netcred ne_defexported; /* Default export */ struct radix_node_head *ne_rtable[AF_MAX+1]; /* Individual exports */ }; extern char *mountrootfsname; /* * exported vnode operations */ int dounmount __P((struct mount *, int, struct proc *)); int vfs_setpublicfs /* set publicly exported fs */ __P((struct mount *, struct netexport *, struct export_args *)); int vfs_lock __P((struct mount *)); /* lock a vfs */ void vfs_msync __P((struct mount *, int)); void vfs_unlock __P((struct mount *)); /* unlock a vfs */ int vfs_busy __P((struct mount *, int, struct simplelock *, struct proc *)); int vfs_export /* process mount export info */ __P((struct mount *, struct netexport *, struct export_args *)); int 
vfs_vrele __P((struct mount *, struct vnode *)); struct netcred *vfs_export_lookup /* lookup host in fs export list */ __P((struct mount *, struct netexport *, struct sockaddr *)); +int vfs_allocate_syncvnode __P((struct mount *)); void vfs_getnewfsid __P((struct mount *)); struct mount *vfs_getvfs __P((fsid_t *)); /* return vfs given fsid */ int vfs_mountedon __P((struct vnode *)); /* is a vfs mounted on vp */ int vfs_rootmountalloc __P((char *, char *, struct mount **)); void vfs_unbusy __P((struct mount *, struct proc *)); void vfs_unmountall __P((void)); extern CIRCLEQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct simplelock mountlist_slock; extern struct nfs_public nfs_pub; #else /* !KERNEL */ #include __BEGIN_DECLS int fstatfs __P((int, struct statfs *)); int getfh __P((const char *, fhandle_t *)); int getfsstat __P((struct statfs *, long, int)); int getmntinfo __P((struct statfs **, int)); int mount __P((const char *, const char *, int, void *)); int statfs __P((const char *, struct statfs *)); int unmount __P((const char *, int)); /* C library stuff */ void endvfsent __P((void)); struct ovfsconf *getvfsbyname __P((const char *)); struct ovfsconf *getvfsbytype __P((int)); struct ovfsconf *getvfsent __P((void)); #define getvfsbyname new_getvfsbyname int new_getvfsbyname __P((const char *, struct vfsconf *)); void setvfsent __P((int)); int vfsisloadable __P((const char *)); int vfsload __P((const char *)); __END_DECLS #endif /* KERNEL */ #endif /* !_SYS_MOUNT_H_ */ Index: head/sys/sys/vnode.h =================================================================== --- head/sys/sys/vnode.h (revision 34265) +++ head/sys/sys/vnode.h (revision 34266) @@ -1,539 +1,545 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)vnode.h 8.7 (Berkeley) 2/4/94 - * $Id: vnode.h,v 1.66 1998/01/24 02:01:31 dyson Exp $ + * $Id: vnode.h,v 1.67 1998/03/07 21:36:27 dyson Exp $ */ #ifndef _SYS_VNODE_H_ #define _SYS_VNODE_H_ #include #include /* needed for struct selinfo in vnodes */ #include /* * The vnode is the focus of all file activity in UNIX. There is a * unique vnode allocated for each active file, each current directory, * each mounted-on file, text file, and the root. */ /* * Vnode types. VNON means no type. */ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD }; /* * Vnode tag types. * These are for the benefit of external programs only (e.g., pstat) * and should NEVER be inspected by the kernel. */ enum vtagtype { VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC, VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS, - VT_UNION, VT_MSDOSFS, VT_DEVFS, VT_TFS + VT_UNION, VT_MSDOSFS, VT_DEVFS, VT_TFS, VT_VFS }; /* * Each underlying filesystem allocates its own private area and hangs * it from v_data. If non-null, this area is freed in getnewvnode(). */ LIST_HEAD(buflists, buf); typedef int vop_t __P((void *)); struct namecache; /* * Reading or writing any of these items requires holding the appropriate lock. * v_freelist is locked by the global vnode_free_list simple lock. * v_mntvnodes is locked by the global mntvnodes simple lock. * v_flag, v_usecount, v_holdcount and v_writecount are * locked by the v_interlock simple lock. * v_pollinfo is locked by the lock contained inside it. */ struct vnode { u_long v_flag; /* vnode flags (see below) */ int v_usecount; /* reference count of users */ int v_writecount; /* reference count of writers */ int v_holdcnt; /* page & buffer references */ daddr_t v_lastr; /* last read (read-ahead) */ u_long v_id; /* capability identifier */ struct mount *v_mount; /* ptr to vfs we are in */ vop_t **v_op; /* vnode operations vector */ TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ + LIST_ENTRY(vnode) v_synclist; /* vnodes with dirty buffers */ long v_numoutput; /* num of writes in progress */ enum vtype v_type; /* vnode type */ union { struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ struct socket *vu_socket; /* unix ipc (VSOCK) */ struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */ struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */ } v_un; struct nqlease *v_lease; /* Soft reference to lease */ daddr_t v_lastw; /* last write (write cluster) */ daddr_t v_cstart; /* start block of cluster */ daddr_t v_lasta; /* last allocation */ int v_clen; /* length of current cluster */ int v_maxio; /* maximum I/O cluster size */ struct vm_object *v_object; /* Place to store VM object */ struct simplelock v_interlock; /* lock on usecount and flag */ struct lock *v_vnlock; /* used for non-locking fs's */ enum vtagtype v_tag; /* type of underlying data */ void *v_data; /* private data for fs */ LIST_HEAD(, namecache) v_cache_src; /* Cache entries from us */ TAILQ_HEAD(, namecache) v_cache_dst; /* Cache entries to us */ struct vnode *v_dd; /* .. vnode */ u_long v_ddid; /* .. 
capability identifier */ struct { struct simplelock vpi_lock; /* lock to protect below */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ } v_pollinfo; }; #define v_mountedhere v_un.vu_mountedhere #define v_socket v_un.vu_socket #define v_specinfo v_un.vu_specinfo #define v_fifoinfo v_un.vu_fifoinfo #define VN_POLLEVENT(vp, events) \ do { \ if ((vp)->v_pollinfo.vpi_events & (events)) \ vn_pollevent((vp), (events)); \ } while (0) /* * Vnode flags. */ #define VROOT 0x00001 /* root of its file system */ #define VTEXT 0x00002 /* vnode is a pure text prototype */ #define VSYSTEM 0x00004 /* vnode being used by kernel */ #define VISTTY 0x00008 /* vnode represents a tty */ #define VXLOCK 0x00100 /* vnode is locked to change underlying type */ #define VXWANT 0x00200 /* process is waiting for vnode */ #define VBWAIT 0x00400 /* waiting for output to complete */ #define VALIASED 0x00800 /* vnode has an alias */ #define VDIROP 0x01000 /* LFS: vnode is involved in a directory op */ #define VOBJBUF 0x02000 /* Allocate buffers in VM object */ #define VNINACT 0x04000 /* LFS: skip ufs_inactive() in lfs_vunref */ #define VAGE 0x08000 /* Insert vnode at head of free list */ #define VOLOCK 0x10000 /* vnode is locked waiting for an object */ #define VOWANT 0x20000 /* a process is waiting for VOLOCK */ #define VDOOMED 0x40000 /* This vnode is being recycled */ #define VFREE 0x80000 /* This vnode is on the freelist */ -#define VTBFREE 0x100000 /* This vnode is no the to be freelist */ +#define VTBFREE 0x100000 /* This vnode is on the to-be-freelist */ +#define VONWORKLST 0x200000 /* On syncer work-list */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value * is unavailable (getattr) or which is not to be changed (setattr). */ struct vattr { enum vtype va_type; /* vnode type (for create) */ u_short va_mode; /* files access mode and type */ short va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ long va_fsid; /* file system id (dev for now) */ long va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ struct timespec va_atime; /* time of last access */ struct timespec va_mtime; /* time of last modification */ struct timespec va_ctime; /* time file changed */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ dev_t va_rdev; /* device the special file represents */ u_quad_t va_bytes; /* bytes of disk space held by file */ u_quad_t va_filerev; /* file modification number */ u_int va_vaflags; /* operations flags, see below */ long va_spare; /* remain quad aligned */ }; /* * Flags for va_vaflags. */ #define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ #define VA_EXCLUSIVE 0x02 /* exclusive create request */ /* * Flags for ioflag. */ #define IO_UNIT 0x01 /* do I/O as atomic unit */ #define IO_APPEND 0x02 /* append write to end */ #define IO_SYNC 0x04 /* do I/O synchronously */ #define IO_NODELOCKED 0x08 /* underlying node already locked */ #define IO_NDELAY 0x10 /* FNDELAY flag set in file table */ #define IO_VMIO 0x20 /* data already in VMIO space */ #define IO_INVAL 0x40 /* invalidate after I/O */ /* * Modes. Some values same as Ixxx entries from inode.h for now. 
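* (E.g., VSUID | VREAD | VWRITE works out to 04600, and the octal values match the corresponding Ixxx bits in the inode mode word.)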
*/ #define VSUID 04000 /* set user id on execution */ #define VSGID 02000 /* set group id on execution */ #define VSVTX 01000 /* save swapped text even after use */ #define VREAD 00400 /* read, write, execute permissions */ #define VWRITE 00200 #define VEXEC 00100 /* * Token indicating no attribute value yet assigned. */ #define VNOVAL (-1) #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_VNODE); #endif /* * Convert between vnode types and inode formats (since POSIX.1 * defines mode word of stat structure in terms of inode formats). */ extern enum vtype iftovt_tab[]; extern int vttoif_tab[]; #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) /* * Flags to various vnode functions. */ #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */ #define FORCECLOSE 0x0002 /* vflush: force file closure */ #define WRITECLOSE 0x0004 /* vflush: only close writable files */ #define DOCLOSE 0x0008 /* vclean: close active files */ #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_SAVEMETA 0x0002 /* vinvalbuf: leave indirect blocks */ #define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */ #define VREF(vp) vref(vp) + #ifdef DIAGNOSTIC #define VATTR_NULL(vap) vattr_null(vap) #else #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */ #endif /* DIAGNOSTIC */ #define NULLVP ((struct vnode *)NULL) #ifdef VFS_LKM #define VNODEOP_SET(f) DATA_SET(MODVNOPS,f) #else #define VNODEOP_SET(f) DATA_SET(vfs_opv_descs_,f) #endif /* * Global vnode data. */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ extern int desiredvnodes; /* number of vnodes desired */ +extern time_t syncdelay; /* time to delay syncing vnodes */ +extern int rushjob; /* # of slots filesys_syncer should run ASAP */ extern struct vm_zone *namei_zone; extern int prtactive; /* nonzero to call vprint() */ extern struct vattr va_null; /* predefined null vattr structure */ extern int vfs_ioopt; /* * Macro/function to check for client cache inconsistency w.r.t. leasing. */ #define LEASE_READ 0x1 /* Check lease for readers */ #define LEASE_WRITE 0x2 /* Check lease for modifiers */ extern void (*lease_updatetime) __P((int deltat)); #define VSHOULDFREE(vp) \ (!((vp)->v_flag & (VFREE|VDOOMED)) && \ !(vp)->v_holdcnt && !(vp)->v_usecount && \ (!(vp)->v_object || \ !((vp)->v_object->ref_count || (vp)->v_object->resident_page_count))) #define VSHOULDBUSY(vp) \ (((vp)->v_flag & (VFREE|VTBFREE)) && \ ((vp)->v_holdcnt || (vp)->v_usecount)) #endif /* KERNEL */ /* * Mods for extensibility. */ /* * Flags for vdesc_flags: */ #define VDESC_MAX_VPS 16 /* Low order 16 flag bits are reserved for willrele flags for vp arguments. */ #define VDESC_VP0_WILLRELE 0x0001 #define VDESC_VP1_WILLRELE 0x0002 #define VDESC_VP2_WILLRELE 0x0004 #define VDESC_VP3_WILLRELE 0x0008 #define VDESC_NOMAP_VPP 0x0100 #define VDESC_VPP_WILLRELE 0x0200 /* * VDESC_NO_OFFSET is used to identify the end of the offset list * and in places where no such field exists. */ #define VDESC_NO_OFFSET -1 /* * This structure describes the vnode operation taking place. */ struct vnodeop_desc { int vdesc_offset; /* offset in vector--first for speed */ char *vdesc_name; /* a readable name for debugging */ int vdesc_flags; /* VDESC_* flags */ /* * These ops are used by bypass routines to map and locate arguments. * Creds and procs are not needed in bypass routines, but sometimes * they are useful to (for example) transport layers. 
* Nameidata is useful because it has a cred in it. */ int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */ int vdesc_vpp_offset; /* return vpp location */ int vdesc_cred_offset; /* cred location, if any */ int vdesc_proc_offset; /* proc location, if any */ int vdesc_componentname_offset; /* if any */ /* * Finally, we've got a list of private data (about each operation) * for each transport layer. (Support to manage this list is not * yet part of BSD.) */ caddr_t *vdesc_transports; }; #ifdef KERNEL /* * A list of all the operation descs. */ extern struct vnodeop_desc *vnodeop_descs[]; /* * Interlock for scanning list of vnodes attached to a mountpoint */ extern struct simplelock mntvnode_slock; /* * This macro is very helpful in defining those offsets in the vdesc struct. * * This is stolen from X11R4. I ignored all the fancy stuff for * Crays, so if you decide to port this to such a serious machine, * you might want to consult Intrinsic.h's XtOffset{,Of,To}. */ #define VOPARG_OFFSET(p_type,field) \ ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL))) #define VOPARG_OFFSETOF(s_type,field) \ VOPARG_OFFSET(s_type*,field) #define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \ ((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET))) /* * This structure is used to configure the new vnodeops vector. */ struct vnodeopv_entry_desc { struct vnodeop_desc *opve_op; /* which operation this is */ vop_t *opve_impl; /* code implementing this operation */ }; struct vnodeopv_desc { /* ptr to the ptr to the vector where op should go */ vop_t ***opv_desc_vector_p; struct vnodeopv_entry_desc *opv_desc_ops; /* null terminated list */ }; /* * A generic structure. * This can be used by bypass routines to identify generic arguments. */ struct vop_generic_args { struct vnodeop_desc *a_desc; /* other random data follows, presumably */ }; #ifdef DEBUG_VFS_LOCKS /* * Macros to aid in tracing VFS locking problems. Not totally * reliable since if the process sleeps between changing the lock * state and checking it with the assert, some other process could * change the state. They are good enough for debugging a single * filesystem using a single-threaded test. I find that 'cvs co src' * is a pretty good test. */ /* * [dfr] Kludge until I get around to fixing all the vfs locking. */ #define IS_LOCKING_VFS(vp) ((vp)->v_tag == VT_UFS \ || (vp)->v_tag == VT_MFS \ || (vp)->v_tag == VT_NFS \ || (vp)->v_tag == VT_LFS \ || (vp)->v_tag == VT_ISOFS \ || (vp)->v_tag == VT_MSDOSFS \ || (vp)->v_tag == VT_DEVFS) #define ASSERT_VOP_LOCKED(vp, str) \ if ((vp) && IS_LOCKING_VFS(vp) && !VOP_ISLOCKED(vp)) { \ panic("%s: %x is not locked but should be", str, vp); \ } #define ASSERT_VOP_UNLOCKED(vp, str) \ if ((vp) && IS_LOCKING_VFS(vp) && VOP_ISLOCKED(vp)) { \ panic("%s: %x is locked but shouldn't be", str, vp); \ } #else #define ASSERT_VOP_LOCKED(vp, str) #define ASSERT_VOP_UNLOCKED(vp, str) #endif /* * VOCALL calls an op given an ops vector. We break it out because BSD's * vclean changes the ops vector and then wants to call ops with the old * vector. */ #define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP)) /* * This call works for vnodes in the kernel. */ #define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP)) #define VDESC(OP) (& __CONCAT(OP,_desc)) #define VOFFSET(OP) (VDESC(OP)->vdesc_offset) /* * Finally, include the default set of vnode operations. */ #include "vnode_if.h" /* * Public vnode manipulation functions. 
*/ struct componentname; struct file; struct mount; struct nameidata; struct ostat; struct proc; struct stat; struct ucred; struct uio; struct vattr; struct vnode; struct vop_bwrite_args; extern int (*lease_check_hook) __P((struct vop_lease_args *)); int bdevvp __P((dev_t dev, struct vnode **vpp)); /* cache_* may belong in namei.h. */ void cache_enter __P((struct vnode *dvp, struct vnode *vp, struct componentname *cnp)); int cache_lookup __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)); void cache_purge __P((struct vnode *vp)); void cache_purgevfs __P((struct mount *mp)); void cvtstat __P((struct stat *st, struct ostat *ost)); int getnewvnode __P((enum vtagtype tag, struct mount *mp, vop_t **vops, struct vnode **vpp)); int lease_check __P((struct vop_lease_args *ap)); void vattr_null __P((struct vattr *vap)); int vcount __P((struct vnode *vp)); void vdrop __P((struct vnode *)); int vfinddev __P((dev_t dev, enum vtype type, struct vnode **vpp)); void vfs_opv_init __P((struct vnodeopv_desc **them)); int vflush __P((struct mount *mp, struct vnode *skipvp, int flags)); int vget __P((struct vnode *vp, int lockflag, struct proc *p)); void vgone __P((struct vnode *vp)); void vhold __P((struct vnode *)); int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred, struct proc *p, int slpflag, int slptimeo)); void vprint __P((char *label, struct vnode *vp)); int vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp, struct proc *p)); int vn_close __P((struct vnode *vp, int flags, struct ucred *cred, struct proc *p)); int vn_lock __P((struct vnode *vp, int flags, struct proc *p)); int vn_open __P((struct nameidata *ndp, int fmode, int cmode)); void vn_pollevent __P((struct vnode *vp, int events)); void vn_pollgone __P((struct vnode *vp)); int vn_pollrecord __P((struct vnode *vp, struct proc *p, int events)); int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *cred, int *aresid, struct proc *p)); int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); +void vn_syncer_add_to_worklist __P((struct vnode *vp, int delay)); int vfs_cache_lookup __P((struct vop_lookup_args *ap)); int vfs_object_create __P((struct vnode *vp, struct proc *p, struct ucred *cred, int waslocked)); int vn_writechk __P((struct vnode *vp)); int vop_stdbwrite __P((struct vop_bwrite_args *ap)); int vop_stdislocked __P((struct vop_islocked_args *)); int vop_stdlock __P((struct vop_lock_args *)); int vop_stdunlock __P((struct vop_unlock_args *)); int vop_noislocked __P((struct vop_islocked_args *)); int vop_nolock __P((struct vop_lock_args *)); int vop_nopoll __P((struct vop_poll_args *)); int vop_nounlock __P((struct vop_unlock_args *)); int vop_stdpathconf __P((struct vop_pathconf_args *)); int vop_stdpoll __P((struct vop_poll_args *)); int vop_revoke __P((struct vop_revoke_args *)); int vop_sharedlock __P((struct vop_lock_args *)); int vop_eopnotsupp __P((struct vop_generic_args *ap)); int vop_ebadf __P((struct vop_generic_args *ap)); int vop_einval __P((struct vop_generic_args *ap)); int vop_enotty __P((struct vop_generic_args *ap)); int vop_defaultop __P((struct vop_generic_args *ap)); int vop_null __P((struct vop_generic_args *ap)); struct vnode * checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp)); void vput __P((struct vnode *vp)); void vrele __P((struct vnode *vp)); void vref __P((struct vnode *vp)); void vbusy __P((struct vnode *vp)); extern vop_t **default_vnodeop_p; extern 
TAILQ_HEAD(tobefreelist, vnode) vnode_tobefree_list; /* vnode free list */ #endif /* KERNEL */ #endif /* !_SYS_VNODE_H_ */ Index: head/sys/ufs/ffs/ffs_alloc.c =================================================================== --- head/sys/ufs/ffs/ffs_alloc.c (revision 34265) +++ head/sys/ufs/ffs/ffs_alloc.c (revision 34266) @@ -1,1636 +1,1683 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_alloc.c 8.18 (Berkeley) 5/26/95 - * $Id: ffs_alloc.c,v 1.46 1998/02/04 22:33:27 eivind Exp $ + * $Id: ffs_alloc.c,v 1.47 1998/02/06 12:14:13 eivind Exp $ */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef ufs_daddr_t allocfcn_t __P((struct inode *ip, int cg, ufs_daddr_t bpref, int size)); static ufs_daddr_t ffs_alloccg __P((struct inode *, int, ufs_daddr_t, int)); -static ufs_daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, ufs_daddr_t)); +static ufs_daddr_t + ffs_alloccgblk __P((struct inode *, struct buf *, ufs_daddr_t)); #ifdef DIAGNOSTIC static int ffs_checkblk __P((struct inode *, ufs_daddr_t, long)); #endif static void ffs_clusteracct __P((struct fs *, struct cg *, ufs_daddr_t, int)); #ifdef notyet static ufs_daddr_t ffs_clusteralloc __P((struct inode *, int, ufs_daddr_t, int)); #endif static ino_t ffs_dirpref __P((struct fs *)); static ufs_daddr_t ffs_fragextend __P((struct inode *, int, long, int, int)); static void ffs_fserr __P((struct fs *, u_int, char *)); static u_long ffs_hashalloc __P((struct inode *, int, long, int, allocfcn_t *)); static ino_t ffs_nodealloccg __P((struct inode *, int, ufs_daddr_t, int)); static ufs_daddr_t ffs_mapsearch __P((struct fs *, struct cg *, ufs_daddr_t, int)); /* * Allocate a block in the file system. 
* * The size of the requested block is given, which must be some * multiple of fs_fsize and <= fs_bsize. * A preference may be optionally specified. If a preference is given * the following hierarchy is used to allocate a block: * 1) allocate the requested block. * 2) allocate a rotationally optimal block in the same cylinder. * 3) allocate a block in the same cylinder group. * 4) quadratically rehash into other cylinder groups, until an * available block is located. * If no block preference is given the following hierarchy is used * to allocate a block: * 1) allocate a block in the cylinder group that contains the * inode for the file. * 2) quadratically rehash into other cylinder groups, until an * available block is located. */ int ffs_alloc(ip, lbn, bpref, size, cred, bnp) register struct inode *ip; ufs_daddr_t lbn, bpref; int size; struct ucred *cred; ufs_daddr_t *bnp; { register struct fs *fs; ufs_daddr_t bno; int cg; #ifdef QUOTA int error; #endif *bnp = 0; fs = ip->i_fs; #ifdef DIAGNOSTIC if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { printf("dev = 0x%lx, bsize = %ld, size = %d, fs = %s\n", (u_long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); panic("ffs_alloc: bad size"); } if (cred == NOCRED) panic("ffs_alloc: missing credential"); #endif /* DIAGNOSTIC */ if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0) goto nospace; if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) goto nospace; #ifdef QUOTA error = chkdq(ip, (long)btodb(size), cred, 0); if (error) return (error); #endif if (bpref >= fs->fs_size) bpref = 0; if (bpref == 0) cg = ino_to_cg(fs, ip->i_number); else cg = dtog(fs, bpref); bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, ffs_alloccg); if (bno > 0) { ip->i_blocks += btodb(size); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bnp = bno; return (0); } #ifdef QUOTA /* * Restore user's disk quota because allocation failed. */ (void) chkdq(ip, (long)-btodb(size), cred, FORCE); #endif nospace: ffs_fserr(fs, cred->cr_uid, "file system full"); uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); return (ENOSPC); } /* * Reallocate a fragment to a bigger size * * The number and size of the old block is given, and a preference * and new size is also specified. The allocator attempts to extend * the original block. Failing that, the regular block allocator is * invoked to get an appropriate block. */ int ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) register struct inode *ip; ufs_daddr_t lbprev; ufs_daddr_t bpref; int osize, nsize; struct ucred *cred; struct buf **bpp; { register struct fs *fs; struct buf *bp; int cg, request, error; ufs_daddr_t bprev, bno; *bpp = 0; fs = ip->i_fs; #ifdef DIAGNOSTIC if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 || (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) { printf( "dev = 0x%lx, bsize = %ld, osize = %d, " "nsize = %d, fs = %s\n", (u_long)ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt); panic("ffs_realloccg: bad size"); } if (cred == NOCRED) panic("ffs_realloccg: missing credential"); #endif /* DIAGNOSTIC */ if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) goto nospace; if ((bprev = ip->i_db[lbprev]) == 0) { printf("dev = 0x%lx, bsize = %ld, bprev = %ld, fs = %s\n", (u_long) ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt); panic("ffs_realloccg: bad bprev"); } /* * Allocate the extra space in the buffer. 
*/ error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } if( bp->b_blkno == bp->b_lblkno) { if( lbprev >= NDADDR) panic("ffs_realloccg: lbprev out of range"); bp->b_blkno = fsbtodb(fs, bprev); } #ifdef QUOTA error = chkdq(ip, (long)btodb(nsize - osize), cred, 0); if (error) { brelse(bp); return (error); } #endif /* * Check for extension in the existing location. */ cg = dtog(fs, bprev); bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize); if (bno) { if (bp->b_blkno != fsbtodb(fs, bno)) panic("ffs_realloccg: bad blockno"); ip->i_blocks += btodb(nsize - osize); ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); bp->b_flags |= B_DONE; bzero((char *)bp->b_data + osize, (u_int)nsize - osize); *bpp = bp; return (0); } /* * Allocate a new disk location. */ if (bpref >= fs->fs_size) bpref = 0; switch ((int)fs->fs_optim) { case FS_OPTSPACE: /* * Allocate an exact sized fragment. Although this makes * best use of space, we will waste time relocating it if * the file continues to grow. If the fragmentation is * less than half of the minimum free reserve, we choose * to begin optimizing for time. */ request = nsize; if (fs->fs_minfree <= 5 || fs->fs_cstotal.cs_nffree > fs->fs_dsize * fs->fs_minfree / (2 * 100)) break; log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n", fs->fs_fsmnt); fs->fs_optim = FS_OPTTIME; break; case FS_OPTTIME: /* * At this point we have discovered a file that is trying to * grow a small fragment to a larger fragment. To save time, * we allocate a full sized block, then free the unused portion. * If the file continues to grow, the `ffs_fragextend' call * above will be able to grow it in place without further * copying. If aberrant programs cause disk fragmentation to * grow within 2% of the free reserve, we choose to begin * optimizing for space. */ request = fs->fs_bsize; if (fs->fs_cstotal.cs_nffree < fs->fs_dsize * (fs->fs_minfree - 2) / 100) break; log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n", fs->fs_fsmnt); fs->fs_optim = FS_OPTSPACE; break; default: printf("dev = 0x%lx, optim = %ld, fs = %s\n", (u_long)ip->i_dev, fs->fs_optim, fs->fs_fsmnt); panic("ffs_realloccg: bad optim"); /* NOTREACHED */ } bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, ffs_alloccg); if (bno > 0) { bp->b_blkno = fsbtodb(fs, bno); - ffs_blkfree(ip, bprev, (long)osize); + if (!DOINGSOFTDEP(ITOV(ip))) + ffs_blkfree(ip, bprev, (long)osize); if (nsize < request) ffs_blkfree(ip, bno + numfrags(fs, nsize), (long)(request - nsize)); ip->i_blocks += btodb(nsize - osize); ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); bp->b_flags |= B_DONE; bzero((char *)bp->b_data + osize, (u_int)nsize - osize); *bpp = bp; return (0); } #ifdef QUOTA /* * Restore user's disk quota because allocation failed. */ (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE); #endif brelse(bp); nospace: /* * no space available */ ffs_fserr(fs, cred->cr_uid, "file system full"); uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt); return (ENOSPC); } #ifdef notyet /* * Reallocate a sequence of blocks into a contiguous sequence of blocks. * * The vnode and an array of buffer pointers for a range of sequential * logical blocks to be made contiguous is given. The allocator attempts * to find a range of sequential blocks starting as close as possible to * an fs_rotdelay offset from the end of the allocation for the logical * block immediately preceding the current range. 
If successful, the * physical block numbers in the buffer pointers and in the inode are * changed to reflect the new allocation. If unsuccessful, the allocation * is left unchanged. The success in doing the reallocation is returned. * Note that the error return is not reflected back to the user. Rather * the previous block allocation will be used. */ static int doasyncfree = 1; SYSCTL_INT(_vfs_ffs, FFS_ASYNCFREE, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); static int doreallocblks = 1; SYSCTL_INT(_vfs_ffs, FFS_REALLOCBLKS, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); static int prtrealloc = 0; #endif int ffs_reallocblks(ap) struct vop_reallocblks_args /* { struct vnode *a_vp; struct cluster_save *a_buflist; } */ *ap; { #if !defined (not_yes) return (ENOSPC); #else struct fs *fs; struct inode *ip; struct vnode *vp; struct buf *sbp, *ebp; ufs_daddr_t *bap, *sbap, *ebap = 0; struct cluster_save *buflist; ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno; struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; int i, len, start_lvl, end_lvl, pref, ssize; struct timeval tv; if (doreallocblks == 0) return (ENOSPC); vp = ap->a_vp; ip = VTOI(vp); fs = ip->i_fs; if (fs->fs_contigsumsize <= 0) return (ENOSPC); buflist = ap->a_buflist; len = buflist->bs_nchildren; start_lbn = buflist->bs_children[0]->b_lblkno; end_lbn = start_lbn + len - 1; #ifdef DIAGNOSTIC for (i = 0; i < len; i++) if (!ffs_checkblk(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) panic("ffs_reallocblks: unallocated block 1"); for (i = 1; i < len; i++) if (buflist->bs_children[i]->b_lblkno != start_lbn + i) panic("ffs_reallocblks: non-logical cluster"); blkno = buflist->bs_children[0]->b_blkno; ssize = fsbtodb(fs, fs->fs_frag); for (i = 1; i < len - 1; i++) if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) panic("ffs_reallocblks: non-physical cluster %d", i); #endif /* * If the latest allocation is in a new cylinder group, assume that * the filesystem has decided to move and do not force it back to * the previous cylinder group. */ if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) return (ENOSPC); if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) return (ENOSPC); /* * Get the starting offset and block map for the first block. */ if (start_lvl == 0) { sbap = &ip->i_db[0]; soff = start_lbn; } else { idp = &start_ap[start_lvl - 1]; if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { brelse(sbp); return (ENOSPC); } sbap = (ufs_daddr_t *)sbp->b_data; soff = idp->in_off; } /* * Find the preferred location for the cluster. */ pref = ffs_blkpref(ip, start_lbn, soff, sbap); /* * If the block range spans two block maps, get the second map. */ if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { ssize = len; } else { #ifdef DIAGNOSTIC if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) panic("ffs_reallocblk: start == end"); #endif ssize = len - (idp->in_off + 1); if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) goto fail; ebap = (ufs_daddr_t *)ebp->b_data; } /* * Search the block map looking for an allocation of the desired size. */ if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref, len, ffs_clusteralloc)) == 0) goto fail; /* * We have found a new contiguous block. * * First we have to replace the old block pointers with the new * block pointers in the inode and indirect blocks associated * with the file. 
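* (Sketch of the loop below: bap starts in the first block map, sbap, and switches to the second map, ebap, once i reaches ssize, the number of pointers held by the first map.)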
*/ #ifdef DEBUG if (prtrealloc) printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number, start_lbn, end_lbn); #endif blkno = newblk; for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { - if (i == ssize) + if (i == ssize) { bap = ebap; + soff = -i; + } #ifdef DIAGNOSTIC if (!ffs_checkblk(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) panic("ffs_reallocblks: unallocated block 2"); if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) panic("ffs_reallocblks: alloc mismatch"); #endif #ifdef DEBUG if (prtrealloc) printf(" %d,", *bap); #endif + if (DOINGSOFTDEP(vp)) { + if (sbap == &ip->i_db[0] && i < ssize) + softdep_setup_allocdirect(ip, start_lbn + i, + blkno, *bap, fs->fs_bsize, fs->fs_bsize, + buflist->bs_children[i]); + else + softdep_setup_allocindir_page(ip, start_lbn + i, + i < ssize ? sbp : ebp, soff + i, blkno, + *bap, buflist->bs_children[i]); + } *bap++ = blkno; } /* * Next we must write out the modified inode and indirect blocks. * For strict correctness, the writes should be synchronous since * the old block values may have been written to disk. In practice * they are almost never written, but if we are concerned about * strict correctness, the `doasyncfree' flag should be set to zero. * * The test on `doasyncfree' should be changed to test a flag * that shows whether the associated buffers and inodes have * been written. The flag should be set when the cluster is * started and cleared whenever the buffer or inode is flushed. * We can then check below to see if it is set, and do the * synchronous write only when it has been cleared. */ if (sbap != &ip->i_db[0]) { if (doasyncfree) bdwrite(sbp); else bwrite(sbp); } else { ip->i_flag |= IN_CHANGE | IN_UPDATE; if (!doasyncfree) { gettime(&tv); UFS_UPDATE(vp, &tv, &tv, 1); } } if (ssize < len) if (doasyncfree) bdwrite(ebp); else bwrite(ebp); /* * Last, free the old blocks and assign the new blocks to the buffers. */ #ifdef DEBUG if (prtrealloc) printf("\n\tnew:"); #endif for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { - ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), - fs->fs_bsize); + if (!DOINGSOFTDEP(vp)) + ffs_blkfree(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->fs_bsize); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); #ifdef DEBUG if (!ffs_checkblk(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) panic("ffs_reallocblks: unallocated block 3"); if (prtrealloc) printf(" %d,", blkno); #endif } #ifdef DEBUG if (prtrealloc) { prtrealloc--; printf("\n"); } #endif return (0); fail: if (ssize < len) brelse(ebp); if (sbap != &ip->i_db[0]) brelse(sbp); return (ENOSPC); #endif } /* * Allocate an inode in the file system. * * If allocating a directory, use ffs_dirpref to select the inode. * If allocating in a directory, the following hierarchy is followed: * 1) allocate the preferred inode. * 2) allocate an inode in the same cylinder group. * 3) quadratically rehash into other cylinder groups, until an * available inode is located. * If no inode preference is given the following hierarchy is used * to allocate an inode: * 1) allocate an inode in cylinder group 0. * 2) quadratically rehash into other cylinder groups, until an * available inode is located. 
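* (E.g., with 32 cylinder groups and a preferred group of 5, the quadratic rehash in ffs_hashalloc probes groups 6, 8, 12, 20 and 4 before falling back to the brute-force sweep.)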
*/ int ffs_valloc(pvp, mode, cred, vpp) struct vnode *pvp; int mode; struct ucred *cred; struct vnode **vpp; { register struct inode *pip; register struct fs *fs; register struct inode *ip; ino_t ino, ipref; int cg, error; *vpp = NULL; pip = VTOI(pvp); fs = pip->i_fs; if (fs->fs_cstotal.cs_nifree == 0) goto noinodes; if ((mode & IFMT) == IFDIR) ipref = ffs_dirpref(fs); else ipref = pip->i_number; if (ipref >= fs->fs_ncg * fs->fs_ipg) ipref = 0; cg = ino_to_cg(fs, ipref); ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, (allocfcn_t *)ffs_nodealloccg); if (ino == 0) goto noinodes; error = VFS_VGET(pvp->v_mount, ino, vpp); if (error) { UFS_VFREE(pvp, ino, mode); return (error); } ip = VTOI(*vpp); if (ip->i_mode) { printf("mode = 0%o, inum = %ld, fs = %s\n", ip->i_mode, ip->i_number, fs->fs_fsmnt); panic("ffs_valloc: dup alloc"); } if (ip->i_blocks) { /* XXX */ printf("free inode %s/%ld had %ld blocks\n", fs->fs_fsmnt, ino, ip->i_blocks); ip->i_blocks = 0; } ip->i_flags = 0; /* * Set up a new generation number for this inode. */ if (ip->i_gen == 0 || ++ip->i_gen == 0) ip->i_gen = random() / 2 + 1; return (0); noinodes: ffs_fserr(fs, cred->cr_uid, "out of inodes"); uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt); return (ENOSPC); } /* * Find a cylinder to place a directory. * * The policy implemented by this algorithm is to select from * among those cylinder groups with above the average number of * free inodes, the one with the smallest number of directories. */ static ino_t ffs_dirpref(fs) register struct fs *fs; { int cg, minndir, mincg, avgifree; avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg; minndir = fs->fs_ipg; mincg = 0; for (cg = 0; cg < fs->fs_ncg; cg++) if (fs->fs_cs(fs, cg).cs_ndir < minndir && fs->fs_cs(fs, cg).cs_nifree >= avgifree) { mincg = cg; minndir = fs->fs_cs(fs, cg).cs_ndir; } return ((ino_t)(fs->fs_ipg * mincg)); } /* * Select the desired position for the next block in a file. The file is * logically divided into sections. The first section is composed of the * direct blocks. Each additional section contains fs_maxbpg blocks. * * If no blocks have been allocated in the first section, the policy is to * request a block in the same cylinder group as the inode that describes * the file. If no blocks have been allocated in any other section, the * policy is to place the section in a cylinder group with a greater than * average number of free blocks. An appropriate cylinder group is found * by using a rotor that sweeps the cylinder groups. When a new group of * blocks is needed, the sweep begins in the cylinder group following the * cylinder group from which the previous allocation was made. The sweep * continues until a cylinder group with greater than the average number * of free blocks is found. If the allocation is for the first block in an * indirect block, the information on the previous allocation is unavailable; * here a best guess is made based upon the logical block number being * allocated. * * If a section is already partially allocated, the policy is to * contiguously allocate fs_maxcontig blocks. The end of one of these * contiguous blocks and the beginning of the next is physically separated * so that the disk head will be in transit between them for at least * fs_rotdelay milliseconds. This is to allow time for the processor to * schedule another I/O transfer. 
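* (Illustrative numbers, assuming 512-byte sectors and 1K frags: fs_rotdelay = 4 ms, fs_rps = 60 and fs_nsect = 32 give 4*60*32/(2*1000) = 3 frags of separation, which the code below rounds up to a full block.)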
*/ ufs_daddr_t ffs_blkpref(ip, lbn, indx, bap) struct inode *ip; ufs_daddr_t lbn; int indx; ufs_daddr_t *bap; { register struct fs *fs; register int cg; int avgbfree, startcg; ufs_daddr_t nextblk; fs = ip->i_fs; if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { if (lbn < NDADDR) { cg = ino_to_cg(fs, ip->i_number); return (fs->fs_fpg * cg + fs->fs_frag); } /* * Find a cylinder with greater than average number of * unused data blocks. */ if (indx == 0 || bap[indx - 1] == 0) startcg = ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; else startcg = dtog(fs, bap[indx - 1]) + 1; startcg %= fs->fs_ncg; avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; for (cg = startcg; cg < fs->fs_ncg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; return (fs->fs_fpg * cg + fs->fs_frag); } for (cg = 0; cg <= startcg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; return (fs->fs_fpg * cg + fs->fs_frag); } return (0); } /* * One or more previous blocks have been laid out. If less * than fs_maxcontig previous blocks are contiguous, the * next block is requested contiguously, otherwise it is * requested rotationally delayed by fs_rotdelay milliseconds. */ nextblk = bap[indx - 1] + fs->fs_frag; if (fs->fs_rotdelay == 0 || indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig) != nextblk) return (nextblk); /* * Here we convert ms of delay to frags as: * (frags) = (ms) * (rev/sec) * (sect/rev) / * ((sect/frag) * (ms/sec)) * then round up to the next block. */ nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / (NSPF(fs) * 1000), fs->fs_frag); return (nextblk); } /* * Implement the cylinder overflow algorithm. * * The policy implemented by this algorithm is: * 1) allocate the block in its requested cylinder group. * 2) quadratically rehash on the cylinder group number. * 3) brute force search for a free block. */ /*VARARGS5*/ static u_long ffs_hashalloc(ip, cg, pref, size, allocator) struct inode *ip; int cg; long pref; int size; /* size for data blocks, mode for inodes */ allocfcn_t *allocator; { register struct fs *fs; long result; /* XXX why not same type as we return? */ int i, icg = cg; fs = ip->i_fs; /* * 1: preferred cylinder group */ result = (*allocator)(ip, cg, pref, size); if (result) return (result); /* * 2: quadratic rehash */ for (i = 1; i < fs->fs_ncg; i *= 2) { cg += i; if (cg >= fs->fs_ncg) cg -= fs->fs_ncg; result = (*allocator)(ip, cg, 0, size); if (result) return (result); } /* * 3: brute force search * Note that we start at i == 2, since 0 was checked initially, * and 1 is always checked in the quadratic rehash. */ cg = (icg + 2) % fs->fs_ncg; for (i = 2; i < fs->fs_ncg; i++) { result = (*allocator)(ip, cg, 0, size); if (result) return (result); cg++; if (cg == fs->fs_ncg) cg = 0; } return (0); } /* * Determine whether a fragment can be extended. * * Check to see if the necessary fragments are available, and * if they are, allocate them. 
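* (E.g., a 2-frag fragment can grow to 5 frags only if the next 3 frags in the free map are clear and the extended piece stays within one block.)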
*/ static ufs_daddr_t ffs_fragextend(ip, cg, bprev, osize, nsize) struct inode *ip; int cg; long bprev; int osize, nsize; { register struct fs *fs; register struct cg *cgp; struct buf *bp; long bno; int frags, bbase; int i, error; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize)) return (0); frags = numfrags(fs, nsize); bbase = fragnum(fs, bprev); if (bbase > fragnum(fs, (bprev + frags - 1))) { /* cannot extend across a block boundary */ return (0); } error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); return (0); } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) { brelse(bp); return (0); } cgp->cg_time = time.tv_sec; bno = dtogd(fs, bprev); for (i = numfrags(fs, osize); i < frags; i++) if (isclr(cg_blksfree(cgp), bno + i)) { brelse(bp); return (0); } /* * the current fragment can be extended * deduct the count on fragment being extended into * increase the count on the remaining fragment (if any) * allocate the extended piece */ for (i = frags; i < fs->fs_frag - bbase; i++) if (isclr(cg_blksfree(cgp), bno + i)) break; cgp->cg_frsum[i - numfrags(fs, osize)]--; if (i != frags) cgp->cg_frsum[i - frags]++; for (i = numfrags(fs, osize); i < frags; i++) { clrbit(cg_blksfree(cgp), bno + i); cgp->cg_cs.cs_nffree--; fs->fs_cstotal.cs_nffree--; fs->fs_cs(fs, cg).cs_nffree--; } fs->fs_fmod = 1; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, bprev); bdwrite(bp); return (bprev); } /* * Determine whether a block can be allocated. * * Check to see if a block of the appropriate size is available, * and if it is, allocate it. */ static ufs_daddr_t ffs_alloccg(ip, cg, bpref, size) struct inode *ip; int cg; ufs_daddr_t bpref; int size; { register struct fs *fs; register struct cg *cgp; struct buf *bp; register int i; - int error, bno, frags, allocsiz; + ufs_daddr_t bno, blkno; + int allocsiz, error, frags; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize) return (0); error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); return (0); } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp) || (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) { brelse(bp); return (0); } cgp->cg_time = time.tv_sec; if (size == fs->fs_bsize) { - bno = ffs_alloccgblk(fs, cgp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref); bdwrite(bp); return (bno); } /* * check to see if any fragments are already available * allocsiz is the size which will be allocated, hacking * it down to a smaller size if necessary */ frags = numfrags(fs, size); for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++) if (cgp->cg_frsum[allocsiz] != 0) break; if (allocsiz == fs->fs_frag) { /* * no fragments were available, so a block will be * allocated, and hacked up */ if (cgp->cg_cs.cs_nbfree == 0) { brelse(bp); return (0); } - bno = ffs_alloccgblk(fs, cgp, bpref); + bno = ffs_alloccgblk(ip, bp, bpref); bpref = dtogd(fs, bno); for (i = frags; i < fs->fs_frag; i++) setbit(cg_blksfree(cgp), bpref + i); i = fs->fs_frag - frags; cgp->cg_cs.cs_nffree += i; fs->fs_cstotal.cs_nffree += i; fs->fs_cs(fs, cg).cs_nffree += i; fs->fs_fmod = 1; cgp->cg_frsum[i]++; bdwrite(bp); return (bno); } bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); if (bno < 0) { brelse(bp); return (0); } for (i = 0; i < frags; i++) clrbit(cg_blksfree(cgp), bno + i); cgp->cg_cs.cs_nffree -= frags; fs->fs_cstotal.cs_nffree -= frags; fs->fs_cs(fs, cg).cs_nffree -= frags; fs->fs_fmod = 1; 
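/* Fragment-summary bookkeeping: the free run of allocsiz frags found above is consumed; if only frags of them were needed, the allocsiz - frags leftover is re-counted below as a smaller free fragment. */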
cgp->cg_frsum[allocsiz]--; if (frags != allocsiz) cgp->cg_frsum[allocsiz - frags]++; + blkno = cg * fs->fs_fpg + bno; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, blkno); bdwrite(bp); - return (cg * fs->fs_fpg + bno); + return ((u_long)blkno); } /* * Allocate a block in a cylinder group. * * This algorithm implements the following policy: * 1) allocate the requested block. * 2) allocate a rotationally optimal block in the same cylinder. * 3) allocate the next available block on the block rotor for the * specified cylinder group. * Note that this routine only allocates fs_bsize blocks; these * blocks may be fragmented by the routine that allocates them. */ static ufs_daddr_t -ffs_alloccgblk(fs, cgp, bpref) - register struct fs *fs; - register struct cg *cgp; +ffs_alloccgblk(ip, bp, bpref) + struct inode *ip; + struct buf *bp; ufs_daddr_t bpref; { + struct fs *fs; + struct cg *cgp; ufs_daddr_t bno, blkno; int cylno, pos, delta; short *cylbp; register int i; + fs = ip->i_fs; + cgp = (struct cg *)bp->b_data; if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { bpref = cgp->cg_rotor; goto norot; } bpref = blknum(fs, bpref); bpref = dtogd(fs, bpref); /* * if the requested block is available, use it */ if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) { bno = bpref; goto gotit; } if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) { /* * Block layout information is not available. * Leaving bpref unchanged means we take the * next available free block following the one * we just allocated. Hopefully this will at * least hit a track cache on drives of unknown * geometry (e.g. SCSI). */ goto norot; } /* * check for a block available on the same cylinder */ cylno = cbtocylno(fs, bpref); if (cg_blktot(cgp)[cylno] == 0) goto norot; /* * check the summary information to see if a block is * available in the requested cylinder starting at the * requested rotational position and proceeding around. */ cylbp = cg_blks(fs, cgp, cylno); pos = cbtorpos(fs, bpref); for (i = pos; i < fs->fs_nrpos; i++) if (cylbp[i] > 0) break; if (i == fs->fs_nrpos) for (i = 0; i < pos; i++) if (cylbp[i] > 0) break; if (cylbp[i] > 0) { /* * found a rotational position, now find the actual * block. A panic if none is actually there. */ pos = cylno % fs->fs_cpc; bno = (cylno - pos) * fs->fs_spc / NSPB(fs); if (fs_postbl(fs, pos)[i] == -1) { printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); panic("ffs_alloccgblk: cyl groups corrupted"); } for (i = fs_postbl(fs, pos)[i];; ) { if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) { bno = blkstofrags(fs, (bno + i)); goto gotit; } delta = fs_rotbl(fs)[i]; if (delta <= 0 || delta + i > fragstoblks(fs, fs->fs_fpg)) break; i += delta; } printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); panic("ffs_alloccgblk: can't find blk in cyl"); } norot: /* * no blocks in the requested cylinder, so take next * available one in this cylinder group. 
*/ bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); if (bno < 0) return (0); cgp->cg_rotor = bno; gotit: blkno = fragstoblks(fs, bno); ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno); ffs_clusteracct(fs, cgp, blkno, -1); cgp->cg_cs.cs_nbfree--; fs->fs_cstotal.cs_nbfree--; fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; cylno = cbtocylno(fs, bno); cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; cg_blktot(cgp)[cylno]--; fs->fs_fmod = 1; - return (cgp->cg_cgx * fs->fs_fpg + bno); + blkno = cgp->cg_cgx * fs->fs_fpg + bno; + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_blkmapdep(bp, fs, blkno); + return (blkno); } #ifdef notyet /* * Determine whether a cluster can be allocated. * * We do not currently check for optimal rotational layout if there * are multiple choices in the same cylinder group. Instead we just * take the first one that we find following bpref. */ static ufs_daddr_t ffs_clusteralloc(ip, cg, bpref, len) struct inode *ip; int cg; ufs_daddr_t bpref; int len; { register struct fs *fs; register struct cg *cgp; struct buf *bp; int i, got, run, bno, bit, map; u_char *mapp; int32_t *lp; fs = ip->i_fs; if (fs->fs_maxcluster[cg] < len) return (NULL); if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp)) goto fail; cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) goto fail; /* * Check to see if a cluster of the needed size (or bigger) is * available in this cylinder group. */ lp = &cg_clustersum(cgp)[len]; for (i = len; i <= fs->fs_contigsumsize; i++) if (*lp++ > 0) break; if (i > fs->fs_contigsumsize) { /* * This is the first time looking for a cluster in this * cylinder group. Update the cluster summary information * to reflect the true maximum sized cluster so that * future cluster allocation requests can avoid reading * the cylinder group map only to find no clusters. */ lp = &cg_clustersum(cgp)[len - 1]; for (i = len - 1; i > 0; i--) if (*lp-- > 0) break; fs->fs_maxcluster[cg] = i; goto fail; } /* * Search the cluster map to find a big enough cluster. * We take the first one that we find, even if it is larger * than we need as we prefer to get one close to the previous * block allocation. We do not search before the current * preference point as we do not want to allocate a block * that is allocated before the previous one (as we will * then have to wait for another pass of the elevator * algorithm before it will be read). We prefer to fail and * be recalled to try an allocation in the next cylinder group. */ if (dtog(fs, bpref) != cg) bpref = 0; else bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); mapp = &cg_clustersfree(cgp)[bpref / NBBY]; map = *mapp++; bit = 1 << (bpref % NBBY); for (run = 0, got = bpref; got < cgp->cg_nclusterblks; got++) { if ((map & bit) == 0) { run = 0; } else { run++; if (run == len) break; } if ((got & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } if (got >= cgp->cg_nclusterblks) goto fail; /* * Allocate the cluster that we have found. 
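* (The cluster found runs from block got - run + 1 through got; the loop below claims each constituent block with ffs_alloccgblk.)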
*/ for (i = 1; i <= len; i++) if (!ffs_isblock(fs, cg_blksfree(cgp), got - run + i)) panic("ffs_clusteralloc: map mismatch"); bno = cg * fs->fs_fpg + blkstofrags(fs, got - run + 1); if (dtog(fs, bno) != cg) panic("ffs_clusteralloc: allocated out of group"); len = blkstofrags(fs, len); for (i = 0; i < len; i += fs->fs_frag) - if ((got = ffs_alloccgblk(fs, cgp, bno + i)) != bno + i) + if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i) panic("ffs_clusteralloc: lost block"); bdwrite(bp); return (bno); fail: brelse(bp); return (0); } #endif /* * Determine whether an inode can be allocated. * * Check to see if an inode is available, and if it is, * allocate it using the following policy: * 1) allocate the requested inode. * 2) allocate the next available inode after the requested * inode in the specified cylinder group. */ static ino_t ffs_nodealloccg(ip, cg, ipref, mode) struct inode *ip; int cg; ufs_daddr_t ipref; int mode; { register struct fs *fs; register struct cg *cgp; struct buf *bp; int error, start, len, loc, map, i; fs = ip->i_fs; if (fs->fs_cs(fs, cg).cs_nifree == 0) return (0); error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); return (0); } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { brelse(bp); return (0); } cgp->cg_time = time.tv_sec; if (ipref) { ipref %= fs->fs_ipg; if (isclr(cg_inosused(cgp), ipref)) goto gotit; } start = cgp->cg_irotor / NBBY; len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY); loc = skpc(0xff, len, &cg_inosused(cgp)[start]); if (loc == 0) { len = start + 1; start = 0; loc = skpc(0xff, len, &cg_inosused(cgp)[0]); if (loc == 0) { printf("cg = %d, irotor = %ld, fs = %s\n", cg, cgp->cg_irotor, fs->fs_fsmnt); panic("ffs_nodealloccg: map corrupted"); /* NOTREACHED */ } } i = start + len - loc; map = cg_inosused(cgp)[i]; ipref = i * NBBY; for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { if ((map & i) == 0) { cgp->cg_irotor = ipref; goto gotit; } } printf("fs = %s\n", fs->fs_fsmnt); panic("ffs_nodealloccg: block not in map"); /* NOTREACHED */ gotit: + if (DOINGSOFTDEP(ITOV(ip))) + softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref); setbit(cg_inosused(cgp), ipref); cgp->cg_cs.cs_nifree--; fs->fs_cstotal.cs_nifree--; fs->fs_cs(fs, cg).cs_nifree--; fs->fs_fmod = 1; if ((mode & IFMT) == IFDIR) { cgp->cg_cs.cs_ndir++; fs->fs_cstotal.cs_ndir++; fs->fs_cs(fs, cg).cs_ndir++; } bdwrite(bp); return (cg * fs->fs_ipg + ipref); } /* * Free a block or fragment. * * The specified block or fragment is placed back in the * free map. If a fragment is deallocated, a possible * block reassembly is checked. 
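* (E.g., with the common fs_frag of 8, freeing 3 frags adjacent to 5 already-free frags reassembles a whole block, and the code below moves that space from the fragment counts to the free-block counts.)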
*/ void ffs_blkfree(ip, bno, size) register struct inode *ip; ufs_daddr_t bno; long size; { register struct fs *fs; register struct cg *cgp; struct buf *bp; ufs_daddr_t blkno; int i, error, cg, blk, frags, bbase; fs = ip->i_fs; - if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { - printf("dev = 0x%lx, bsize = %ld, size = %ld, fs = %s\n", - (u_long)ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || + fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { + printf("dev=0x%lx, bno = %d, bsize = %d, size = %ld, fs = %s\n", + (u_long)ip->i_dev, bno, fs->fs_bsize, size, fs->fs_fsmnt); panic("ffs_blkfree: bad size"); } cg = dtog(fs, bno); if ((u_int)bno >= fs->fs_size) { printf("bad block %ld, ino %ld\n", bno, ip->i_number); ffs_fserr(fs, ip->i_uid, "bad block"); return; } error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); return; } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) { brelse(bp); return; } cgp->cg_time = time.tv_sec; bno = dtogd(fs, bno); if (size == fs->fs_bsize) { blkno = fragstoblks(fs, bno); - if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { + if (!ffs_isfreeblock(fs, cg_blksfree(cgp), blkno)) { printf("dev = 0x%lx, block = %ld, fs = %s\n", (u_long) ip->i_dev, bno, fs->fs_fsmnt); panic("ffs_blkfree: freeing free block"); } ffs_setblock(fs, cg_blksfree(cgp), blkno); ffs_clusteracct(fs, cgp, blkno, 1); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; i = cbtocylno(fs, bno); cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; cg_blktot(cgp)[i]++; } else { bbase = bno - fragnum(fs, bno); /* * decrement the counts associated with the old frags */ blk = blkmap(fs, cg_blksfree(cgp), bbase); ffs_fragacct(fs, blk, cgp->cg_frsum, -1); /* * deallocate the fragment */ frags = numfrags(fs, size); for (i = 0; i < frags; i++) { if (isset(cg_blksfree(cgp), bno + i)) { printf("dev = 0x%lx, block = %ld, fs = %s\n", (u_long) ip->i_dev, bno + i, fs->fs_fsmnt); panic("ffs_blkfree: freeing free frag"); } setbit(cg_blksfree(cgp), bno + i); } cgp->cg_cs.cs_nffree += i; fs->fs_cstotal.cs_nffree += i; fs->fs_cs(fs, cg).cs_nffree += i; /* * add back in counts associated with the new frags */ blk = blkmap(fs, cg_blksfree(cgp), bbase); ffs_fragacct(fs, blk, cgp->cg_frsum, 1); /* * if a complete block has been reassembled, account for it */ blkno = fragstoblks(fs, bbase); if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { cgp->cg_cs.cs_nffree -= fs->fs_frag; fs->fs_cstotal.cs_nffree -= fs->fs_frag; fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; ffs_clusteracct(fs, cgp, blkno, 1); cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; i = cbtocylno(fs, bbase); cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; cg_blktot(cgp)[i]++; } } fs->fs_fmod = 1; bdwrite(bp); } #ifdef DIAGNOSTIC /* * Verify allocation of a block or fragment. Returns true if block or * fragment is allocated, false if it is free. 
*/ static int ffs_checkblk(ip, bno, size) struct inode *ip; ufs_daddr_t bno; long size; { struct fs *fs; struct cg *cgp; struct buf *bp; int i, error, frags, free; fs = ip->i_fs; if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { printf("bsize = %d, size = %d, fs = %s\n", fs->fs_bsize, size, fs->fs_fsmnt); panic("ffs_checkblk: bad size"); } if ((u_int)bno >= fs->fs_size) panic("ffs_checkblk: bad block %d", bno); error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), (int)fs->fs_cgsize, NOCRED, &bp); if (error) panic("ffs_checkblk: cg bread failed"); cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) panic("ffs_checkblk: cg magic mismatch"); bno = dtogd(fs, bno); if (size == fs->fs_bsize) { free = ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno)); } else { frags = numfrags(fs, size); for (free = 0, i = 0; i < frags; i++) if (isset(cg_blksfree(cgp), bno + i)) free++; if (free != 0 && free != frags) panic("ffs_checkblk: partially free fragment"); } brelse(bp); return (!free); } #endif /* DIAGNOSTIC */ /* * Free an inode. - * - * The specified inode is placed back in the free map. */ int -ffs_vfree(pvp, ino, mode) +ffs_vfree( pvp, ino, mode) struct vnode *pvp; ino_t ino; int mode; { + if (DOINGSOFTDEP(pvp)) { + softdep_freefile(pvp, ino, mode); + return (0); + } + return (ffs_freefile(pvp, ino, mode)); +} + +/* + * Do the actual free operation. + * The specified inode is placed back in the free map. + */ + int + ffs_freefile( pvp, ino, mode) + struct vnode *pvp; + ino_t ino; + int mode; +{ register struct fs *fs; register struct cg *cgp; register struct inode *pip; struct buf *bp; int error, cg; pip = VTOI(pvp); fs = pip->i_fs; if ((u_int)ino >= fs->fs_ipg * fs->fs_ncg) panic("ffs_vfree: range: dev = 0x%x, ino = %d, fs = %s", pip->i_dev, ino, fs->fs_fsmnt); cg = ino_to_cg(fs, ino); error = bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NOCRED, &bp); if (error) { brelse(bp); - return (0); + return (error); } cgp = (struct cg *)bp->b_data; if (!cg_chkmagic(cgp)) { brelse(bp); return (0); } cgp->cg_time = time.tv_sec; ino %= fs->fs_ipg; if (isclr(cg_inosused(cgp), ino)) { printf("dev = 0x%lx, ino = %ld, fs = %s\n", (u_long)pip->i_dev, ino, fs->fs_fsmnt); if (fs->fs_ronly == 0) panic("ffs_vfree: freeing free inode"); } clrbit(cg_inosused(cgp), ino); if (ino < cgp->cg_irotor) cgp->cg_irotor = ino; cgp->cg_cs.cs_nifree++; fs->fs_cstotal.cs_nifree++; fs->fs_cs(fs, cg).cs_nifree++; if ((mode & IFMT) == IFDIR) { cgp->cg_cs.cs_ndir--; fs->fs_cstotal.cs_ndir--; fs->fs_cs(fs, cg).cs_ndir--; } fs->fs_fmod = 1; bdwrite(bp); return (0); } /* * Find a block of the specified size in the specified cylinder group. * * It is a panic if a request is made to find a block if none are * available. 
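* (The scan starts at the byte containing bpref, or at cg_frotor when no preference is given, and wraps to the front of the map if the tail yields nothing.)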
*/ static ufs_daddr_t ffs_mapsearch(fs, cgp, bpref, allocsiz) register struct fs *fs; register struct cg *cgp; ufs_daddr_t bpref; int allocsiz; { ufs_daddr_t bno; int start, len, loc, i; int blk, field, subfield, pos; /* * find the fragment by searching through the free block * map for an appropriate bit pattern */ if (bpref) start = dtogd(fs, bpref) / NBBY; else start = cgp->cg_frotor / NBBY; len = howmany(fs->fs_fpg, NBBY) - start; loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start], (u_char *)fragtbl[fs->fs_frag], (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); if (loc == 0) { len = start + 1; start = 0; loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0], (u_char *)fragtbl[fs->fs_frag], (u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY)))); if (loc == 0) { printf("start = %d, len = %d, fs = %s\n", start, len, fs->fs_fsmnt); panic("ffs_alloccg: map corrupted"); /* NOTREACHED */ } } bno = (start + len - loc) * NBBY; cgp->cg_frotor = bno; /* * found the byte in the map * sift through the bits to find the selected frag */ for (i = bno + NBBY; bno < i; bno += fs->fs_frag) { blk = blkmap(fs, cg_blksfree(cgp), bno); blk <<= 1; field = around[allocsiz]; subfield = inside[allocsiz]; for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) { if ((blk & field) == subfield) return (bno + pos); field <<= 1; subfield <<= 1; } } printf("bno = %lu, fs = %s\n", (u_long)bno, fs->fs_fsmnt); panic("ffs_alloccg: block not in map"); return (-1); } /* * Update the cluster map because of an allocation or free. * * Cnt == 1 means free; cnt == -1 means allocating. */ static void ffs_clusteracct(fs, cgp, blkno, cnt) struct fs *fs; struct cg *cgp; ufs_daddr_t blkno; int cnt; { int32_t *sump; int32_t *lp; u_char *freemapp, *mapp; int i, start, end, forw, back, map, bit; if (fs->fs_contigsumsize <= 0) return; freemapp = cg_clustersfree(cgp); sump = cg_clustersum(cgp); /* * Allocate or clear the actual block. */ if (cnt > 0) setbit(freemapp, blkno); else clrbit(freemapp, blkno); /* * Find the size of the cluster going forward. */ start = blkno + 1; end = start + fs->fs_contigsumsize; if (end >= cgp->cg_nclusterblks) end = cgp->cg_nclusterblks; mapp = &freemapp[start / NBBY]; map = *mapp++; bit = 1 << (start % NBBY); for (i = start; i < end; i++) { if ((map & bit) == 0) break; if ((i & (NBBY - 1)) != (NBBY - 1)) { bit <<= 1; } else { map = *mapp++; bit = 1; } } forw = i - start; /* * Find the size of the cluster going backward. */ start = blkno - 1; end = start - fs->fs_contigsumsize; if (end < 0) end = -1; mapp = &freemapp[start / NBBY]; map = *mapp--; bit = 1 << (start % NBBY); for (i = start; i > end; i--) { if ((map & bit) == 0) break; if ((i & (NBBY - 1)) != 0) { bit >>= 1; } else { map = *mapp--; bit = 1 << (NBBY - 1); } } back = start - i; /* * Account for old cluster and the possibly new forward and * back clusters. */ i = back + forw + 1; if (i > fs->fs_contigsumsize) i = fs->fs_contigsumsize; sump[i] += cnt; if (back > 0) sump[back] -= cnt; if (forw > 0) sump[forw] -= cnt; /* * Update cluster summary information. */ lp = &sump[fs->fs_contigsumsize]; for (i = fs->fs_contigsumsize; i > 0; i--) if (*lp-- > 0) break; fs->fs_maxcluster[cgp->cg_cgx] = i; } /* * Fserr prints the name of a file system with an error diagnostic. * * The form of the error message is: * fs: error message */ static void ffs_fserr(fs, uid, cp) struct fs *fs; u_int uid; char *cp; { struct proc *p = curproc; /* XXX */ log(LOG_ERR, "pid %d (%s), uid %d on %s: %s\n", p ? p->p_pid : -1, p ? 
p->p_comm : "-", uid, fs->fs_fsmnt, cp); } Index: head/sys/ufs/ffs/ffs_balloc.c =================================================================== --- head/sys/ufs/ffs/ffs_balloc.c (revision 34265) +++ head/sys/ufs/ffs/ffs_balloc.c (revision 34266) @@ -1,312 +1,354 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 - * $Id: ffs_balloc.c,v 1.18 1998/02/04 22:33:31 eivind Exp $ + * $Id: ffs_balloc.c,v 1.19 1998/02/06 12:14:14 eivind Exp $ */ #include #include #include #include +#include #include #include #include #include #include #include /* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. 
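To make the comment above concrete: a logical block number is served by the inode's direct pointers, or by one of NIADDR levels of indirect blocks. A small sketch of working out the indirection level for a given lbn, in the spirit of what ufs_getlbns() computes (assumed 8K blocks and 4-byte pointers; not the kernel code):

#include <stdio.h>

#define NDADDR	12	/* direct pointers in an FFS inode */
#define NIADDR	3	/* single, double, triple indirect */

/* Indirection level needed to reach logical block lbn: 0 = direct,
 * 1..3 = single/double/triple indirect, -1 = beyond triple (EFBIG). */
static int
indir_level(long long lbn, long long nindir)
{
	long long span = NDADDR;	/* blocks reachable so far */
	long long per = 1;		/* data blocks under one pointer */
	int level;

	if (lbn < NDADDR)
		return (0);
	for (level = 1; level <= NIADDR; level++) {
		per *= nindir;
		if (lbn < span + per)
			return (level);
		span += per;
	}
	return (-1);
}

int
main(void)
{
	long long nindir = 8192 / 4;	/* NINDIR(fs) for 8K blocks */

	printf("lbn 5    -> level %d\n", indir_level(5, nindir));	/* 0 */
	printf("lbn 100  -> level %d\n", indir_level(100, nindir));	/* 1 */
	printf("lbn 5000 -> level %d\n", indir_level(5000, nindir));	/* 2 */
	return (0);
}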
*/ int -ffs_balloc(ip, lbn, size, cred, bpp, flags) +ffs_balloc(ap) + struct vop_balloc_args /* { + struct vnode *a_vp; + off_t a_startoffset; + int a_size; + struct ucred *a_cred; + int a_flags; + struct buf **a_bpp; + } */ *ap; +{ register struct inode *ip; register ufs_daddr_t lbn; int size; struct ucred *cred; - struct buf **bpp; int flags; -{ - register struct fs *fs; - register ufs_daddr_t nb; + struct fs *fs; + ufs_daddr_t nb; struct buf *bp, *nbp; - struct vnode *vp = ITOV(ip); + struct vnode *vp; struct indir indirs[NIADDR + 2]; ufs_daddr_t newb, *bap, pref; int deallocated, osize, nsize, num, i, error; ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; - *bpp = NULL; + vp = ap->a_vp; + ip = VTOI(vp); + fs = ip->i_fs; + lbn = lblkno(fs, ap->a_startoffset); + size = blkoff(fs, ap->a_startoffset) + ap->a_size; + if (size > fs->fs_bsize) + panic("ffs_balloc: blk too big"); + *ap->a_bpp = NULL; if (lbn < 0) return (EFBIG); - fs = ip->i_fs; + cred = ap->a_cred; + flags = ap->a_flags; /* * If the next write will extend the file into a new block, * and the file is currently composed of a fragment * this fragment has to be extended to be a full block. */ nb = lblkno(fs, ip->i_size); if (nb < NDADDR && nb < lbn) { osize = blksize(fs, ip, nb); if (osize < fs->fs_bsize && osize > 0) { error = ffs_realloccg(ip, nb, ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]), osize, (int)fs->fs_bsize, cred, &bp); if (error) return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, nb, + dbtofsb(fs, bp->b_blkno), ip->i_db[nb], + fs->fs_bsize, osize, bp); ip->i_size = smalllblktosize(fs, nb + 1); ip->i_db[nb] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (flags & B_SYNC) bwrite(bp); else bawrite(bp); } } /* * The first NDADDR blocks are direct blocks */ if (lbn < NDADDR) { nb = ip->i_db[lbn]; if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); - *bpp = bp; + *ap->a_bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, lbn, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bp->b_blkno = fsbtodb(fs, nb); } else { error = ffs_realloccg(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), osize, nsize, cred, &bp); if (error) return (error); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, + dbtofsb(fs, bp->b_blkno), nb, + nsize, osize, bp); } } else { if (ip->i_size < smalllblktosize(fs, lbn + 1)) nsize = fragroundup(fs, size); else nsize = fs->fs_bsize; error = ffs_alloc(ip, lbn, ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), nsize, cred, &newb); if (error) return (error); bp = getblk(vp, lbn, nsize, 0, 0); bp->b_blkno = fsbtodb(fs, newb); if (flags & B_CLRBUF) vfs_bio_clrbuf(bp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocdirect(ip, lbn, newb, 0, + nsize, 0, bp); } ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bpp = bp; + *ap->a_bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if (error = ufs_getlbns(vp, lbn, indirs, &num)) return(error); #ifdef DIAGNOSTIC if (num < 1) panic ("ffs_balloc: ufs_bmaparray returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary.
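The converted entry point above no longer receives (lbn, size) from the caller; it derives them from the byte offset carried in the args structure: lbn = lblkno(fs, a_startoffset) and size = blkoff(fs, a_startoffset) + a_size, and it panics if that exceeds one block. Because fs_bsize is a power of two, both macros reduce to a shift and a mask. A userland sketch of the arithmetic (8K blocks assumed; plain constants stand in for the kernel macros):

#include <stdio.h>

#define BSHIFT	13			/* log2(8192), i.e. fs_bshift */
#define BSIZE	(1L << BSHIFT)
#define BMASK	(BSIZE - 1)

int
main(void)
{
	long startoffset = 20000;	/* byte offset of the write */
	int a_size = 512;		/* bytes to allocate */

	long lbn = startoffset >> BSHIFT;	/* lblkno() */
	long off = startoffset & BMASK;		/* blkoff() */
	long size = off + a_size;		/* bytes needed in that block */

	printf("lbn %ld, offset in block %ld, size %ld\n", lbn, off, size);
	/* ffs_balloc() panics if size > fs_bsize: a caller may only ask
	 * for space within a single block per call. */
	return (0);
}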
*/ --num; nb = ip->i_ib[indirs[0].in_off]; allocib = NULL; allocblk = allociblk; if (nb == 0) { pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) return (error); nb = newb; *allocblk++ = nb; bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); bp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(bp); - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if (error = bwrite(bp)) - goto fail; + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, + newb, 0, fs->fs_bsize, 0, bp); + bdwrite(bp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if (error = bwrite(bp)) + goto fail; + } allocib = &ip->i_ib[indirs[0].in_off]; *allocib = nb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); goto fail; } bap = (ufs_daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { bqrelse(bp); continue; } if (pref == 0) pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); - /* - * Write synchronously so that indirect blocks - * never point at garbage. - */ - if (error = bwrite(nbp)) { - brelse(bp); - goto fail; + if (DOINGSOFTDEP(vp)) { + softdep_setup_allocindir_meta(nbp, ip, bp, + indirs[i - 1].in_off, nb); + bdwrite(nbp); + } else { + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if (error = bwrite(nbp)) { + brelse(bp); + goto fail; + } } bap[indirs[i - 1].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb); if (error) { brelse(bp); goto fail; } nb = newb; *allocblk++ = nb; nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & B_CLRBUF) vfs_bio_clrbuf(nbp); + if (DOINGSOFTDEP(vp)) + softdep_setup_allocindir_page(ip, lbn, bp, + indirs[i].in_off, nb, 0, nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } - *bpp = nbp; + *ap->a_bpp = nbp; return (0); } brelse(bp); if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); if (error) { brelse(nbp); goto fail; } } else { nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); } - *bpp = nbp; + *ap->a_bpp = nbp; return (0); fail: /* * If we have failed part way through block allocation, we * have to deallocate any indirect blocks that we have allocated. */ for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { ffs_blkfree(ip, *blkp, fs->fs_bsize); deallocated += fs->fs_bsize; } if (allocib != NULL) *allocib = 0; if (deallocated) { #ifdef QUOTA /* * Restore user's disk quota because allocation failed. 
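Both hunks above preserve the same on-disk invariant in two different ways: an indirect block must never reach the disk while it still points at uninitialized blocks. Without soft updates the freshly allocated indirect block is written synchronously (bwrite) before anything points at it; with soft updates the write is delayed (bdwrite) and softdep_setup_allocindir_meta()/softdep_setup_allocdirect() record the dependency so the buffer layer enforces the order. A purely schematic sketch of the two strategies (hypothetical helpers, not kernel calls):

#include <stdio.h>

static void write_sync(const char *what)    { printf("sync write:    %s\n", what); }
static void write_delayed(const char *what) { printf("delayed write: %s\n", what); }

/* Install a pointer to a freshly allocated indirect block. */
static void
install_indirect(int softdep)
{
	if (softdep) {
		/* Record "parent must not hit disk before child" and let
		 * both writes be delayed; the dependency code orders them. */
		write_delayed("new indirect block (dependency recorded)");
		write_delayed("parent block naming it");
	} else {
		/* The child must be on disk before the parent names it. */
		write_sync("new indirect block");
		write_delayed("parent block naming it");
	}
}

int
main(void)
{
	install_indirect(0);	/* the bwrite() path above */
	install_indirect(1);	/* the bdwrite() + softdep path above */
	return (0);
}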
*/ (void) chkdq(ip, (long)-btodb(deallocated), cred, FORCE); #endif ip->i_blocks -= btodb(deallocated); ip->i_flag |= IN_CHANGE | IN_UPDATE; } return (error); } Index: head/sys/ufs/ffs/ffs_extern.h =================================================================== --- head/sys/ufs/ffs/ffs_extern.h (revision 34265) +++ head/sys/ufs/ffs/ffs_extern.h (revision 34266) @@ -1,105 +1,133 @@ /*- * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95 - * $Id: ffs_extern.h,v 1.21 1997/11/22 08:35:45 bde Exp $ + * $Id: ffs_extern.h,v 1.22 1998/02/03 21:52:00 bde Exp $ */ #ifndef _UFS_FFS_EXTERN_H #define _UFS_FFS_EXTERN_H /* * Sysctl values for the fast filesystem. 
*/ #define FFS_REALLOCBLKS 3 /* block reallocation enabled */ #define FFS_ASYNCFREE 4 /* asynchronous block freeing enabled */ #define FFS_MAXID 5 /* number of valid ffs ids */ #define FFS_NAMES { \ { 0, 0 }, \ { 0, 0 }, \ { 0, 0 }, \ { "doreallocblks", CTLTYPE_INT }, \ { "doasyncfree", CTLTYPE_INT }, \ } struct buf; struct fid; struct fs; struct inode; struct malloc_type; struct mount; struct proc; struct sockaddr; struct statfs; struct ucred; struct vnode; struct vop_bmap_args; struct vop_reallocblks_args; int ffs_alloc __P((struct inode *, ufs_daddr_t, ufs_daddr_t, int, struct ucred *, ufs_daddr_t *)); -int ffs_balloc __P((struct inode *, - ufs_daddr_t, int, struct ucred *, struct buf **, int)); +int ffs_balloc __P((struct vop_balloc_args *)); int ffs_blkatoff __P((struct vnode *, off_t, char **, struct buf **)); void ffs_blkfree __P((struct inode *, ufs_daddr_t, long)); ufs_daddr_t ffs_blkpref __P((struct inode *, ufs_daddr_t, int, ufs_daddr_t *)); int ffs_bmap __P((struct vop_bmap_args *)); void ffs_clrblock __P((struct fs *, u_char *, ufs_daddr_t)); int ffs_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, struct vnode **, int *, struct ucred **)); int ffs_flushfiles __P((struct mount *, int, struct proc *)); void ffs_fragacct __P((struct fs *, int, int32_t [], int)); +int ffs_freefile __P(( struct vnode *, ino_t, int )); int ffs_isblock __P((struct fs *, u_char *, ufs_daddr_t)); +int ffs_isfreeblock __P((struct fs *, unsigned char *, ufs_daddr_t)); int ffs_mountfs __P((struct vnode *, struct mount *, struct proc *, struct malloc_type *)); int ffs_mountroot __P((void)); int ffs_reallocblks __P((struct vop_reallocblks_args *)); int ffs_realloccg __P((struct inode *, ufs_daddr_t, ufs_daddr_t, int, int, struct ucred *, struct buf **)); void ffs_setblock __P((struct fs *, u_char *, ufs_daddr_t)); int ffs_statfs __P((struct mount *, struct statfs *, struct proc *)); int ffs_sync __P((struct mount *, int, struct ucred *, struct proc *)); int ffs_truncate __P((struct vnode *, off_t, int, struct ucred *, struct proc *)); int ffs_unmount __P((struct mount *, int, struct proc *)); int ffs_update __P((struct vnode *, struct timeval *, struct timeval *, int)); int ffs_valloc __P((struct vnode *, int, struct ucred *, struct vnode **)); int ffs_vfree __P((struct vnode *, ino_t, int)); int ffs_vget __P((struct mount *, ino_t, struct vnode **)); int ffs_vptofh __P((struct vnode *, struct fid *)); extern vop_t **ffs_vnodeop_p; extern vop_t **ffs_specop_p; extern vop_t **ffs_fifoop_p; +/* + * Soft update function prototypes. 
+ */ +void softdep_initialize __P((void)); +int softdep_process_worklist __P((struct mount *)); +int softdep_mount __P((struct vnode *, struct mount *, struct fs *, + struct ucred *)); +int softdep_flushfiles __P((struct mount *, int, struct proc *)); +void softdep_update_inodeblock __P((struct inode *, struct buf *, int)); +void softdep_load_inodeblock __P((struct inode *)); +int softdep_fsync __P((struct vnode *)); +void softdep_freefile __P((struct vnode *, ino_t, int)); +void softdep_setup_freeblocks __P((struct inode *, off_t)); +void softdep_deallocate_dependencies __P((struct buf *)); +void softdep_setup_inomapdep __P((struct buf *, struct inode *, ino_t)); +void softdep_setup_blkmapdep __P((struct buf *, struct fs *, ufs_daddr_t)); +void softdep_setup_allocdirect __P((struct inode *, ufs_lbn_t, ufs_daddr_t, + ufs_daddr_t, long, long, struct buf *)); +void softdep_setup_allocindir_meta __P((struct buf *, struct inode *, + struct buf *, int, ufs_daddr_t)); +void softdep_setup_allocindir_page __P((struct inode *, ufs_lbn_t, + struct buf *, int, ufs_daddr_t, ufs_daddr_t, struct buf *)); +void softdep_disk_io_initiation __P((struct buf *)); +void softdep_disk_write_complete __P((struct buf *)); +int softdep_sync_metadata __P((struct vop_fsync_args *)); + #endif /* !_UFS_FFS_EXTERN_H */ + Index: head/sys/ufs/ffs/ffs_inode.c =================================================================== --- head/sys/ufs/ffs/ffs_inode.c (revision 34265) +++ head/sys/ufs/ffs/ffs_inode.c (revision 34266) @@ -1,520 +1,558 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 - * $Id: ffs_inode.c,v 1.34 1998/02/06 12:14:14 eivind Exp $ + * $Id: ffs_inode.c,v 1.35 1998/03/07 21:36:33 dyson Exp $ */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ffs_indirtrunc __P((struct inode *, ufs_daddr_t, ufs_daddr_t, ufs_daddr_t, int, long *)); /* * Update the access, modified, and inode change times as specified by the * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED * flag is used to specify that the inode needs to be updated even if none * of the times needs to be updated. The access and modified times are taken * from the second and third parameters; the inode change time is always * taken from the current time. If waitfor is set, then wait for the disk * write of the inode to complete. */ int ffs_update(vp, access, modify, waitfor) struct vnode *vp; struct timeval *access; struct timeval *modify; int waitfor; { register struct fs *fs; struct buf *bp; struct inode *ip; int error; time_t tv_sec; ip = VTOI(vp); if (vp->v_mount->mnt_flag & MNT_RDONLY) { ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); return (0); } - if ((ip->i_flag & - (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) + if (((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) && + (waitfor != MNT_WAIT)) return (0); /* * Use a copy of the current time to get consistent timestamps * (a_access and a_modify are sometimes aliases for &time). * * XXX in 2.0, a_access and a_modify are often pointers to the * same copy of `time'. This is not as good. Some callers forget * to make a copy; others make a copy too early (before the i/o * has completed)... * * XXX there should be a function or macro for reading the time * (e.g., some machines may require splclock()). */ tv_sec = time.tv_sec; if (ip->i_flag & IN_ACCESS) ip->i_atime = (access == &time ? tv_sec : access->tv_sec); if (ip->i_flag & IN_UPDATE) { ip->i_mtime = (modify == &time ? tv_sec : modify->tv_sec); ip->i_modrev++; } if (ip->i_flag & IN_CHANGE) ip->i_ctime = tv_sec; ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); fs = ip->i_fs; /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ ip->i_din.di_ouid = ip->i_uid; /* XXX */ ip->i_din.di_ogid = ip->i_gid; /* XXX */ } /* XXX */ error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } + if (DOINGSOFTDEP(vp)) + softdep_update_inodeblock(ip, bp, waitfor); + else if (ip->i_effnlink != ip->i_nlink) + panic("ffs_update: bad link cnt"); *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)) = ip->i_din; - if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) + if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) { return (bwrite(bp)); - else { + } else { if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; bdwrite(bp); return (0); } } #define SINGLE 0 /* index of single indirect block */ #define DOUBLE 1 /* index of double indirect block */ #define TRIPLE 2 /* index of triple indirect block */ /* * Truncate the inode oip to at most length size, freeing the * disk blocks. 
*/ int ffs_truncate(vp, length, flags, cred, p) struct vnode *vp; off_t length; int flags; struct ucred *cred; struct proc *p; { register struct vnode *ovp = vp; ufs_daddr_t lastblock; register struct inode *oip; ufs_daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; ufs_daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; register struct fs *fs; struct buf *bp; int offset, size, level; long count, nblocks, vflags, blocksreleased = 0; struct timeval tv; register int i; int aflags, error, allerror; off_t osize; oip = VTOI(ovp); + if (oip->i_size == length) + return (0); fs = oip->i_fs; if (length < 0) return (EINVAL); if (length > fs->fs_maxfilesize) return (EFBIG); gettime(&tv); if (ovp->v_type == VLNK && (oip->i_size < ovp->v_mount->mnt_maxsymlinklen || oip->i_din.di_blocks == 0)) { #ifdef DIAGNOSTIC if (length != 0) panic("ffs_truncate: partial truncate of symlink"); #endif bzero((char *)&oip->i_shortlink, (u_int)oip->i_size); oip->i_size = 0; oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(ovp, &tv, &tv, 1)); } if (oip->i_size == length) { oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(ovp, &tv, &tv, 0)); } #ifdef QUOTA error = getinoquota(oip); if (error) return (error); #endif + ovp->v_lasta = ovp->v_clen = ovp->v_cstart = ovp->v_lastw = 0; + if (DOINGSOFTDEP(ovp)) { + if (length > 0) { + /* + * If a file is only partially truncated, then + * we have to clean up the data structures + * describing the allocation past the truncation + * point. Finding and deallocating those structures + * is a lot of work. Since partial truncation occurs + * rarely, we solve the problem by syncing the file + * so that it will have no data structures left. + */ + if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, + p)) != 0) + return (error); + } else { +#ifdef QUOTA + (void) chkdq(oip, -oip->i_blocks, NOCRED, 0); +#endif + softdep_setup_freeblocks(oip, length); + (void) vinvalbuf(ovp, 0, cred, p, 0, 0); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (ffs_update(ovp, &tv, &tv, 0)); + } + } osize = oip->i_size; /* * Lengthen the size of the file. We must ensure that the * last byte of the file is allocated. Since the smallest * value of osize is 0, length will be at least 1. */ if (osize < length) { vnode_pager_setsize(ovp, length); +#if 0 offset = blkoff(fs, length - 1); lbn = lblkno(fs, length - 1); +#endif aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; - error = ffs_balloc(oip, lbn, offset + 1, cred, - &bp, aflags); + error = VOP_BALLOC(ovp, length - 1, 1, + cred, aflags, &bp); if (error) return (error); oip->i_size = length; if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; if (aflags & B_SYNC) bwrite(bp); else if (ovp->v_mount->mnt_flag & MNT_ASYNC) bdwrite(bp); else bawrite(bp); oip->i_flag |= IN_CHANGE | IN_UPDATE; return (UFS_UPDATE(ovp, &tv, &tv, 1)); } /* * Shorten the size of the file. If the file is not being * truncated to a block boundary, the contents of the * partial block following the end of the file must be * zero'ed in case it ever becomes accessible again because * of subsequent file growth.
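When the file is being shortened to a mid-block length, the code that follows zeroes the tail of the new last block so stale bytes cannot reappear if the file later grows back over them. A tiny userland sketch of that arithmetic (8K blocks assumed; memset stands in for the kernel bzero on bp->b_data):

#include <stdio.h>
#include <string.h>

#define BSIZE	8192L

int
main(void)
{
	char block[BSIZE];
	long length = 20000;		/* new file size */
	long offset = length % BSIZE;	/* blkoff(fs, length) */

	memset(block, 'A', sizeof(block));	/* pretend old file data */
	if (offset != 0) {
		/* bzero((char *)bp->b_data + offset, size - offset) above */
		memset(block + offset, 0, BSIZE - offset);
		printf("zeroed bytes %ld..%ld of the last block\n",
		    offset, BSIZE - 1);
	}
	return (0);
}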
*/ offset = blkoff(fs, length); if (offset == 0) { oip->i_size = length; } else { lbn = lblkno(fs, length); aflags = B_CLRBUF; if (flags & IO_SYNC) aflags |= B_SYNC; - error = ffs_balloc(oip, lbn, offset, cred, &bp, aflags); - if (error) + error = VOP_BALLOC(ovp, length - 1, 1, cred, aflags, &bp); + if (error) { +#if 0 /* kirk's version had this */ + vnode_pager_setsize(ovp, (u_long)osize); +#endif return (error); + } oip->i_size = length; size = blksize(fs, oip, lbn); bzero((char *)bp->b_data + offset, (u_int)(size - offset)); allocbuf(bp, size); if (bp->b_bufsize == fs->fs_bsize) bp->b_flags |= B_CLUSTEROK; if (aflags & B_SYNC) bwrite(bp); else if (ovp->v_mount->mnt_flag & MNT_ASYNC) bdwrite(bp); else bawrite(bp); } /* * Calculate index into inode's block list of * last direct and indirect blocks (if any) * which we want to keep. Lastblock is -1 when * the file is truncated to 0. */ lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; lastiblock[SINGLE] = lastblock - NDADDR; lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); nblocks = btodb(fs->fs_bsize); /* * Update file and block pointers on disk before we start freeing * blocks. If we crash before free'ing blocks below, the blocks * will be returned to the free list. lastiblock values are also * normalized to -1 for calls to ffs_indirtrunc below. */ bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks); for (level = TRIPLE; level >= SINGLE; level--) if (lastiblock[level] < 0) { oip->i_ib[level] = 0; lastiblock[level] = -1; } for (i = NDADDR - 1; i > lastblock; i--) oip->i_db[i] = 0; oip->i_flag |= IN_CHANGE | IN_UPDATE; error = UFS_UPDATE(ovp, &tv, &tv, ((length > 0) ? 0 : 1)); if (error) allerror = error; /* * Having written the new inode to disk, save its new configuration * and put back the old block pointers long enough to process them. * Note that we save the new block configuration so we can check it * when we are done. */ bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks); bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks); oip->i_size = osize; vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; allerror = vinvalbuf(ovp, vflags, cred, p, 0, 0); vnode_pager_setsize(ovp, length); /* * Indirect blocks first. */ indir_lbn[SINGLE] = -NDADDR; indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; for (level = TRIPLE; level >= SINGLE; level--) { bn = oip->i_ib[level]; if (bn != 0) { error = ffs_indirtrunc(oip, indir_lbn[level], fsbtodb(fs, bn), lastiblock[level], level, &count); if (error) allerror = error; blocksreleased += count; if (lastiblock[level] < 0) { oip->i_ib[level] = 0; ffs_blkfree(oip, bn, fs->fs_bsize); blocksreleased += nblocks; } } if (lastiblock[level] >= 0) goto done; } /* * All whole direct blocks or frags. */ for (i = NDADDR - 1; i > lastblock; i--) { register long bsize; bn = oip->i_db[i]; if (bn == 0) continue; oip->i_db[i] = 0; bsize = blksize(fs, oip, i); ffs_blkfree(oip, bn, bsize); blocksreleased += btodb(bsize); } if (lastblock < 0) goto done; /* * Finally, look for a change in size of the * last direct block; release any frags. */ bn = oip->i_db[lastblock]; if (bn != 0) { long oldspace, newspace; /* * Calculate amount of space we're giving * back as old block size minus new block size. 
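The fragment-release arithmetic that follows is easiest to check with numbers: with 8K blocks and 1K fragments, shrinking the last block from 8K to a 3K tail keeps numfrags(fs, newspace) = 3 fragments, so the freed range starts at bn + 3 and spans oldspace - newspace = 5K. A worked sketch (assumed sizes, not the kernel macros):

#include <stdio.h>

#define FSIZE	1024L	/* fragment size */

int
main(void)
{
	long bn = 1000;		/* first fragment of the old last block */
	long oldspace = 8192;	/* blksize() before the size change */
	long newspace = 3072;	/* blksize() after oip->i_size = length */

	if (oldspace - newspace > 0) {
		long keep = newspace / FSIZE;		/* numfrags(fs, newspace) */
		printf("free %ld bytes starting at fragment %ld\n",
		    oldspace - newspace, bn + keep);	/* 5120 at 1003 */
	}
	return (0);
}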
*/ oldspace = blksize(fs, oip, lastblock); oip->i_size = length; newspace = blksize(fs, oip, lastblock); if (newspace == 0) panic("ffs_truncate: newspace"); if (oldspace - newspace > 0) { /* * Block number of space to be free'd is * the old block # plus the number of frags * required for the storage we're keeping. */ bn += numfrags(fs, newspace); ffs_blkfree(oip, bn, oldspace - newspace); blocksreleased += btodb(oldspace - newspace); } } done: #ifdef DIAGNOSTIC for (level = SINGLE; level <= TRIPLE; level++) if (newblks[NDADDR + level] != oip->i_ib[level]) panic("ffs_truncate1"); for (i = 0; i < NDADDR; i++) if (newblks[i] != oip->i_db[i]) panic("ffs_truncate2"); if (length == 0 && (ovp->v_dirtyblkhd.lh_first || ovp->v_cleanblkhd.lh_first)) panic("ffs_truncate3"); #endif /* DIAGNOSTIC */ /* * Put back the real size. */ oip->i_size = length; oip->i_blocks -= blocksreleased; if (oip->i_blocks < 0) /* sanity */ oip->i_blocks = 0; oip->i_flag |= IN_CHANGE; vnode_pager_setsize(ovp, length); #ifdef QUOTA (void) chkdq(oip, -blocksreleased, NOCRED, 0); #endif return (allerror); } /* * Release blocks associated with the inode ip and stored in the indirect * block bn. Blocks are free'd in LIFO order up to (but not including) * lastbn. If level is greater than SINGLE, the block is an indirect block * and recursive calls to indirtrunc must be used to cleanse other indirect * blocks. * * NB: triple indirect blocks are untested. */ static int ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) register struct inode *ip; ufs_daddr_t lbn, lastbn; ufs_daddr_t dbn; int level; long *countp; { register int i; struct buf *bp; register struct fs *fs = ip->i_fs; register ufs_daddr_t *bap; struct vnode *vp; ufs_daddr_t *copy = NULL, nb, nlbn, last; long blkcount, factor; int nblocks, blocksreleased = 0; int error = 0, allerror = 0; /* * Calculate index in current block of last * block to be kept. -1 indicates the entire * block so we need not calculate the index. */ factor = 1; for (i = SINGLE; i < level; i++) factor *= NINDIR(fs); last = lastbn; if (lastbn > 0) last /= factor; nblocks = btodb(fs->fs_bsize); /* * Get buffer of block pointers, zero those entries corresponding * to blocks to be free'd, and update on disk copy first. Since * double(triple) indirect before single(double) indirect, calls * to bmap on these blocks will fail. However, we already have * the on disk address, so we have to set the b_blkno field * explicitly instead of letting bread do everything for us. */ vp = ITOV(ip); bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); if ((bp->b_flags & B_CACHE) == 0) { curproc->p_stats->p_ru.ru_inblock++; /* pay for read */ bp->b_flags |= B_READ; if (bp->b_bcount > bp->b_bufsize) panic("ffs_indirtrunc: bad buffer size"); bp->b_blkno = dbn; vfs_busy_pages(bp, 0); VOP_STRATEGY(bp); error = biowait(bp); } if (error) { brelse(bp); *countp = 0; return (error); } bap = (ufs_daddr_t *)bp->b_data; if (lastbn != -1) { MALLOC(copy, ufs_daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK); bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize); bzero((caddr_t)&bap[last + 1], (u_int)(NINDIR(fs) - (last + 1)) * sizeof (ufs_daddr_t)); if ((vp->v_mount->mnt_flag & MNT_ASYNC) == 0) { error = bwrite(bp); if (error) allerror = error; } else { bawrite(bp); } bap = copy; } /* * Recursively free totally unused blocks. 
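In ffs_indirtrunc() above, factor is NINDIR(fs) raised to the number of indirection levels below the current block, so last = lastbn / factor is the last pointer slot in this block that still leads to data being kept. A small sketch of that computation (NINDIR assumed 2048, i.e. 8K blocks with 4-byte pointers):

#include <stdio.h>

#define NINDIR	2048L	/* pointers per indirect block */

int
main(void)
{
	long lastbn = 3000000;	/* last data block to keep, relative lbn */
	int level = 1;		/* 0 = single, 1 = double, 2 = triple */
	long factor = 1;
	int i;

	for (i = 0; i < level; i++)	/* for (i = SINGLE; i < level; i++) */
		factor *= NINDIR;
	/* Each slot of this indirect block covers `factor` data blocks. */
	printf("keep pointers 0..%ld in this block\n", lastbn / factor);
	return (0);
}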
*/ for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; i--, nlbn += factor) { nb = bap[i]; if (nb == 0) continue; if (level > SINGLE) { if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), (ufs_daddr_t)-1, level - 1, &blkcount)) allerror = error; blocksreleased += blkcount; } ffs_blkfree(ip, nb, fs->fs_bsize); blocksreleased += nblocks; } /* * Recursively free last partial block. */ if (level > SINGLE && lastbn >= 0) { last = lastbn % factor; nb = bap[i]; if (nb != 0) { error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), last, level - 1, &blkcount); if (error) allerror = error; blocksreleased += blkcount; } } if (copy != NULL) { FREE(copy, M_TEMP); } else { bp->b_flags |= B_INVAL | B_NOCACHE; brelse(bp); } *countp = blocksreleased; return (allerror); } Index: head/sys/ufs/ffs/ffs_subr.c =================================================================== --- head/sys/ufs/ffs/ffs_subr.c (revision 34265) +++ head/sys/ufs/ffs/ffs_subr.c (revision 34266) @@ -1,248 +1,272 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_subr.c 8.5 (Berkeley) 3/21/95 - * $Id: ffs_subr.c,v 1.18 1998/02/06 12:14:14 eivind Exp $ + * $Id: ffs_subr.c,v 1.19 1998/02/13 00:20:36 bde Exp $ */ #include #include #ifndef KERNEL #include #else #include "opt_ddb.h" #include #include #include #include #include #include #include #ifdef DDB static void ffs_checkoverlap __P((struct buf *, struct inode *)); #endif /* * Return buffer with the contents of block "offset" from the beginning of * directory "ip". If "res" is non-zero, fill it in with a pointer to the * remaining space in the directory. 
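ffs_blkatoff(), whose body follows, amounts to "read the block containing byte offset and return a pointer into its data": lblkno() picks the buffer and blkoff() indexes into it. A minimal userland stand-in (toy in-memory blocks, 8K size assumed):

#include <stdio.h>

#define BSIZE	8192L

/* Toy backing store in place of bread()'s buffer cache. */
static char blocks[4][BSIZE];

static char *
blkatoff(long offset)
{
	long lbn = offset / BSIZE;		/* lblkno(fs, offset) */

	return (&blocks[lbn][offset % BSIZE]);	/* b_data + blkoff() */
}

int
main(void)
{
	blocks[2][3616] = 'X';
	printf("byte at offset 20000: %c\n", *blkatoff(20000));	/* X */
	return (0);
}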
*/ int ffs_blkatoff(vp, offset, res, bpp) struct vnode *vp; off_t offset; char **res; struct buf **bpp; { struct inode *ip; register struct fs *fs; struct buf *bp; ufs_daddr_t lbn; int bsize, error; ip = VTOI(vp); fs = ip->i_fs; lbn = lblkno(fs, offset); bsize = blksize(fs, ip, lbn); *bpp = NULL; error = bread(vp, lbn, bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } if (res) *res = (char *)bp->b_data + blkoff(fs, offset); *bpp = bp; return (0); } #endif /* * Update the frsum fields to reflect addition or deletion * of some frags. */ void ffs_fragacct(fs, fragmap, fraglist, cnt) struct fs *fs; int fragmap; int32_t fraglist[]; int cnt; { int inblk; register int field, subfield; register int siz, pos; inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; fragmap <<= 1; for (siz = 1; siz < fs->fs_frag; siz++) { if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) continue; field = around[siz]; subfield = inside[siz]; for (pos = siz; pos <= fs->fs_frag; pos++) { if ((fragmap & field) == subfield) { fraglist[siz] += cnt; pos += siz; field <<= siz; subfield <<= siz; } field <<= 1; subfield <<= 1; } } } #ifdef DDB static void ffs_checkoverlap(bp, ip) struct buf *bp; struct inode *ip; { register struct buf *ebp, *ep; register ufs_daddr_t start, last; struct vnode *vp; ebp = &buf[nbuf]; start = bp->b_blkno; last = start + btodb(bp->b_bcount) - 1; for (ep = buf; ep < ebp; ep++) { if (ep == bp || (ep->b_flags & B_INVAL) || ep->b_vp == NULLVP) continue; if (VOP_BMAP(ep->b_vp, (ufs_daddr_t)0, &vp, (ufs_daddr_t)0, NULL, NULL)) continue; if (vp != ip->i_devvp) continue; /* look for overlap */ if (ep->b_bcount == 0 || ep->b_blkno > last || ep->b_blkno + btodb(ep->b_bcount) <= start) continue; vprint("Disk overlap", vp); (void)printf("\tstart %lu, end %lu overlap start %lu, end %lu\n", (u_long)start, (u_long)last, (u_long)ep->b_blkno, (u_long)(ep->b_blkno + btodb(ep->b_bcount) - 1)); panic("ffs_checkoverlap: Disk buffer overlap"); } } #endif /* DDB */ /* * block operations * * check if a block is available */ int ffs_isblock(fs, cp, h) struct fs *fs; unsigned char *cp; ufs_daddr_t h; { unsigned char mask; switch ((int)fs->fs_frag) { case 8: return (cp[h] == 0xff); case 4: mask = 0x0f << ((h & 0x1) << 2); return ((cp[h >> 1] & mask) == mask); case 2: mask = 0x03 << ((h & 0x3) << 1); return ((cp[h >> 2] & mask) == mask); case 1: mask = 0x01 << (h & 0x7); return ((cp[h >> 3] & mask) == mask); default: panic("ffs_isblock"); + } +} + +/* + * check that no fragment of the block is free, i.e. that the block + * is completely allocated + */ +int +ffs_isfreeblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + ufs_daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0); + case 4: + return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0); + case 2: + return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0); + case 1: + return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0); + default: + panic("ffs_isfreeblock"); } } /* * take a block out of the map */ void ffs_clrblock(fs, cp, h) struct fs *fs; u_char *cp; ufs_daddr_t h; { switch ((int)fs->fs_frag) { case 8: cp[h] = 0; return; case 4: cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); return; case 1: cp[h >> 3] &= ~(0x01 << (h & 0x7)); return; default: panic("ffs_clrblock"); } } /* * put a block into the map */ void ffs_setblock(fs, cp, h) struct fs *fs; unsigned char *cp; ufs_daddr_t h; { switch ((int)fs->fs_frag) { case 8: cp[h] = 0xff; return; case 4: cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); return; case 2: cp[h >> 2] |= (0x03 << ((h & 0x3) <<
1)); return; case 1: cp[h >> 3] |= (0x01 << (h & 0x7)); return; default: panic("ffs_setblock"); } } Index: head/sys/ufs/ffs/ffs_vfsops.c =================================================================== --- head/sys/ufs/ffs/ffs_vfsops.c (revision 34265) +++ head/sys/ufs/ffs/ffs_vfsops.c (revision 34266) @@ -1,1223 +1,1264 @@ /* * Copyright (c) 1989, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 - * $Id: ffs_vfsops.c,v 1.74 1998/03/07 14:59:44 bde Exp $ + * $Id: ffs_vfsops.c,v 1.75 1998/03/07 21:36:36 dyson Exp $ */ #include "opt_quota.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part"); static int ffs_sbupdate __P((struct ufsmount *, int)); static int ffs_reload __P((struct mount *,struct ucred *,struct proc *)); static int ffs_oldfscompat __P((struct fs *)); static int ffs_mount __P((struct mount *, char *, caddr_t, struct nameidata *, struct proc *)); static int ffs_init __P((struct vfsconf *)); static struct vfsops ufs_vfsops = { ffs_mount, ufs_start, ffs_unmount, ufs_root, ufs_quotactl, ffs_statfs, ffs_sync, ffs_vget, vfs_vrele, ffs_fhtovp, ffs_vptofh, ffs_init, }; VFS_SET(ufs_vfsops, ufs, MOUNT_UFS, 0); /* * ffs_mount * * Called when mounting local physical media * * PARAMETERS: * mountroot * mp mount point structure * path NULL (flag for root mount!!!) * data * ndp * p process (user credentials check [statfs]) * * mount * mp mount point structure * path path to mount point * data pointer to argument struct in user space * ndp mount point namei() return (used for * credentials on reload), reused to look * up block device. 
* p process (user credentials check) * * RETURNS: 0 Success * !0 error number (errno.h) * * LOCK STATE: * * ENTRY * mount point is locked * EXIT * mount point is locked * * NOTES: * A NULL path can be used for a flag since the mount * system call will fail with EFAULT in copyinstr in * namei() if it is a genuine NULL from the user. */ static int ffs_mount( mp, path, data, ndp, p) struct mount *mp; /* mount struct pointer*/ char *path; /* path to mount point*/ caddr_t data; /* arguments to FS specific mount*/ struct nameidata *ndp; /* mount point credentials*/ struct proc *p; /* process requesting mount*/ { u_int size; int err = 0; struct vnode *devvp; struct ufs_args args; struct ufsmount *ump = 0; register struct fs *fs; int error, flags; mode_t accessmode; /* * Use NULL path to flag a root mount */ if( path == NULL) { /* *** * Mounting root file system *** */ if ((err = bdevvp(rootdev, &rootvp))) { printf("ffs_mountroot: can't find rootvp"); return (err); } if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; if( ( err = ffs_mountfs(rootvp, mp, p, M_FFSNODE)) != 0) { /* fs specific cleanup (if any)*/ goto error_1; } goto dostatfs; /* success*/ } /* *** * Mounting non-root file system or updating a file system *** */ /* copy in user arguments*/ err = copyin(data, (caddr_t)&args, sizeof (struct ufs_args)); if (err) goto error_1; /* can't get arguments*/ /* * If updating, check whether changing from read-only to * read/write; if there is no device name, that's all we do. * Disallow clearing MNT_NOCLUSTERR and MNT_NOCLUSTERW flags, * if block device requests. */ if (mp->mnt_flag & MNT_UPDATE) { ump = VFSTOUFS(mp); fs = ump->um_fs; err = 0; if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw[major(ump->um_dev)]->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; - err = ffs_flushfiles(mp, flags, p); + if (mp->mnt_flag & MNT_SOFTDEP) { + err = softdep_flushfiles(mp, flags, p); + } else { + err = ffs_flushfiles(mp, flags, p); + } } if (!err && (mp->mnt_flag & MNT_RELOAD)) err = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); if (err) { goto error_1; } if (fs->fs_ronly && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { if (!fs->fs_clean) { if (mp->mnt_flag & MNT_FORCE) { printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt); } else { printf("WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck.\n", fs->fs_fsmnt); err = EPERM; goto error_1; } } /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ if (p->p_ucred->cr_uid != 0) { devvp = ump->um_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); if (error = VOP_ACCESS(devvp, VREAD | VWRITE, p->p_ucred, p)) { VOP_UNLOCK(devvp, 0, p); return (error); } VOP_UNLOCK(devvp, 0, p); } fs->fs_ronly = 0; } if (fs->fs_ronly == 0) { fs->fs_clean = 0; ffs_sbupdate(ump, MNT_WAIT); } /* if not updating name...*/ if (args.fspec == 0) { /* * Process export requests. Jumping to "success" * will return the vfs_export() error code. */ err = vfs_export(mp, &ump->um_export, &args.export); goto success; } } /* * Not an update, or updating the name: look up the name * and verify that it refers to a sensible block device. 
*/ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); err = namei(ndp); if (err) { /* can't get devvp!*/ goto error_1; } devvp = ndp->ni_vp; if (devvp->v_type != VBLK) { err = ENOTBLK; goto error_2; } if (major(devvp->v_rdev) >= nblkdev) { err = ENXIO; goto error_2; } /* * If mount by non-root, then verify that user has necessary * permissions on the device. */ if (p->p_ucred->cr_uid != 0) { accessmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accessmode |= VWRITE; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); if (error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p)) { vput(devvp); return (error); } VOP_UNLOCK(devvp, 0, p); } if (mp->mnt_flag & MNT_UPDATE) { /* ******************** * UPDATE ******************** */ if (devvp != ump->um_devvp) err = EINVAL; /* needs translation */ else vrele(devvp); /* * Update device name only on success */ if( !err) { /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); } } else { /* ******************** * NEW MOUNT ******************** */ if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR) mp->mnt_flag |= MNT_NOCLUSTERR; if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERW) mp->mnt_flag |= MNT_NOCLUSTERW; /* * Since this is a new mount, we want the names for * the device and the mount point copied in. If an * error occurs, the mountpoint is discarded by the * upper level code. */ /* Save "last mounted on" info for mount point (NULL pad)*/ copyinstr( path, /* mount point*/ mp->mnt_stat.f_mntonname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntonname + size, MNAMELEN - size); /* Save "mounted from" info for mount point (NULL pad)*/ copyinstr( args.fspec, /* device name*/ mp->mnt_stat.f_mntfromname, /* save area*/ MNAMELEN - 1, /* max size*/ &size); /* real size*/ bzero( mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); err = ffs_mountfs(devvp, mp, p, M_FFSNODE); } if (err) { goto error_2; } dostatfs: /* * Initialize FS stat information in mount struct; uses both * mp->mnt_stat.f_mntonname and mp->mnt_stat.f_mntfromname * * This code is common to root and non-root mounts */ (void)VFS_STATFS(mp, &mp->mnt_stat, p); goto success; error_2: /* error with devvp held*/ /* release devvp before failing*/ vrele(devvp); error_1: /* no state to back out*/ success: return( err); } /* * Reload all incore data for a filesystem (used after running fsck on * the root filesystem and finding things to fix). The filesystem must * be mounted read-only. * * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. * 3) re-read summary information from disk. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. */ static int ffs_reload(mp, cred, p) register struct mount *mp; struct ucred *cred; struct proc *p; { register struct vnode *vp, *nvp, *devvp; struct inode *ip; struct csum *space; struct buf *bp; struct fs *fs, *newfs; struct partinfo dpart; dev_t dev; int i, blks, size, error; int32_t *lp; if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); /* * Step 1: invalidate all cached meta-data. 
*/ devvp = VFSTOUFS(mp)->um_devvp; - if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, 0, cred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) panic("ffs_reload: dirty1"); dev = devvp->v_rdev; /* * Only VMIO the backing device if the backing device is a real * block device. This excludes the original MFS implementation. * Note that it is optional that the backing device be VMIOed. This * increases the opportunity for metadata caching. */ if ((devvp->v_type == VBLK) && (major(dev) < nblkdev)) { simple_lock(&devvp->v_interlock); vfs_object_create(devvp, p, p->p_ucred, 0); } /* * Step 2: re-read superblock from disk. */ if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) size = DEV_BSIZE; else size = dpart.disklab->d_secsize; if (error = bread(devvp, (ufs_daddr_t)(SBOFF/size), SBSIZE, NOCRED,&bp)) return (error); newfs = (struct fs *)bp->b_data; if (newfs->fs_magic != FS_MAGIC || newfs->fs_bsize > MAXBSIZE || newfs->fs_bsize < sizeof(struct fs)) { brelse(bp); return (EIO); /* XXX needs translation */ } fs = VFSTOUFS(mp)->um_fs; /* * Copy pointer fields back into superblock before copying in XXX * new superblock. These should really be in the ufsmount. XXX * Note that important parameters (eg fs_ncg) are unchanged. */ bcopy(&fs->fs_csp[0], &newfs->fs_csp[0], sizeof(fs->fs_csp)); newfs->fs_maxcluster = fs->fs_maxcluster; bcopy(newfs, fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; brelse(bp); mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; ffs_oldfscompat(fs); /* * Step 3: re-read summary information from disk. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = fs->fs_csp[0]; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, NOCRED, &bp); if (error) return (error); bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size); brelse(bp); } /* * We no longer know anything about clusters per cylinder group. */ if (fs->fs_contigsumsize > 0) { lp = fs->fs_maxcluster; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; } loop: simple_lock(&mntvnode_slock); for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { if (vp->v_mount != mp) { simple_unlock(&mntvnode_slock); goto loop; } nvp = vp->v_mntvnodes.le_next; /* * Step 4: invalidate all inactive vnodes. */ if (vrecycle(vp, &mntvnode_slock, p)) goto loop; /* * Step 5: invalidate all cached file data. */ simple_lock(&vp->v_interlock); simple_unlock(&mntvnode_slock); if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { goto loop; } if (vinvalbuf(vp, 0, cred, p, 0, 0)) panic("ffs_reload: dirty2"); /* * Step 6: re-read inode data for all active vnodes. 
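Step 6 below also seeds the inode's new i_effnlink field from the on-disk link count. As I read the change, the two counts may legitimately diverge under soft updates while directory operations are in flight, i_effnlink tracking the operations already completed in memory and i_nlink what the disk currently reflects, which is why the non-softdep path in ffs_update() now panics when they disagree. A toy sketch of that invariant (field names from the diff; the scenario is hypothetical):

#include <assert.h>
#include <stdio.h>

struct toy_inode {
	int i_nlink;	/* link count as reflected on disk */
	int i_effnlink;	/* link count including not-yet-written ops */
};

int
main(void)
{
	struct toy_inode ip = { 2, 2 };
	int softdep = 1;

	ip.i_effnlink--;	/* unlink done in memory, not yet on disk */
	if (!softdep)
		assert(ip.i_effnlink == ip.i_nlink); /* ffs_update()'s check */
	ip.i_nlink--;		/* dependency resolved: disk caught up */
	printf("eff %d, disk %d\n", ip.i_effnlink, ip.i_nlink);
	return (0);
}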
*/ ip = VTOI(vp); error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { vput(vp); return (error); } ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ip->i_number)); + ip->i_effnlink = ip->i_nlink; brelse(bp); vput(vp); simple_lock(&mntvnode_slock); } simple_unlock(&mntvnode_slock); return (0); } /* * Common code for mount and mountroot */ int ffs_mountfs(devvp, mp, p, malloctype) register struct vnode *devvp; struct mount *mp; struct proc *p; struct malloc_type *malloctype; { register struct ufsmount *ump; struct buf *bp; register struct fs *fs; + struct cg *cgp; dev_t dev; struct partinfo dpart; + struct csum cstotal; caddr_t base, space; - int error, i, blks, size, ronly; + int error, i, cyl, blks, size, ronly; int32_t *lp; struct ucred *cred; u_int64_t maxfilesize; /* XXX */ u_int strsize; int ncount; dev = devvp->v_rdev; cred = p ? p->p_ucred : NOCRED; /* * Disallow multiple mounts of the same device. * Disallow mounting of a device that is currently in use * (except for root, which might share swap device for miniroot). * Flush out any old buffers remaining from a previous use. */ error = vfs_mountedon(devvp); if (error) return (error); ncount = vcount(devvp); if (ncount > 1 && devvp != rootvp) return (EBUSY); - if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) return (error); /* * Only VMIO the backing device if the backing device is a real * block device. This excludes the original MFS implementation. * Note that it is optional that the backing device be VMIOed. This * increases the opportunity for metadata caching. */ if ((devvp->v_type == VBLK) && (major(dev) < nblkdev)) { simple_lock(&devvp->v_interlock); vfs_object_create(devvp, p, p->p_ucred, 0); } ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0) size = DEV_BSIZE; else size = dpart.disklab->d_secsize; bp = NULL; ump = NULL; if (error = bread(devvp, SBLOCK, SBSIZE, cred, &bp)) goto out; fs = (struct fs *)bp->b_data; if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || fs->fs_bsize < sizeof(struct fs)) { error = EINVAL; /* XXX needs translation */ goto out; } fs->fs_fmod = 0; if (!fs->fs_clean) { if (ronly || (mp->mnt_flag & MNT_FORCE)) { printf("WARNING: %s was not properly dismounted.\n",fs->fs_fsmnt); } else { printf("WARNING: R/W mount of %s denied. 
Filesystem is not clean - run fsck.\n",fs->fs_fsmnt); error = EPERM; goto out; } } /* XXX updating 4.2 FFS superblocks trashes rotational layout tables */ if (fs->fs_postblformat == FS_42POSTBLFMT && !ronly) { error = EROFS; /* needs translation */ goto out; } ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK); bzero((caddr_t)ump, sizeof *ump); ump->um_malloctype = malloctype; ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, M_WAITOK); ump->um_blkatoff = ffs_blkatoff; ump->um_truncate = ffs_truncate; ump->um_update = ffs_update; ump->um_valloc = ffs_valloc; ump->um_vfree = ffs_vfree; bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); if (fs->fs_sbsize < SBSIZE) bp->b_flags |= B_INVAL; brelse(bp); bp = NULL; fs = ump->um_fs; fs->fs_ronly = ronly; if (ronly == 0) { fs->fs_fmod = 1; fs->fs_clean = 0; } size = fs->fs_cssize; blks = howmany(size, fs->fs_fsize); if (fs->fs_contigsumsize > 0) size += fs->fs_ncg * sizeof(int32_t); base = space = malloc((u_long)size, M_UFSMNT, M_WAITOK); for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, cred, &bp)) { free(base, M_UFSMNT); goto out; } bcopy(bp->b_data, space, (u_int)size); fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space; space += size; brelse(bp); bp = NULL; } if (fs->fs_contigsumsize > 0) { fs->fs_maxcluster = lp = (int32_t *)space; for (i = 0; i < fs->fs_ncg; i++) *lp++ = fs->fs_contigsumsize; } mp->mnt_data = (qaddr_t)ump; mp->mnt_stat.f_fsid.val[0] = (long)dev; if (fs->fs_id[0] != 0 && fs->fs_id[1] != 0) mp->mnt_stat.f_fsid.val[1] = fs->fs_id[1]; else mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; mp->mnt_flag |= MNT_LOCAL; ump->um_mountp = mp; ump->um_dev = dev; ump->um_devvp = devvp; ump->um_nindir = fs->fs_nindir; ump->um_bptrtodb = fs->fs_fsbtodb; ump->um_seqinc = fs->fs_frag; for (i = 0; i < MAXQUOTAS; i++) ump->um_quotas[i] = NULLVP; - devvp->v_specflags |= SI_MOUNTEDON; + devvp->v_specmountpoint = mp; ffs_oldfscompat(fs); /* * Set FS local "last mounted on" information (NULL pad) */ copystr( mp->mnt_stat.f_mntonname, /* mount point*/ fs->fs_fsmnt, /* copy area*/ sizeof(fs->fs_fsmnt) - 1, /* max size*/ &strsize); /* real size*/ bzero( fs->fs_fsmnt + strsize, sizeof(fs->fs_fsmnt) - strsize); if( mp->mnt_flag & MNT_ROOTFS) { /* * Root mount; update timestamp in mount structure. * this will be used by the common root mount code * to update the system clock. */ mp->mnt_time = fs->fs_time; } ump->um_savedmaxfilesize = fs->fs_maxfilesize; /* XXX */ maxfilesize = (u_int64_t)0x40000000 * fs->fs_bsize - 1; /* XXX */ if (fs->fs_maxfilesize > maxfilesize) /* XXX */ fs->fs_maxfilesize = maxfilesize; /* XXX */ if (ronly == 0) { + if ((fs->fs_flags & FS_DOSOFTDEP) && + (error = softdep_mount(devvp, mp, fs, cred)) != 0) { + free(base, M_UFSMNT); + goto out; + } fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT); } return (0); out: + devvp->v_specmountpoint = NULL; if (bp) brelse(bp); (void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, cred, p); if (ump) { free(ump->um_fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; } return (error); } /* * Sanity checks for old file systems. * * XXX - goes away some day. 
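The XXX-marked lines above cap fs_maxfilesize at 2^30 blocks less one byte, stashing the on-disk value in um_savedmaxfilesize so that ffs_sbupdate() can restore it before writing the superblock back. A small sketch of the cap; the block size and on-disk value are samples:

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		uint64_t bsize = 8192;		/* sample fs_bsize */
		uint64_t ondisk = ~(uint64_t)0;	/* sample on-disk fs_maxfilesize */
		uint64_t clamp = (uint64_t)0x40000000 * bsize - 1;

		/* mirror the XXX-marked mount-time cap above */
		uint64_t maxfilesize = ondisk > clamp ? clamp : ondisk;
		printf("bsize %llu: maxfilesize capped at %llu bytes\n",
		    (unsigned long long)bsize, (unsigned long long)maxfilesize);
		return (0);
	}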
*/ static int ffs_oldfscompat(fs) struct fs *fs; { fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */ fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */ if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ fs->fs_nrpos = 8; /* XXX */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ #if 0 int i; /* XXX */ u_int64_t sizepb = fs->fs_bsize; /* XXX */ /* XXX */ fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */ for (i = 0; i < NIADDR; i++) { /* XXX */ sizepb *= NINDIR(fs); /* XXX */ fs->fs_maxfilesize += sizepb; /* XXX */ } /* XXX */ #endif fs->fs_maxfilesize = (u_quad_t) 1LL << 39; fs->fs_qbmask = ~fs->fs_bmask; /* XXX */ fs->fs_qfmask = ~fs->fs_fmask; /* XXX */ } /* XXX */ return (0); } /* * unmount system call */ int ffs_unmount(mp, mntflags, p) struct mount *mp; int mntflags; struct proc *p; { register struct ufsmount *ump; register struct fs *fs; int error, flags; flags = 0; if (mntflags & MNT_FORCE) { flags |= FORCECLOSE; } - error = ffs_flushfiles(mp, flags, p); - if (error) - return (error); + if (mp->mnt_flag & MNT_SOFTDEP) { + if ((error = softdep_flushfiles(mp, flags, p)) != 0) + return (error); + } else { + if ((error = ffs_flushfiles(mp, flags, p)) != 0) + return (error); + } ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_ronly == 0) { fs->fs_clean = 1; error = ffs_sbupdate(ump, MNT_WAIT); if (error) { fs->fs_clean = 0; return (error); } } - ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + ump->um_devvp->v_specmountpoint = NULL; vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0); error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, NOCRED, p); vrele(ump->um_devvp); free(fs->fs_csp[0], M_UFSMNT); free(fs, M_UFSMNT); free(ump, M_UFSMNT); mp->mnt_data = (qaddr_t)0; mp->mnt_flag &= ~MNT_LOCAL; return (error); } /* * Flush out all the files in a filesystem. */ int ffs_flushfiles(mp, flags, p) register struct mount *mp; int flags; struct proc *p; { register struct ufsmount *ump; int error; ump = VFSTOUFS(mp); #ifdef QUOTA if (mp->mnt_flag & MNT_QUOTA) { int i; error = vflush(mp, NULLVP, SKIPSYSTEM|flags); if (error) return (error); for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP) continue; quotaoff(p, mp, i); } /* * Here we fall through to vflush again to ensure * that we have gotten rid of all the system vnodes. */ } #endif - error = vflush(mp, NULLVP, flags); + /* + * Flush all the files. + */ + if ((error = vflush(mp, NULL, flags)) != 0) + return (error); + /* + * Flush filesystem metadata. + */ + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_FSYNC(ump->um_devvp, p->p_ucred, MNT_WAIT, p); + VOP_UNLOCK(ump->um_devvp, 0, p); return (error); } /* * Get file system statistics. 
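ffs_statfs() below derives f_bfree from the cylinder-group summary totals and f_bavail through the freespace() macro defined in the fs.h hunk further down. A sketch of that arithmetic with invented parameters and summary counts:

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		/* invented filesystem parameters and summary totals */
		int32_t frag = 8;		/* fs_frag */
		int32_t minfree = 8;		/* fs_minfree (the MINFREE default) */
		int32_t dsize = 1000000;	/* fs_dsize, in frags */
		int32_t nbfree = 50000;		/* cs_nbfree, whole free blocks */
		int32_t nffree = 2000;		/* cs_nffree, free fragments */

		int32_t bfree = nbfree * frag + nffree;		/* f_bfree */
		/* freespace(fs, fs_minfree): frags available to ordinary users */
		int32_t bavail = bfree - dsize * minfree / 100;	/* f_bavail */

		printf("f_bfree %d frags, f_bavail %d frags\n",
		    (int)bfree, (int)bavail);
		return (0);
	}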
*/ int ffs_statfs(mp, sbp, p) struct mount *mp; register struct statfs *sbp; struct proc *p; { register struct ufsmount *ump; register struct fs *fs; ump = VFSTOUFS(mp); fs = ump->um_fs; if (fs->fs_magic != FS_MAGIC) panic("ffs_statfs"); sbp->f_bsize = fs->fs_fsize; sbp->f_iosize = fs->fs_bsize; sbp->f_blocks = fs->fs_dsize; sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + fs->fs_cstotal.cs_nffree; sbp->f_bavail = freespace(fs, fs->fs_minfree); sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; sbp->f_ffree = fs->fs_cstotal.cs_nifree; if (sbp != &mp->mnt_stat) { sbp->f_type = mp->mnt_vfc->vfc_typenum; bcopy((caddr_t)mp->mnt_stat.f_mntonname, (caddr_t)&sbp->f_mntonname[0], MNAMELEN); bcopy((caddr_t)mp->mnt_stat.f_mntfromname, (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); } return (0); } /* * Go through the disk queues to initiate sandbagged IO; * go through the inodes to write those that have been modified; * initiate the writing of the super block if it has been modified. * * Note: we are always called with the filesystem marked `MPBUSY'. */ int ffs_sync(mp, waitfor, cred, p) struct mount *mp; int waitfor; struct ucred *cred; struct proc *p; { struct vnode *nvp, *vp; struct inode *ip; struct ufsmount *ump = VFSTOUFS(mp); struct fs *fs; struct timeval tv; int error, allerror = 0; fs = ump->um_fs; if (fs->fs_fmod != 0 && fs->fs_ronly != 0) { /* XXX */ printf("fs = %s\n", fs->fs_fsmnt); panic("ffs_sync: rofs mod"); } /* * Write back each (modified) inode. */ simple_lock(&mntvnode_slock); loop: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { /* * If the vnode that we are about to sync is no longer * associated with this mount point, start over. */ if (vp->v_mount != mp) goto loop; simple_lock(&vp->v_interlock); nvp = vp->v_mntvnodes.le_next; ip = VTOI(vp); - if (((ip->i_flag & + if ((vp->v_type == VNON) || ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) && - vp->v_dirtyblkhd.lh_first == NULL) { + ((vp->v_dirtyblkhd.lh_first == NULL) || (waitfor == MNT_LAZY))) { simple_unlock(&vp->v_interlock); continue; } if (vp->v_type != VCHR) { simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto loop; continue; } if (error = VOP_FSYNC(vp, cred, waitfor, p)) allerror = error; VOP_UNLOCK(vp, 0, p); vrele(vp); simple_lock(&mntvnode_slock); } else { simple_unlock(&mntvnode_slock); simple_unlock(&vp->v_interlock); gettime(&tv); /* UFS_UPDATE(vp, &tv, &tv, waitfor == MNT_WAIT); */ UFS_UPDATE(vp, &tv, &tv, 0); simple_lock(&mntvnode_slock); } } simple_unlock(&mntvnode_slock); /* * Force stale file system control information to be flushed. */ - error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p); - if (error) - allerror = error; + if (waitfor != MNT_LAZY) { + if (ump->um_mountp->mnt_flag & MNT_SOFTDEP) + waitfor = MNT_NOWAIT; + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY, p); + if ((error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) != 0) + allerror = error; + VOP_UNLOCK(ump->um_devvp, 0, p); + } #ifdef QUOTA qsync(mp); #endif /* * Write back modified superblock. */ - if (fs->fs_fmod != 0) { - fs->fs_fmod = 0; - fs->fs_time = time.tv_sec; - if (error = ffs_sbupdate(ump, waitfor)) - allerror = error; - } + if (fs->fs_fmod != 0 && (error = ffs_sbupdate(ump, waitfor)) != 0) + allerror = error; return (allerror); } /* * Look up a FFS dinode number to find its incore vnode, otherwise read it * in from disk. 
If it is in core, wait for the lock bit to clear, then * return the inode locked. Detection and handling of mount points must be * done by the calling routine. */ static int ffs_inode_hash_lock; int ffs_vget(mp, ino, vpp) struct mount *mp; ino_t ino; struct vnode **vpp; { struct fs *fs; struct inode *ip; struct ufsmount *ump; struct buf *bp; struct vnode *vp; dev_t dev; int error; ump = VFSTOUFS(mp); dev = ump->um_dev; restart: if ((*vpp = ufs_ihashget(dev, ino)) != NULL) { return (0); } /* * Lock out the creation of new entries in the FFS hash table in * case getnewvnode() or MALLOC() blocks, otherwise a duplicate * may occur! */ if (ffs_inode_hash_lock) { while (ffs_inode_hash_lock) { ffs_inode_hash_lock = -1; tsleep(&ffs_inode_hash_lock, PVM, "ffsvgt", 0); } goto restart; } ffs_inode_hash_lock = 1; /* * If this MALLOC() is performed after the getnewvnode() * it might block, leaving a vnode with a NULL v_data to be * found by ffs_sync() if a sync happens to fire right then, * which will cause a panic because ffs_sync() blindly * dereferences vp->v_data (as well it should). */ MALLOC(ip, struct inode *, sizeof(struct inode), ump->um_malloctype, M_WAITOK); /* Allocate a new vnode/inode. */ error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp); if (error) { if (ffs_inode_hash_lock < 0) wakeup(&ffs_inode_hash_lock); ffs_inode_hash_lock = 0; *vpp = NULL; FREE(ip, ump->um_malloctype); return (error); } bzero((caddr_t)ip, sizeof(struct inode)); lockinit(&ip->i_lock, PINOD, "inode", 0, 0); vp->v_data = ip; ip->i_vnode = vp; ip->i_fs = fs = ump->um_fs; ip->i_dev = dev; ip->i_number = ino; #ifdef QUOTA { int i; for (i = 0; i < MAXQUOTAS; i++) ip->i_dquot[i] = NODQUOT; } #endif /* * Put it onto its hash chain and lock it so that other requests for * this inode will block if they arrive while we are sleeping waiting * for old data structures to be purged or for the contents of the * disk portion of this inode to be read. */ ufs_ihashins(ip); if (ffs_inode_hash_lock < 0) wakeup(&ffs_inode_hash_lock); ffs_inode_hash_lock = 0; /* Read in the disk contents for the inode, copy into the inode. */ error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), (int)fs->fs_bsize, NOCRED, &bp); if (error) { /* * The inode does not contain anything useful, so it would * be misleading to leave it on its hash chain. With mode * still zero, it will be unlinked and returned to the free * list by vput(). */ brelse(bp); vput(vp); *vpp = NULL; return (error); } ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + if (DOINGSOFTDEP(vp)) + softdep_load_inodeblock(ip); + else + ip->i_effnlink = ip->i_nlink; bqrelse(bp); /* * Initialize the vnode from the inode, check for aliases. * Note that the underlying vnode may have changed. */ error = ufs_vinit(mp, ffs_specop_p, ffs_fifoop_p, &vp); if (error) { vput(vp); *vpp = NULL; return (error); } /* * Finish inode initialization now that aliasing has been resolved. */ ip->i_devvp = ump->um_devvp; VREF(ip->i_devvp); /* * Set up a generation number for this inode if it does not * already have one. This should only happen on old filesystems. */ if (ip->i_gen == 0) { ip->i_gen = random() / 2 + 1; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) ip->i_flag |= IN_MODIFIED; } /* * Ensure that uid and gid are correct. This is a temporary * fix until fsck has been changed to do the update. 
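The bread() above locates the inode's filesystem block via ino_to_fsba() and then indexes into the buffer with ino_to_fsbo(); both macros appear in the fs.h hunk below. A simplified userland sketch of the mapping, with invented geometry and cgimin() reduced to cgbase() + fs_iblkno (that is, fs_cgoffset taken as zero):

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		/* invented geometry */
		int32_t ipg = 1792;	/* fs_ipg: inodes per cylinder group */
		int32_t inopb = 64;	/* INOPB: inodes per block */
		int32_t fragshift = 3;	/* fs_fragshift: 8 frags per block */
		int32_t fpg = 16384;	/* fs_fpg: frags per group */
		int32_t iblkno = 24;	/* fs_iblkno */
		int32_t dbshift = 1;	/* fs_fsbtodb */
		uint32_t ino = 40000;	/* sample inode number */

		int32_t cg = ino / ipg;				/* ino_to_cg() */
		int32_t fsba = fpg * cg + iblkno +		/* cgimin(), with */
		    ((ino % ipg) / inopb << fragshift);		/* fs_cgoffset == 0 */
		int32_t slot = ino % inopb;			/* ino_to_fsbo() */

		printf("ino %u: cg %d, fs block %d, dev block %d, slot %d\n",
		    (unsigned)ino, (int)cg, (int)fsba, (int)(fsba << dbshift),
		    (int)slot);
		return (0);
	}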
*/ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ ip->i_uid = ip->i_din.di_ouid; /* XXX */ ip->i_gid = ip->i_din.di_ogid; /* XXX */ } /* XXX */ *vpp = vp; return (0); } /* * File handle to vnode * * Have to be really careful about stale file handles: * - check that the inode number is valid * - call ffs_vget() to get the locked inode * - check for an unallocated inode (i_mode == 0) * - check that the given client host has export rights and return * those rights via exflagsp and credanonp */ int ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) register struct mount *mp; struct fid *fhp; struct sockaddr *nam; struct vnode **vpp; int *exflagsp; struct ucred **credanonp; { register struct ufid *ufhp; struct fs *fs; ufhp = (struct ufid *)fhp; fs = VFSTOUFS(mp)->um_fs; if (ufhp->ufid_ino < ROOTINO || ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) return (ESTALE); return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); } /* * Vnode pointer to File handle */ /* ARGSUSED */ int ffs_vptofh(vp, fhp) struct vnode *vp; struct fid *fhp; { register struct inode *ip; register struct ufid *ufhp; ip = VTOI(vp); ufhp = (struct ufid *)fhp; ufhp->ufid_len = sizeof(struct ufid); ufhp->ufid_ino = ip->i_number; ufhp->ufid_gen = ip->i_gen; return (0); } /* * Initialize the filesystem; just use ufs_init. */ static int ffs_init(vfsp) struct vfsconf *vfsp; { + softdep_initialize(); return (ufs_init(vfsp)); } /* * Write a superblock and associated information back to disk. */ static int ffs_sbupdate(mp, waitfor) struct ufsmount *mp; int waitfor; { register struct fs *dfs, *fs = mp->um_fs; register struct buf *bp; int blks; caddr_t space; int i, size, error, allerror = 0; /* * First write back the summary information. */ blks = howmany(fs->fs_cssize, fs->fs_fsize); space = (caddr_t)fs->fs_csp[0]; for (i = 0; i < blks; i += fs->fs_frag) { size = fs->fs_bsize; if (i + fs->fs_frag > blks) size = (blks - i) * fs->fs_fsize; bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), size, 0, 0); bcopy(space, bp->b_data, (u_int)size); space += size; if (waitfor != MNT_WAIT) bawrite(bp); else if (error = bwrite(bp)) allerror = error; } /* * Now write back the superblock itself. If any errors occurred * up to this point, then fail so that the superblock avoids * being written out as clean. */ if (allerror) return (allerror); bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); + fs->fs_fmod = 0; + fs->fs_time = time.tv_sec; bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); /* Restore compatibility to old file systems. XXX */ dfs = (struct fs *)bp->b_data; /* XXX */ if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ dfs->fs_nrpos = -1; /* XXX */ if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ int32_t *lp, tmp; /* XXX */ /* XXX */ lp = (int32_t *)&dfs->fs_qbmask; /* XXX */ tmp = lp[4]; /* XXX */ for (i = 4; i > 0; i--) /* XXX */ lp[i] = lp[i-1]; /* XXX */ lp[0] = tmp; /* XXX */ } /* XXX */ dfs->fs_maxfilesize = mp->um_savedmaxfilesize; /* XXX */ if (waitfor != MNT_WAIT) bawrite(bp); else if (error = bwrite(bp)) allerror = error; return (allerror); } Index: head/sys/ufs/ffs/ffs_vnops.c =================================================================== --- head/sys/ufs/ffs/ffs_vnops.c (revision 34265) +++ head/sys/ufs/ffs/ffs_vnops.c (revision 34266) @@ -1,212 +1,253 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 - * $Id: ffs_vnops.c,v 1.42 1998/02/06 12:14:16 eivind Exp $ + * $Id: ffs_vnops.c,v 1.43 1998/02/26 06:39:38 msmith Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int ffs_fsync __P((struct vop_fsync_args *)); static int ffs_getpages __P((struct vop_getpages_args *)); static int ffs_putpages __P((struct vop_putpages_args *)); static int ffs_read __P((struct vop_read_args *)); static int ffs_write __P((struct vop_write_args *)); /* Global vfs data structures for ufs. 
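The operation tables that follow register FFS-specific entry points (this diff adds vop_balloc) over a default that falls through to the generic UFS routines. A toy model of that default-plus-override dispatch; the string-keyed lookup and all names here are invented for illustration, where the kernel dispatches on operation descriptor pointers:

	#include <stdio.h>
	#include <string.h>

	typedef void (*vop_t)(void);

	static void generic_op(void) { printf("generic ufs operation\n"); }
	static void fsync_op(void) { printf("ffs-specific fsync\n"); }

	struct opv_entry {
		const char *desc;	/* stands in for a vop_*_desc pointer */
		vop_t fn;
	};

	static struct opv_entry ops[] = {
		{ "default", generic_op },	/* like vop_default_desc */
		{ "fsync", fsync_op },		/* like vop_fsync_desc */
		{ NULL, NULL }
	};

	static vop_t
	opv_lookup(const char *desc)
	{
		struct opv_entry *e;

		for (e = ops; e->desc != NULL; e++)
			if (strcmp(e->desc, desc) == 0)
				return (e->fn);
		return (ops[0].fn);	/* no specific entry: use the default */
	}

	int
	main(void)
	{
		opv_lookup("fsync")();	/* dispatches to the FFS override */
		opv_lookup("read")();	/* unregistered: handled by the default */
		return (0);
	}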
*/ vop_t **ffs_vnodeop_p; static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperate }, { &vop_fsync_desc, (vop_t *) ffs_fsync }, { &vop_getpages_desc, (vop_t *) ffs_getpages }, { &vop_putpages_desc, (vop_t *) ffs_putpages }, { &vop_read_desc, (vop_t *) ffs_read }, + { &vop_balloc_desc, (vop_t *) ffs_balloc }, { &vop_reallocblks_desc, (vop_t *) ffs_reallocblks }, { &vop_write_desc, (vop_t *) ffs_write }, { NULL, NULL } }; static struct vnodeopv_desc ffs_vnodeop_opv_desc = { &ffs_vnodeop_p, ffs_vnodeop_entries }; vop_t **ffs_specop_p; static struct vnodeopv_entry_desc ffs_specop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperatespec }, { &vop_fsync_desc, (vop_t *) ffs_fsync }, { NULL, NULL } }; static struct vnodeopv_desc ffs_specop_opv_desc = { &ffs_specop_p, ffs_specop_entries }; vop_t **ffs_fifoop_p; static struct vnodeopv_entry_desc ffs_fifoop_entries[] = { { &vop_default_desc, (vop_t *) ufs_vnoperatefifo }, { &vop_fsync_desc, (vop_t *) ffs_fsync }, { NULL, NULL } }; static struct vnodeopv_desc ffs_fifoop_opv_desc = { &ffs_fifoop_p, ffs_fifoop_entries }; VNODEOP_SET(ffs_vnodeop_opv_desc); VNODEOP_SET(ffs_specop_opv_desc); VNODEOP_SET(ffs_fifoop_opv_desc); SYSCTL_NODE(_vfs, MOUNT_UFS, ffs, CTLFLAG_RW, 0, "FFS filesystem"); #include /* * Synch an open file. */ /* ARGSUSED */ static int ffs_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { - register struct vnode *vp = ap->a_vp; - register struct buf *bp; + struct vnode *vp = ap->a_vp; + struct buf *bp; struct timeval tv; struct buf *nbp; - int pass; - int s; + int s, error, passes, skipmeta; daddr_t lbn; if (vp->v_type == VBLK) { lbn = INT_MAX; } else { struct inode *ip; ip = VTOI(vp); lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); } - pass = 0; /* * Flush all dirty buffers associated with a vnode. */ + passes = NIADDR; + skipmeta = 0; + if (ap->a_waitfor == MNT_WAIT) + skipmeta = 1; loop: s = splbio(); +loop2: for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; - if ((bp->b_flags & B_BUSY) || (pass == 0 && (bp->b_lblkno < 0))) + /* + * First time through on a synchronous call, + * or if it's already scheduled, skip to the next + * buffer. + */ + if ((bp->b_flags & B_BUSY) || + ((skipmeta == 1) && (bp->b_lblkno < 0))) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); - - if (((bp->b_vp != vp) || (ap->a_waitfor != MNT_NOWAIT)) || - ((vp->v_type != VREG) && (vp->v_type != VBLK))) { - + /* + * If data is outstanding to another vnode, or we were + * asked to wait for everything, or it's not a file or BDEV, + * start the IO on this buffer immediately. + */ + if (((bp->b_vp != vp) || (ap->a_waitfor == MNT_WAIT)) || + ((vp->v_type != VREG) && (vp->v_type != VBLK))) { bremfree(bp); bp->b_flags |= B_BUSY; splx(s); /* - * Wait for I/O associated with indirect blocks to complete, - * since there is no way to quickly wait for them below. + * Wait for I/O associated with indirect blocks to + * complete, since there is no way to quickly wait + * for them below.
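The skipmeta logic above defers metadata (negative b_lblkno, i.e. indirect blocks) to a second pass when the call is synchronous, with passes = NIADDR bounding the retries taken further down. A stripped-down userland model of that two-pass ordering over a fake buffer list:

	#include <stdio.h>

	struct fakebuf {
		long lblkno;	/* negative lblkno == indirect (metadata) block */
		int dirty;
	};

	int
	main(void)
	{
		struct fakebuf bufs[] = { { 0, 1 }, { -1, 1 }, { 3, 1 }, { -2, 1 } };
		int n = sizeof(bufs) / sizeof(bufs[0]);
		int skipmeta = 1;	/* synchronous call: push data first */
		int i;

	loop:
		for (i = 0; i < n; i++) {
			if (bufs[i].dirty == 0 || (skipmeta && bufs[i].lblkno < 0))
				continue;	/* defer metadata on the first pass */
			printf("write lblkno %ld\n", bufs[i].lblkno);
			bufs[i].dirty = 0;
		}
		if (skipmeta) {
			skipmeta = 0;	/* second pass picks up the indirect blocks */
			goto loop;
		}
		return (0);
	}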
*/ - if ((bp->b_vp == vp) && (ap->a_waitfor == MNT_NOWAIT)) { + if ((bp->b_vp == vp) || (ap->a_waitfor != MNT_WAIT)) { if (bp->b_flags & B_CLUSTEROK) { bdwrite(bp); (void) vfs_bio_awrite(bp); } else { (void) bawrite(bp); } } else { (void) bwrite(bp); } - } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) { - + /* + * If the buffer is for data that has been truncated + * off the file, then throw it away. + */ bremfree(bp); bp->b_flags |= B_BUSY | B_INVAL | B_NOCACHE; brelse(bp); splx(s); - } else { vfs_bio_awrite(bp); splx(s); } goto loop; } + /* + * If we were asked to do this synchronously, then go back for + * another pass, this time doing the metadata. + */ + if (skipmeta) { + skipmeta = 0; + goto loop2; /* stay within the splbio() */ + } splx(s); - if (pass == 0) { - pass = 1; - goto loop; - } - if (ap->a_waitfor == MNT_WAIT) { s = splbio(); while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "ffsfsn", 0); } + /* + * Ensure that any filesystem metadata associated + * with the vnode has been written. + */ splx(s); -#ifdef DIAGNOSTIC + if ((error = softdep_sync_metadata(ap)) != 0) + return (error); + s = splbio(); if (vp->v_dirtyblkhd.lh_first) { - vprint("ffs_fsync: dirty", vp); - goto loop; - } + /* + * Block devices associated with filesystems may + * have new I/O requests posted for them even if + * the vnode is locked, so no amount of trying will + * get them clean. Thus we give block devices a + * good effort, then just give up. For all other file + * types, go around and try again until it is clean. + */ + if (passes > 0) { + passes -= 1; + goto loop2; + } +#ifdef DIAGNOSTIC + if (vp->v_type != VBLK) + vprint("ffs_fsync: dirty", vp); #endif + } } - gettime(&tv); - return (UFS_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); + error = UFS_UPDATE(ap->a_vp, &tv, &tv, (ap->a_waitfor == MNT_WAIT)); + if (error) + return (error); + if (DOINGSOFTDEP(vp) && ap->a_waitfor == MNT_WAIT) + error = softdep_fsync(vp); + return (error); } Index: head/sys/ufs/ffs/fs.h =================================================================== --- head/sys/ufs/ffs/fs.h (revision 34265) +++ head/sys/ufs/ffs/fs.h (revision 34266) @@ -1,510 +1,522 @@ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)fs.h 8.13 (Berkeley) 3/21/95 - * $Id: fs.h,v 1.11 1997/03/23 20:08:22 guido Exp $ + * $Id: fs.h,v 1.12 1997/03/24 03:19:37 bde Exp $ */ #ifndef _UFS_FFS_FS_H_ #define _UFS_FFS_FS_H_ /* * Each disk drive contains some number of file systems. * A file system consists of a number of cylinder groups. * Each cylinder group has inodes and data. * * A file system is described by its super-block, which in turn * describes the cylinder groups. The super-block is critical * data and is replicated in each cylinder group to protect against * catastrophic loss. This is done at `newfs' time and the critical * super-block data does not change, so the copies need not be * referenced further unless disaster strikes. * * For file system fs, the offsets of the various blocks of interest * are given in the super block as: * [fs->fs_sblkno] Super-block * [fs->fs_cblkno] Cylinder group block * [fs->fs_iblkno] Inode blocks * [fs->fs_dblkno] Data blocks * The beginning of cylinder group cg in fs, is given by * the ``cgbase(fs, cg)'' macro. * * The first boot and super blocks are given in absolute disk addresses. * The byte-offset forms are preferred, as they don't imply a sector size. */ #define BBSIZE 8192 #define SBSIZE 8192 #define BBOFF ((off_t)(0)) #define SBOFF ((off_t)(BBOFF + BBSIZE)) #define BBLOCK ((ufs_daddr_t)(0)) #define SBLOCK ((ufs_daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE)) /* * Addresses stored in inodes are capable of addressing fragments * of `blocks'. File system blocks of at most size MAXBSIZE can * be optionally broken into 2, 4, or 8 pieces, each of which is * addressable; these pieces may be DEV_BSIZE, or some multiple of * a DEV_BSIZE unit. * * Large files consist of exclusively large data blocks. To avoid * undue wasted disk space, the last data block of a small file may be * allocated as only as many fragments of a large block as are * necessary. The file system format retains only a single pointer * to such a fragment, which is a piece of a single large block that * has been divided. The size of such a fragment is determinable from * information in the inode, using the ``blksize(fs, ip, lbn)'' macro. * * The file system records space availability at the fragment level; * to determine block availability, aligned fragments are examined. */ /* * MINBSIZE is the smallest allowable block size. * In order to insure that it is possible to create files of size * 2^32 with only two levels of indirection, MINBSIZE is set to 4096. * MINBSIZE must be big enough to hold a cylinder group block, * thus changes to (struct cg) must keep its size within MINBSIZE. * Note that super blocks are always of size SBSIZE, * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE. */ #define MINBSIZE 4096 /* * The path name on which the file system is mounted is maintained * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in * the super block for this name. */ #define MAXMNTLEN 512 /* * The limit on the amount of summary information per file system * is defined by MAXCSBUFS. 
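As the comment here goes on to explain, MAXCSBUFS is carved out of 128 bytes of superblock pointer space, minus one pointer reserved for the cluster-size array, so the buffer count halves on 64-bit machines. The sizing as a quick check you can run:

	#include <stdio.h>

	#define MAXCSBUFS_FOR(ptrsize)	((128 / (ptrsize)) - 1)

	int
	main(void)
	{
		printf("32-bit pointers: %d summary buffers\n", MAXCSBUFS_FOR(4));
		printf("64-bit pointers: %d summary buffers\n", MAXCSBUFS_FOR(8));
		printf("this machine:    %d summary buffers\n",
		    (int)(128 / sizeof(void *)) - 1);
		return (0);
	}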
It is currently parameterized for a * size of 128 bytes (2 million cylinder groups on machines with * 32-bit pointers, and 1 million on 64-bit machines). One pointer * is taken away to point to an array of cluster sizes that is * computed as cylinder groups are inspected. */ #define MAXCSBUFS ((128 / sizeof(void *)) - 1) /* * A summary of contiguous blocks of various sizes is maintained * in each cylinder group. Normally this is set by the initial * value of fs_maxcontig. To conserve space, a maximum summary size * is set by FS_MAXCONTIG. */ #define FS_MAXCONTIG 16 /* * MINFREE gives the minimum acceptable percentage of file system * blocks which may be free. If the freelist drops below this level * only the superuser may continue to allocate blocks. This may * be set to 0 if no reserve of free blocks is deemed necessary, * however throughput drops by fifty percent if the file system * is run at between 95% and 100% full; thus the minimum default * value of fs_minfree is 5%. However, to get good clustering * performance, 10% is a better choice. hence we use 10% as our * default value. With 10% free space, fragmentation is not a * problem, so we choose to optimize for time. */ #define MINFREE 8 #define DEFAULTOPT FS_OPTTIME /* * Per cylinder group information; summarized in blocks allocated * from first cylinder group data blocks. These blocks have to be * read in from fs_csaddr (size fs_cssize) in addition to the * super block. * * N.B. sizeof(struct csum) must be a power of two in order for * the ``fs_cs'' macro to work (see below). */ struct csum { int32_t cs_ndir; /* number of directories */ int32_t cs_nbfree; /* number of free blocks */ int32_t cs_nifree; /* number of free inodes */ int32_t cs_nffree; /* number of free frags */ }; /* * Super block for an FFS file system. 
*/ struct fs { int32_t fs_firstfield; /* historic file system linked list, */ int32_t fs_unused_1; /* used for incore super blocks */ ufs_daddr_t fs_sblkno; /* addr of super-block in filesys */ ufs_daddr_t fs_cblkno; /* offset of cyl-block in filesys */ ufs_daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ ufs_daddr_t fs_dblkno; /* offset of first data after cg */ int32_t fs_cgoffset; /* cylinder group offset in cylinder */ int32_t fs_cgmask; /* used to calc mod fs_ntrak */ time_t fs_time; /* last time written */ int32_t fs_size; /* number of blocks in fs */ int32_t fs_dsize; /* number of data blocks in fs */ int32_t fs_ncg; /* number of cylinder groups */ int32_t fs_bsize; /* size of basic blocks in fs */ int32_t fs_fsize; /* size of frag blocks in fs */ int32_t fs_frag; /* number of frags in a block in fs */ /* these are configuration parameters */ int32_t fs_minfree; /* minimum percentage of free blocks */ int32_t fs_rotdelay; /* num of ms for optimal next block */ int32_t fs_rps; /* disk revolutions per second */ /* these fields can be computed from the others */ int32_t fs_bmask; /* ``blkoff'' calc of blk offsets */ int32_t fs_fmask; /* ``fragoff'' calc of frag offsets */ int32_t fs_bshift; /* ``lblkno'' calc of logical blkno */ int32_t fs_fshift; /* ``numfrags'' calc number of frags */ /* these are configuration parameters */ int32_t fs_maxcontig; /* max number of contiguous blks */ int32_t fs_maxbpg; /* max number of blks per cyl group */ /* these fields can be computed from the others */ int32_t fs_fragshift; /* block to frag shift */ int32_t fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ int32_t fs_sbsize; /* actual size of super block */ int32_t fs_csmask; /* csum block offset */ int32_t fs_csshift; /* csum block number */ int32_t fs_nindir; /* value of NINDIR */ int32_t fs_inopb; /* value of INOPB */ int32_t fs_nspf; /* value of NSPF */ /* yet another configuration parameter */ int32_t fs_optim; /* optimization preference, see below */ /* these fields are derived from the hardware */ int32_t fs_npsect; /* # sectors/track including spares */ int32_t fs_interleave; /* hardware sector interleave */ int32_t fs_trackskew; /* sector 0 skew, per track */ /* fs_id takes the space of the unused fs_headswitch and fs_trkseek fields */ int32_t fs_id[2]; /* unique filesystem id */ /* sizes determined by number of cylinder groups and their sizes */ ufs_daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ int32_t fs_cssize; /* size of cyl grp summary area */ int32_t fs_cgsize; /* cylinder group size */ /* these fields are derived from the hardware */ int32_t fs_ntrak; /* tracks per cylinder */ int32_t fs_nsect; /* sectors per track */ int32_t fs_spc; /* sectors per cylinder */ /* this comes from the disk driver partitioning */ int32_t fs_ncyl; /* cylinders in file system */ /* these fields can be computed from the others */ int32_t fs_cpg; /* cylinders per group */ int32_t fs_ipg; /* inodes per group */ int32_t fs_fpg; /* blocks per group * fs_frag */ /* this data must be re-computed after crashes */ struct csum fs_cstotal; /* cylinder summary information */ /* these fields are cleared at mount time */ int8_t fs_fmod; /* super block modified flag */ int8_t fs_clean; /* file system is clean flag */ int8_t fs_ronly; /* mounted read-only flag */ - int8_t fs_flags; /* currently unused flag */ + int8_t fs_flags; /* see FS_ flags below */ u_char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ /* these fields retain the current block allocation info */ int32_t fs_cgrotor; /* last cg 
searched */ struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */ int32_t *fs_maxcluster; /* max cluster in each cyl group */ int32_t fs_cpc; /* cyl per cycle in postbl */ int16_t fs_opostbl[16][8]; /* old rotation block list head */ int32_t fs_sparecon[50]; /* reserved for future constants */ int32_t fs_contigsumsize; /* size of cluster summary array */ int32_t fs_maxsymlinklen; /* max length of an internal symlink */ int32_t fs_inodefmt; /* format of on-disk inodes */ u_int64_t fs_maxfilesize; /* maximum representable file size */ int64_t fs_qbmask; /* ~fs_bmask for use with 64-bit size */ int64_t fs_qfmask; /* ~fs_fmask for use with 64-bit size */ int32_t fs_state; /* validate fs_clean field */ int32_t fs_postblformat; /* format of positional layout tables */ int32_t fs_nrpos; /* number of rotational positions */ int32_t fs_postbloff; /* (u_int16) rotation block list head */ int32_t fs_rotbloff; /* (u_int8) blocks for each rotation */ int32_t fs_magic; /* magic number */ u_int8_t fs_space[1]; /* list of blocks for each rotation */ /* actually longer */ }; /* * Filesystem identification */ #define FS_MAGIC 0x011954 /* the fast filesystem magic number */ #define FS_OKAY 0x7c269d38 /* superblock checksum */ #define FS_42INODEFMT -1 /* 4.2BSD inode format */ #define FS_44INODEFMT 2 /* 4.4BSD inode format */ + /* * Preference for optimization. */ #define FS_OPTTIME 0 /* minimize allocation time */ #define FS_OPTSPACE 1 /* minimize disk fragmentation */ /* + * Filesystem flags. + */ +#define FS_UNCLEAN 0x01 /* filesystem not clean at mount */ +#define FS_DOSOFTDEP 0x02 /* filesystem using soft dependencies */ + +/* * Rotational layout table format types */ #define FS_42POSTBLFMT -1 /* 4.2BSD rotational table format */ #define FS_DYNAMICPOSTBLFMT 1 /* dynamic rotational table format */ /* * Macros for access to superblock array structures */ #define fs_postbl(fs, cylno) \ (((fs)->fs_postblformat == FS_42POSTBLFMT) \ ? ((fs)->fs_opostbl[cylno]) \ : ((int16_t *)((u_int8_t *)(fs) + \ (fs)->fs_postbloff) + (cylno) * (fs)->fs_nrpos)) #define fs_rotbl(fs) \ (((fs)->fs_postblformat == FS_42POSTBLFMT) \ ? ((fs)->fs_space) \ : ((u_int8_t *)((u_int8_t *)(fs) + (fs)->fs_rotbloff))) /* * The size of a cylinder group is calculated by CGSIZE. The maximum size * is limited by the fact that cylinder groups are at most one block. * Its size is derived from the size of the maps maintained in the * cylinder group and the (struct cg) size. */ #define CGSIZE(fs) \ /* base cg */ (sizeof(struct cg) + sizeof(int32_t) + \ /* blktot size */ (fs)->fs_cpg * sizeof(int32_t) + \ /* blks size */ (fs)->fs_cpg * (fs)->fs_nrpos * sizeof(int16_t) + \ /* inode map */ howmany((fs)->fs_ipg, NBBY) + \ /* block map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY) +\ /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \ /* cluster sum */ (fs)->fs_contigsumsize * sizeof(int32_t) + \ /* cluster map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPB(fs), NBBY))) /* * Convert cylinder group to base address of its global summary info. * * N.B. This macro assumes that sizeof(struct csum) is a power of two. */ #define fs_cs(fs, indx) \ fs_csp[(indx) >> (fs)->fs_csshift][(indx) & ~(fs)->fs_csmask] /* * Cylinder group block for a file system. 
*/ #define CG_MAGIC 0x090255 struct cg { int32_t cg_firstfield; /* historic cyl groups linked list */ int32_t cg_magic; /* magic number */ time_t cg_time; /* time last written */ int32_t cg_cgx; /* we are the cgx'th cylinder group */ int16_t cg_ncyl; /* number of cyl's this cg */ int16_t cg_niblk; /* number of inode blocks this cg */ int32_t cg_ndblk; /* number of data blocks this cg */ struct csum cg_cs; /* cylinder summary information */ int32_t cg_rotor; /* position of last used block */ int32_t cg_frotor; /* position of last used frag */ int32_t cg_irotor; /* position of last used inode */ int32_t cg_frsum[MAXFRAG]; /* counts of available frags */ int32_t cg_btotoff; /* (int32) block totals per cylinder */ int32_t cg_boff; /* (u_int16) free block positions */ int32_t cg_iusedoff; /* (u_int8) used inode map */ int32_t cg_freeoff; /* (u_int8) free block map */ int32_t cg_nextfreeoff; /* (u_int8) next available space */ int32_t cg_clustersumoff; /* (u_int32) counts of avail clusters */ int32_t cg_clusteroff; /* (u_int8) free cluster map */ int32_t cg_nclusterblks; /* number of clusters this cg */ int32_t cg_sparecon[13]; /* reserved for future use */ u_int8_t cg_space[1]; /* space for cylinder group maps */ /* actually longer */ }; /* * Macros for access to cylinder group array structures */ #define cg_blktot(cgp) \ (((cgp)->cg_magic != CG_MAGIC) \ ? (((struct ocg *)(cgp))->cg_btot) \ : ((int32_t *)((u_int8_t *)(cgp) + (cgp)->cg_btotoff))) #define cg_blks(fs, cgp, cylno) \ (((cgp)->cg_magic != CG_MAGIC) \ ? (((struct ocg *)(cgp))->cg_b[cylno]) \ : ((int16_t *)((u_int8_t *)(cgp) + \ (cgp)->cg_boff) + (cylno) * (fs)->fs_nrpos)) #define cg_inosused(cgp) \ (((cgp)->cg_magic != CG_MAGIC) \ ? (((struct ocg *)(cgp))->cg_iused) \ : ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_iusedoff))) #define cg_blksfree(cgp) \ (((cgp)->cg_magic != CG_MAGIC) \ ? (((struct ocg *)(cgp))->cg_free) \ : ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_freeoff))) #define cg_chkmagic(cgp) \ ((cgp)->cg_magic == CG_MAGIC || ((struct ocg *)(cgp))->cg_magic == CG_MAGIC) #define cg_clustersfree(cgp) \ ((u_int8_t *)((u_int8_t *)(cgp) + (cgp)->cg_clusteroff)) #define cg_clustersum(cgp) \ ((int32_t *)((u_int8_t *)(cgp) + (cgp)->cg_clustersumoff)) /* * The following structure is defined * for compatibility with old file systems. */ struct ocg { int32_t cg_firstfield; /* historic linked list of cyl groups */ int32_t cg_unused_1; /* used for incore cyl groups */ time_t cg_time; /* time last written */ int32_t cg_cgx; /* we are the cgx'th cylinder group */ int16_t cg_ncyl; /* number of cyl's this cg */ int16_t cg_niblk; /* number of inode blocks this cg */ int32_t cg_ndblk; /* number of data blocks this cg */ struct csum cg_cs; /* cylinder summary information */ int32_t cg_rotor; /* position of last used block */ int32_t cg_frotor; /* position of last used frag */ int32_t cg_irotor; /* position of last used inode */ int32_t cg_frsum[8]; /* counts of available frags */ int32_t cg_btot[32]; /* block totals per cylinder */ int16_t cg_b[32][8]; /* positions of free blocks */ u_int8_t cg_iused[256]; /* used inode map */ int32_t cg_magic; /* magic number */ u_int8_t cg_free[1]; /* free block map */ /* actually longer */ }; /* * Turn file system block numbers into disk block addresses. * This maps file system blocks to device size blocks. */ #define fsbtodb(fs, b) ((b) << (fs)->fs_fsbtodb) #define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) /* * Cylinder group macros to locate things in cylinder groups. 
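The macros in this stretch of fs.h, defined just below, map filesystem block addresses to cylinder groups: dtog()/dtogd() are plain division and modulus by fs_fpg, and cgbase() inverts them. A quick round-trip check with sample numbers:

	#include <stdio.h>
	#include <stdint.h>

	int
	main(void)
	{
		int32_t fpg = 16384;	/* sample fs_fpg, frags per group */
		int32_t d = 123456;	/* sample filesystem block address */

		int32_t cg = d / fpg;		/* dtog() */
		int32_t off = d % fpg;		/* dtogd() */
		int32_t base = fpg * cg;	/* cgbase() */

		printf("blk %d -> cg %d, offset %d, cgbase %d\n",
		    (int)d, (int)cg, (int)off, (int)base);
		return (base + off == d ? 0 : 1);	/* the round trip must hold */
	}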
* They calc file system addresses of cylinder group data structures. */ #define cgbase(fs, c) ((ufs_daddr_t)((fs)->fs_fpg * (c))) #define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ #define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ #define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ #define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ #define cgstart(fs, c) \ (cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask))) /* * Macros for handling inode numbers: * inode number to file system block offset. * inode number to cylinder group number. * inode number to file system block address. */ #define ino_to_cg(fs, x) ((x) / (fs)->fs_ipg) #define ino_to_fsba(fs, x) \ ((ufs_daddr_t)(cgimin(fs, ino_to_cg(fs, x)) + \ (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs)))))) #define ino_to_fsbo(fs, x) ((x) % INOPB(fs)) /* * Give cylinder group number for a file system block. * Give cylinder group block number for a file system block. */ #define dtog(fs, d) ((d) / (fs)->fs_fpg) #define dtogd(fs, d) ((d) % (fs)->fs_fpg) /* * Extract the bits for a block from a map. * Compute the cylinder and rotational position of a cyl block addr. */ #define blkmap(fs, map, loc) \ (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) #define cbtocylno(fs, bno) \ ((bno) * NSPF(fs) / (fs)->fs_spc) #define cbtorpos(fs, bno) \ (((bno) * NSPF(fs) % (fs)->fs_spc / (fs)->fs_nsect * (fs)->fs_trackskew + \ (bno) * NSPF(fs) % (fs)->fs_spc % (fs)->fs_nsect * (fs)->fs_interleave) % \ (fs)->fs_nsect * (fs)->fs_nrpos / (fs)->fs_npsect) /* * The following macros optimize certain frequently calculated * quantities by using shifts and masks in place of divisions * modulos and multiplications. */ #define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ ((loc) & (fs)->fs_qbmask) #define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ ((loc) & (fs)->fs_qfmask) #define lblktosize(fs, blk) /* calculates ((off_t)blk * fs->fs_bsize) */ \ ((off_t)(blk) << (fs)->fs_bshift) /* Use this only when `blk' is known to be small, e.g., < NDADDR. */ #define smalllblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ ((blk) << (fs)->fs_bshift) #define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ ((loc) >> (fs)->fs_bshift) #define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ ((loc) >> (fs)->fs_fshift) #define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask) #define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) #define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ ((frags) >> (fs)->fs_fragshift) #define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ ((blks) << (fs)->fs_fragshift) #define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ ((fsb) & ((fs)->fs_frag - 1)) #define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ ((fsb) &~ ((fs)->fs_frag - 1)) /* * Determine the number of available frags given a * percentage to hold in reserve. */ #define freespace(fs, percentreserved) \ (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ (fs)->fs_cstotal.cs_nffree - ((fs)->fs_dsize * (percentreserved) / 100)) /* * Determining the size of a file block in the file system. */ #define blksize(fs, ip, lbn) \ (((lbn) >= NDADDR || (ip)->i_size >= smalllblktosize(fs, (lbn) + 1)) \ ? 
(fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (ip)->i_size)))) #define dblksize(fs, dip, lbn) \ (((lbn) >= NDADDR || (dip)->di_size >= smalllblktosize(fs, (lbn) + 1)) \ ? (fs)->fs_bsize \ : (fragroundup(fs, blkoff(fs, (dip)->di_size)))) +#define sblksize(fs, size, lbn) \ + (((lbn) >= NDADDR || (size) >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (size))))) + /* * Number of disk sectors per block/fragment; assumes DEV_BSIZE byte * sector size. */ #define NSPB(fs) ((fs)->fs_nspf << (fs)->fs_fragshift) #define NSPF(fs) ((fs)->fs_nspf) /* * Number of inodes in a secondary storage block/fragment. */ #define INOPB(fs) ((fs)->fs_inopb) #define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) /* * Number of indirects in a file system block. */ #define NINDIR(fs) ((fs)->fs_nindir) extern int inside[], around[]; extern u_char *fragtbl[]; #endif Index: head/sys/ufs/ufs/inode.h =================================================================== --- head/sys/ufs/ufs/inode.h (revision 34265) +++ head/sys/ufs/ufs/inode.h (revision 34266) @@ -1,172 +1,181 @@ /* * Copyright (c) 1982, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)inode.h 8.9 (Berkeley) 5/14/95 - * $Id: inode.h,v 1.19 1997/12/05 13:43:47 jkh Exp $ + * $Id: inode.h,v 1.20 1998/01/30 11:34:02 phk Exp $ */ #ifndef _UFS_UFS_INODE_H_ #define _UFS_UFS_INODE_H_ #include #include /* + * The size of a logical block number. + */ +typedef long ufs_lbn_t; + +/* * This must agree with the definition in . 
*/ #define doff_t int32_t /* * The inode is used to describe each active (or recently active) file in the * UFS filesystem. It is composed of two types of information. The first part * is the information that is needed only while the file is active (such as * the identity of the file and linkage to speed its lookup). The second part * is the permanent meta-data associated with the file which is read in * from the permanent dinode from long term storage when the file becomes * active, and is put back when the file is no longer being used. */ struct inode { struct lock i_lock; /* Inode lock. >Keep this first< */ LIST_ENTRY(inode) i_hash;/* Hash chain. */ struct vnode *i_vnode;/* Vnode associated with this inode. */ struct vnode *i_devvp;/* Vnode for block I/O. */ u_int32_t i_flag; /* flags, see below */ dev_t i_dev; /* Device associated with the inode. */ ino_t i_number; /* The identity of the inode. */ + int i_effnlink; /* i_nlink when I/O completes */ union { /* Associated filesystem. */ struct fs *fs; /* FFS */ struct ext2_sb_info *e2fs; /* EXT2FS */ } inode_u; #define i_fs inode_u.fs #define i_e2fs inode_u.e2fs struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */ u_quad_t i_modrev; /* Revision level for NFS lease. */ struct lockf *i_lockf;/* Head of byte-level lock list. */ /* * Side effects; used during directory lookup. */ int32_t i_count; /* Size of free slot in directory. */ doff_t i_endoff; /* End of useful stuff in directory. */ doff_t i_diroff; /* Offset in dir, where we found last entry. */ doff_t i_offset; /* Offset of free space in directory. */ ino_t i_ino; /* Inode number of found directory. */ u_int32_t i_reclen; /* Size of found directory entry. */ int i_spare[5]; /* XXX actually non-spare (for ext2fs). */ /* * The on-disk dinode itself. */ struct dinode i_din; /* 128 bytes of the on-disk dinode. */ }; #define i_atime i_din.di_atime #define i_atimensec i_din.di_atimensec #define i_blocks i_din.di_blocks #define i_ctime i_din.di_ctime #define i_ctimensec i_din.di_ctimensec #define i_db i_din.di_db #define i_flags i_din.di_flags #define i_gen i_din.di_gen #define i_gid i_din.di_gid #define i_ib i_din.di_ib #define i_mode i_din.di_mode #define i_mtime i_din.di_mtime #define i_mtimensec i_din.di_mtimensec #define i_nlink i_din.di_nlink #define i_rdev i_din.di_rdev #define i_shortlink i_din.di_shortlink #define i_size i_din.di_size #define i_uid i_din.di_uid /* These flags are kept in i_flag. */ #define IN_ACCESS 0x0001 /* Access time update request. */ #define IN_CHANGE 0x0002 /* Inode change time update request. */ #define IN_UPDATE 0x0004 /* Modification time update request. */ #define IN_MODIFIED 0x0008 /* Inode has been modified. */ #define IN_RENAME 0x0010 /* Inode is being renamed. */ #define IN_SHLOCK 0x0020 /* File has shared lock. */ #define IN_EXLOCK 0x0040 /* File has exclusive lock. */ #define IN_HASHED 0x0080 /* Inode is on hash list */ #ifdef KERNEL /* * Structure used to pass around logical block paths generated by * ufs_getlbns and used by truncate and bmap code. */ struct indir { ufs_daddr_t in_lbn; /* Logical block number. */ int in_off; /* Offset in buffer. */ int in_exists; /* Flag if the block exists. */ }; /* Convert between inode pointers and vnode pointers. */ #define VTOI(vp) ((struct inode *)(vp)->v_data) #define ITOV(ip) ((ip)->i_vnode) /* * XXX this is too long to be a macro, and isn't used in any time-critical * place; in fact it is only used in ufs_vnops.c so it shouldn't be in a * header file. 
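The new i_effnlink field above ("i_nlink when I/O completes") is what lets soft updates decouple the effective link count from the one that is safe to write: namespace operations adjust i_effnlink at once, while i_nlink may only follow when the corresponding directory write is on disk (see the softdep_* prototypes in the ufs_extern.h hunk below). A toy illustration of the two counters around an unlink; the struct and sequencing here are invented for illustration:

	#include <stdio.h>

	struct toy_inode {
		int nlink;	/* the count the disk currently shows */
		int effnlink;	/* the count once queued directory I/O completes */
	};

	int
	main(void)
	{
		struct toy_inode ip = { 2, 2 };

		/* unlink(2): the effective count drops immediately */
		ip.effnlink--;
		printf("pending: effnlink %d, on-disk nlink %d\n",
		    ip.effnlink, ip.nlink);

		/* only when the directory block that removed the name is
		 * safely written may the inode's on-disk count follow */
		ip.nlink = ip.effnlink;
		printf("dependency complete: nlink %d\n", ip.nlink);
		return (0);
	}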
*/ #define ITIMES(ip, t1, t2) { \ long tv_sec = time.tv_sec; \ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \ (ip)->i_flag |= IN_MODIFIED; \ if ((ip)->i_flag & IN_ACCESS) \ (ip)->i_atime \ = ((t1) == &time ? tv_sec : (t1)->tv_sec); \ if ((ip)->i_flag & IN_UPDATE) { \ (ip)->i_mtime \ = ((t2) == &time ? tv_sec : (t2)->tv_sec); \ (ip)->i_modrev++; \ } \ if ((ip)->i_flag & IN_CHANGE) \ (ip)->i_ctime = tv_sec; \ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \ } \ } + +/* Determine if soft dependencies are being done */ +#define DOINGSOFTDEP(vp) ((vp)->v_mount->mnt_flag & MNT_SOFTDEP) /* This overlays the fid structure (see mount.h). */ struct ufid { u_int16_t ufid_len; /* Length of structure. */ u_int16_t ufid_pad; /* Force 32-bit alignment. */ ino_t ufid_ino; /* File number (ino). */ int32_t ufid_gen; /* Generation number. */ }; #endif /* KERNEL */ #endif /* !_UFS_UFS_INODE_H_ */ Index: head/sys/ufs/ufs/ufs_extern.h =================================================================== --- head/sys/ufs/ufs/ufs_extern.h (revision 34265) +++ head/sys/ufs/ufs/ufs_extern.h (revision 34266) @@ -1,93 +1,106 @@ /*- * Copyright (c) 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)ufs_extern.h 8.10 (Berkeley) 5/14/95 - * $Id: ufs_extern.h,v 1.21 1997/10/16 11:59:09 phk Exp $ + * $Id: ufs_extern.h,v 1.22 1997/10/27 12:50:57 bde Exp $ */ #ifndef _UFS_UFS_EXTERN_H_ #define _UFS_UFS_EXTERN_H_ struct componentname; struct direct; struct indir; struct inode; struct mount; struct proc; struct sockaddr; struct ucred; struct ufid; struct vfsconf; struct vnode; struct vop_bmap_args; struct vop_cachedlookup_args; struct vop_generic_args; struct vop_inactive_args; struct vop_reclaim_args; int ufs_vnoperate __P((struct vop_generic_args *)); int ufs_vnoperatefifo __P((struct vop_generic_args *)); int ufs_vnoperatespec __P((struct vop_generic_args *)); int ufs_bmap __P((struct vop_bmap_args *)); int ufs_bmaparray __P((struct vnode *, daddr_t, daddr_t *, struct indir *, int *, int *, int *)); int ufs_check_export __P((struct mount *, struct ufid *, struct sockaddr *, struct vnode **, int *exflagsp, struct ucred **)); int ufs_checkpath __P((struct inode *, struct inode *, struct ucred *)); void ufs_dirbad __P((struct inode *, doff_t, char *)); int ufs_dirbadentry __P((struct vnode *, struct direct *, int)); int ufs_dirempty __P((struct inode *, ino_t, struct ucred *)); -int ufs_direnter __P((struct inode *, struct vnode *,struct componentname *)); -int ufs_direnter2 __P((struct vnode *, struct direct *, struct ucred *, - struct proc *)); -int ufs_dirremove __P((struct vnode *, struct componentname*)); -int ufs_dirrewrite - __P((struct inode *, struct inode *, struct componentname *)); +void ufs_makedirentry __P((struct inode *, struct componentname *, + struct direct *)); +int ufs_direnter __P((struct vnode *, struct vnode *, struct direct *, + struct componentname *, struct buf *)); +int ufs_dirremove __P((struct vnode *, struct inode *, int, int)); +int ufs_dirrewrite __P((struct inode *, struct inode *, ino_t, int, int)); int ufs_getlbns __P((struct vnode *, ufs_daddr_t, struct indir *, int *)); struct vnode * ufs_ihashget __P((dev_t, ino_t)); void ufs_ihashinit __P((void)); void ufs_ihashins __P((struct inode *)); struct vnode * ufs_ihashlookup __P((dev_t, ino_t)); void ufs_ihashrem __P((struct inode *)); int ufs_inactive __P((struct vop_inactive_args *)); int ufs_init __P((struct vfsconf *)); int ufs_lookup __P((struct vop_cachedlookup_args *)); int ufs_reclaim __P((struct vop_reclaim_args *)); int ufs_root __P((struct mount *, struct vnode **)); int ufs_start __P((struct mount *, int, struct proc *)); int ufs_vinit __P((struct mount *, vop_t **, vop_t **, struct vnode **)); + +/* + * Soft update function prototypes. + */ +void softdep_setup_directory_add __P((struct buf *, struct inode *, off_t, + long, struct buf *)); +void softdep_change_directoryentry_offset __P((struct inode *, caddr_t, + caddr_t, caddr_t, int)); +void softdep_setup_remove __P((struct buf *,struct inode *, struct inode *, + int)); +void softdep_setup_directory_change __P((struct buf *, struct inode *, + struct inode *, long, int)); +void softdep_increase_linkcnt __P((struct inode *)); #endif /* !_UFS_UFS_EXTERN_H_ */ Index: head/sys/ufs/ufs/ufs_lookup.c =================================================================== --- head/sys/ufs/ufs/ufs_lookup.c (revision 34265) +++ head/sys/ufs/ufs/ufs_lookup.c (revision 34266) @@ -1,1009 +1,1098 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. 
* All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 - * $Id: ufs_lookup.c,v 1.20 1998/02/04 22:33:36 eivind Exp $ + * $Id: ufs_lookup.c,v 1.21 1998/02/06 12:14:18 eivind Exp $ */ #include #include +#include #include #include +#include +#include #include #include +#include +#include + #include #include #include #include #include #ifdef DIAGNOSTIC int dirchk = 1; #else int dirchk = 0; #endif /* true if old FS format...*/ #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) /* * Convert a component of a pathname into a pointer to a locked inode. * This is a very central and rather complicated routine. * If the file system is not maintained in a strict tree hierarchy, * this can result in a deadlock situation (see comments in code below). * * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending * on whether the name is to be looked up, created, renamed, or deleted. * When CREATE, RENAME, or DELETE is specified, information usable in * creating, renaming, or deleting a directory entry may be calculated. * If flag has LOCKPARENT or'ed into it and the target of the pathname * exists, lookup returns both the target and its parent directory locked. * When creating or renaming and LOCKPARENT is specified, the target may * not be ".". When deleting and LOCKPARENT is specified, the target may * be ".", but the caller must check to ensure it does a vrele and vput * instead of two vputs. * * This routine is actually used as the VOP_CACHEDLOOKUP method, and the * filesystem employs the generic vfs_cache_lookup() as the VOP_LOOKUP * method.
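The OFSFMT() test defined above is why little-endian byte-order checks recur throughout this file. The pre-4.4BSD struct direct carried a 16-bit d_namlen where the new layout has d_type followed by d_namlen; on a little-endian machine the old length's low byte therefore sits in the slot new code reads as d_type. A stand-alone sketch of the accessor logic, with endianness passed in rather than detected:

#include <stdio.h>

static int
entry_namlen(int oldformat, int little_endian,
    unsigned char d_type, unsigned char d_namlen)
{
        if (oldformat && little_endian)
                return (d_type);        /* old namlen low byte lives here */
        return (d_namlen);
}

int
main(void)
{
        /* old-format entry with name length 5, as seen on little-endian */
        printf("%d\n", entry_namlen(1, 1, 5, 0));       /* prints 5 */
        return (0);
}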
* * vfs_cache_lookup() performs the following for us: * check that it is a directory * check accessibility of directory * check for modification attempts on read-only mounts * if name found in cache * if at end of path and deleting or creating * drop it * else * return name. * return VOP_CACHEDLOOKUP() * * Overall outline of ufs_lookup: * * search for name in directory, to found or notfound * notfound: * if creating, return locked directory, leaving info on available slots * else return error * found: * if at end of path and deleting, return information to allow delete * if at end of path and rewriting (RENAME and LOCKPARENT), lock target * inode and return info to allow rewrite * if not at end, add name to cache; if at end and neither creating * nor deleting, add name to cache */ int ufs_lookup(ap) struct vop_cachedlookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { register struct vnode *vdp; /* vnode for directory being searched */ register struct inode *dp; /* inode for directory being searched */ struct buf *bp; /* a buffer of directory entries */ register struct direct *ep; /* the current directory entry */ int entryoffsetinblock; /* offset of ep in bp's buffer */ enum {NONE, COMPACT, FOUND} slotstatus; doff_t slotoffset; /* offset of area with free space */ int slotsize; /* size of area at slotoffset */ int slotfreespace; /* amount of space free in slot */ int slotneeded; /* size of the entry we're seeking */ int numdirpasses; /* strategy for directory search */ doff_t endsearch; /* offset to end directory search */ doff_t prevoff; /* prev entry dp->i_offset */ struct vnode *pdp; /* saved dp during symlink work */ struct vnode *tdp; /* returned by VFS_VGET */ doff_t enduseful; /* pointer past last used dir slot */ u_long bmask; /* block offset mask */ int lockparent; /* 1 => lockparent flag is set */ int wantparent; /* 1 => wantparent or lockparent flag */ int namlen, error; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct ucred *cred = cnp->cn_cred; int flags = cnp->cn_flags; int nameiop = cnp->cn_nameiop; struct proc *p = cnp->cn_proc; bp = NULL; slotoffset = -1; +/* + * XXX there was a soft-update diff about this I couldn't merge. + * I think this was the equiv. + */ *vpp = NULL; + vdp = ap->a_dvp; dp = VTOI(vdp); lockparent = flags & LOCKPARENT; wantparent = flags & (LOCKPARENT|WANTPARENT); /* * We now have a segment name to search for, and a directory to search. * * Suppress search for slots unless creating * file and at end of pathname, in which case * we watch for a place to put the new file in * case it doesn't already exist. */ slotstatus = FOUND; slotfreespace = slotsize = slotneeded = 0; if ((nameiop == CREATE || nameiop == RENAME) && (flags & ISLASTCN)) { slotstatus = NONE; slotneeded = DIRECTSIZ(cnp->cn_namelen); } /* * If there is cached information on a previous search of * this directory, pick up where we last left off. * We cache only lookups as these are the most common * and have the greatest payoff. Caching CREATE has little * benefit as it usually must search the entire directory * to determine that the entry does not exist. Caching the * location of the last DELETE or RENAME has not reduced * profiling time and hence has been removed in the interest * of simplicity. 
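The slotneeded value computed above via DIRECTSIZ() reflects the variable-length on-disk entry format: an 8-byte header (d_ino, d_reclen, d_type, d_namlen) followed by the NUL-terminated name, padded to a 4-byte boundary. A small demonstration of that arithmetic, assuming the standard new-format layout:

#include <stdio.h>

/* Mirrors what DIRECTSIZ() computes for slotneeded above. */
static int
entry_size(int namlen)
{
        return (8 + ((namlen + 1 + 3) & ~3));
}

int
main(void)
{
        printf("%d %d %d\n", entry_size(1), entry_size(4), entry_size(12));
        /* prints: 12 16 24 */
        return (0);
}

Because reclen is what the allocator hands out, a record can be larger than its entry needs, and that slack is exactly what the slot search in the loop below hunts for.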
*/ bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; if (nameiop != LOOKUP || dp->i_diroff == 0 || dp->i_diroff > dp->i_size) { entryoffsetinblock = 0; dp->i_offset = 0; numdirpasses = 1; } else { dp->i_offset = dp->i_diroff; if ((entryoffsetinblock = dp->i_offset & bmask) && (error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))) return (error); numdirpasses = 2; nchstats.ncs_2passes++; } prevoff = dp->i_offset; endsearch = roundup2(dp->i_size, DIRBLKSIZ); enduseful = 0; searchloop: while (dp->i_offset < endsearch) { /* * If necessary, get the next directory block. */ if ((dp->i_offset & bmask) == 0) { if (bp != NULL) brelse(bp); error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp); if (error) return (error); entryoffsetinblock = 0; } /* * If still looking for a slot, and at a DIRBLKSIZE * boundary, have to start looking for free space again. */ if (slotstatus == NONE && (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { slotoffset = -1; slotfreespace = 0; } /* * Get pointer to next entry. * Full validation checks are slow, so we only check * enough to insure forward progress through the * directory. Complete checks can be run by patching * "dirchk" to be true. */ ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); if (ep->d_reclen == 0 || (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { int i; ufs_dirbad(dp, dp->i_offset, "mangled entry"); i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); dp->i_offset += i; entryoffsetinblock += i; continue; } /* * If an appropriate sized slot has not yet been found, * check to see if one is available. Also accumulate space * in the current block so that we can determine if * compaction is viable. */ if (slotstatus != FOUND) { int size = ep->d_reclen; if (ep->d_ino != 0) size -= DIRSIZ(OFSFMT(vdp), ep); if (size > 0) { if (size >= slotneeded) { slotstatus = FOUND; slotoffset = dp->i_offset; slotsize = ep->d_reclen; } else if (slotstatus == NONE) { slotfreespace += size; if (slotoffset == -1) slotoffset = dp->i_offset; if (slotfreespace >= slotneeded) { slotstatus = COMPACT; slotsize = dp->i_offset + ep->d_reclen - slotoffset; } } } } /* * Check for a name match. */ if (ep->d_ino) { # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(vdp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if (namlen == cnp->cn_namelen && (cnp->cn_nameptr[0] == ep->d_name[0]) && !bcmp(cnp->cn_nameptr, ep->d_name, (unsigned)namlen)) { /* * Save directory entry's inode number and * reclen in ndp->ni_ufs area, and release * directory buffer. */ if (vdp->v_mount->mnt_maxsymlinklen > 0 && ep->d_type == DT_WHT) { slotstatus = FOUND; slotoffset = dp->i_offset; slotsize = ep->d_reclen; dp->i_reclen = slotsize; enduseful = dp->i_size; ap->a_cnp->cn_flags |= ISWHITEOUT; numdirpasses--; goto notfound; } dp->i_ino = ep->d_ino; dp->i_reclen = ep->d_reclen; brelse(bp); goto found; } } prevoff = dp->i_offset; dp->i_offset += ep->d_reclen; entryoffsetinblock += ep->d_reclen; if (ep->d_ino) enduseful = dp->i_offset; } notfound: /* * If we started in the middle of the directory and failed * to find our target, we must check the beginning as well. */ if (numdirpasses == 2) { numdirpasses--; dp->i_offset = 0; endsearch = dp->i_diroff; goto searchloop; } if (bp != NULL) brelse(bp); /* * If creating, and at end of pathname and current * directory has not been removed, then can consider * allowing file to be created. 
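The free-slot bookkeeping in the loop above is a small state machine: FOUND when a single record has enough slack for the new entry, COMPACT when several adjacent records together do. A condensed model of one step of that scan, with the same quantities the loop uses and everything else stripped away:

enum slotstate { NONE, COMPACT, FOUND };

/*
 * Feed one record's (reclen, bytes actually used) pair into the
 * search; 'needed' is the size of the entry to be inserted.
 */
static enum slotstate
slot_scan(enum slotstate st, int reclen, int used, int needed,
    int *freespace)
{
        int slack = reclen - used;      /* space past the live entry */

        if (st == FOUND || slack <= 0)
                return (st);
        if (slack >= needed)
                return (FOUND);         /* one record suffices */
        *freespace += slack;            /* accumulate for compaction */
        return (*freespace >= needed ? COMPACT : st);
}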
*/ if ((nameiop == CREATE || nameiop == RENAME || (nameiop == DELETE && (ap->a_cnp->cn_flags & DOWHITEOUT) && (ap->a_cnp->cn_flags & ISWHITEOUT))) && - (flags & ISLASTCN) && dp->i_nlink != 0) { + (flags & ISLASTCN) && dp->i_effnlink != 0) { /* * Access for write is interpreted as allowing * creation of files in the directory. */ error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); if (error) return (error); /* * Return an indication of where the new directory * entry should be put. If we didn't find a slot, * then set dp->i_count to 0 indicating * that the new slot belongs at the end of the * directory. If we found a slot, then the new entry * can be put in the range from dp->i_offset to * dp->i_offset + dp->i_count. */ if (slotstatus == NONE) { dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); dp->i_count = 0; enduseful = dp->i_offset; } else if (nameiop == DELETE) { dp->i_offset = slotoffset; if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; } else { dp->i_offset = slotoffset; dp->i_count = slotsize; if (enduseful < slotoffset + slotsize) enduseful = slotoffset + slotsize; } dp->i_endoff = roundup2(enduseful, DIRBLKSIZ); dp->i_flag |= IN_CHANGE | IN_UPDATE; /* * We return with the directory locked, so that * the parameters we set up above will still be * valid if we actually decide to do a direnter(). * We return ni_vp == NULL to indicate that the entry * does not currently exist; we leave a pointer to * the (locked) directory inode in ndp->ni_dvp. * The pathname buffer is saved so that the name * can be obtained later. * * NB - if the directory is unlocked, then this * information cannot be used. */ cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(vdp, 0, p); return (EJUSTRETURN); } /* * Insert name into cache (as non-existent) if appropriate. */ if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) cache_enter(vdp, *vpp, cnp); return (ENOENT); found: if (numdirpasses == 2) nchstats.ncs_pass2++; /* * Check that directory length properly reflects presence * of this entry. */ if (entryoffsetinblock + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) { ufs_dirbad(dp, dp->i_offset, "i_size too small"); dp->i_size = entryoffsetinblock + DIRSIZ(OFSFMT(vdp), ep); dp->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Found component in pathname. * If the final component of path name, save information * in the cache as to where the entry was found. */ if ((flags & ISLASTCN) && nameiop == LOOKUP) dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); /* * If deleting, and at end of pathname, return * parameters which can be used to remove file. * If the wantparent flag isn't set, we return only * the directory (in ndp->ni_dvp), otherwise we go * on and lock the inode, being careful with ".". */ if (nameiop == DELETE && (flags & ISLASTCN)) { /* * Write access to directory required to delete files. */ error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); if (error) return (error); /* * Return pointer to current entry in dp->i_offset, * and distance past previous entry (if there * is a previous entry in this block) in dp->i_count. * Save directory inode pointer in ndp->ni_dvp for dirremove(). */ if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) dp->i_count = 0; else dp->i_count = dp->i_offset - prevoff; if (dp->i_number == dp->i_ino) { VREF(vdp); *vpp = vdp; return (0); } error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); /* * If directory is "sticky", then user must own * the directory, or the file in it, else she * may not delete it (unless she's root). 
This * implements append-only directories. */ if ((dp->i_mode & ISVTX) && cred->cr_uid != 0 && cred->cr_uid != dp->i_uid && VTOI(tdp)->i_uid != cred->cr_uid) { vput(tdp); return (EPERM); } *vpp = tdp; if (!lockparent) VOP_UNLOCK(vdp, 0, p); return (0); } /* * If rewriting (RENAME), return the inode and the * information required to rewrite the present directory * Must get inode of directory entry to verify it's a * regular file, or empty directory. */ if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) { if (error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) return (error); /* * Careful about locking second inode. * This can only occur if the target is ".". */ if (dp->i_number == dp->i_ino) return (EISDIR); error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); *vpp = tdp; cnp->cn_flags |= SAVENAME; if (!lockparent) VOP_UNLOCK(vdp, 0, p); return (0); } /* * Step through the translation in the name. We do not `vput' the * directory because we may need it again if a symbolic link * is relative to the current directory. Instead we save it * unlocked as "pdp". We must get the target inode before unlocking * the directory to insure that the inode will not be removed * before we get it. We prevent deadlock by always fetching * inodes from the root, moving down the directory tree. Thus * when following backward pointers ".." we must unlock the * parent directory before getting the requested directory. * There is a potential race condition here if both the current * and parent directories are removed before the VFS_VGET for the * inode associated with ".." returns. We hope that this occurs * infrequently since we cannot avoid this race condition without * implementing a sophisticated deadlock detection algorithm. * Note also that this simple deadlock detection scheme will not * work if the file system has any hard links other than ".." * that point backwards in the directory structure. */ pdp = vdp; if (flags & ISDOTDOT) { VOP_UNLOCK(pdp, 0, p); /* race to get the inode */ if (error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) { vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); return (error); } if (lockparent && (flags & ISLASTCN) && (error = vn_lock(pdp, LK_EXCLUSIVE, p))) { vput(tdp); return (error); } *vpp = tdp; } else if (dp->i_number == dp->i_ino) { VREF(vdp); /* we want ourself, ie "." */ *vpp = vdp; } else { error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); if (error) return (error); if (!lockparent || !(flags & ISLASTCN)) VOP_UNLOCK(pdp, 0, p); *vpp = tdp; } /* * Insert name into cache if appropriate. 
*/ if (cnp->cn_flags & MAKEENTRY) cache_enter(vdp, *vpp, cnp); return (0); } void ufs_dirbad(ip, offset, how) struct inode *ip; doff_t offset; char *how; { struct mount *mp; mp = ITOV(ip)->v_mount; (void)printf("%s: bad dir ino %ld at offset %ld: %s\n", mp->mnt_stat.f_mntonname, ip->i_number, offset, how); if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0) panic("ufs_dirbad: bad dir"); } /* * Do consistency checking on a directory entry: * record length must be multiple of 4 * entry must fit in rest of its DIRBLKSIZ block * record must be large enough to contain entry * name is not longer than MAXNAMLEN * name must be as long as advertised, and null terminated */ int ufs_dirbadentry(dp, ep, entryoffsetinblock) struct vnode *dp; register struct direct *ep; int entryoffsetinblock; { register int i; int namlen; # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(dp)) namlen = ep->d_type; else namlen = ep->d_namlen; # else namlen = ep->d_namlen; # endif if ((ep->d_reclen & 0x3) != 0 || ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) { /*return (1); */ printf("First bad\n"); goto bad; } if (ep->d_ino == 0) return (0); for (i = 0; i < namlen; i++) if (ep->d_name[i] == '\0') { /*return (1); */ printf("Second bad\n"); goto bad; } if (ep->d_name[i]) goto bad; return (0); bad: return (1); } /* - * Write a directory entry after a call to namei, using the parameters - * that it left in nameidata. The argument ip is the inode which the new - * directory entry will refer to. Dvp is a pointer to the directory to - * be written, which was left locked by namei. Remaining parameters - * (dp->i_offset, dp->i_count) indicate how the space for the new - * entry is to be obtained. + * Construct a new directory entry after a call to namei, using the + * parameters that it left in the componentname argument cnp. The + * argument ip is the inode to which the new directory entry will refer. */ -int -ufs_direnter(ip, dvp, cnp) +void +ufs_makedirentry(ip, cnp, newdirp) struct inode *ip; - struct vnode *dvp; - register struct componentname *cnp; + struct componentname *cnp; + struct direct *newdirp; { - register struct inode *dp; - struct direct newdir; #ifdef DIAGNOSTIC if ((cnp->cn_flags & SAVENAME) == 0) - panic("ufs_direnter: missing name"); + panic("ufs_makedirentry: missing name"); #endif - dp = VTOI(dvp); - newdir.d_ino = ip->i_number; - newdir.d_namlen = cnp->cn_namelen; - bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); - if (!OFSFMT(dvp)) - newdir.d_type = IFTODT(ip->i_mode); + newdirp->d_ino = ip->i_number; + newdirp->d_namlen = cnp->cn_namelen; + bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); + if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) + newdirp->d_type = IFTODT(ip->i_mode); else { - newdir.d_type = 0; + newdirp->d_type = 0; # if (BYTE_ORDER == LITTLE_ENDIAN) - { u_char tmp = newdir.d_namlen; - newdir.d_namlen = newdir.d_type; - newdir.d_type = tmp; } + { u_char tmp = newdirp->d_namlen; + newdirp->d_namlen = newdirp->d_type; + newdirp->d_type = tmp; } # endif } - return (ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc)); } /* - * Common entry point for directory entry removal used by ufs_direnter - * and ufs_whiteout + * Write a directory entry after a call to namei, using the parameters + * that it left in nameidata. The argument dirp is the new directory + * entry contents. Dvp is a pointer to the directory to be written, + * which was left locked by namei. 
Remaining parameters (dp->i_offset, + * dp->i_count) indicate how the space for the new entry is to be obtained. + * Non-null bp indicates that a directory is being created (for the + * soft dependency code). */ int -ufs_direnter2(dvp, dirp, cr, p) +ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) struct vnode *dvp; + struct vnode *tvp; struct direct *dirp; + struct componentname *cnp; + struct buf *newdirbp; +{ struct ucred *cr; struct proc *p; -{ int newentrysize; struct inode *dp; struct buf *bp; - struct iovec aiov; - struct uio auio; u_int dsize; struct direct *ep, *nep; - int error, loc, spacefree; + int error, ret, blkoff, loc, spacefree, flags; char *dirbuf; + p = curproc; /* XXX */ + cr = p->p_ucred; + dp = VTOI(dvp); newentrysize = DIRSIZ(OFSFMT(dvp), dirp); if (dp->i_count == 0) { /* * If dp->i_count is 0, then namei could find no * space in the directory. Here, dp->i_offset will * be on a directory block boundary and we will write the * new entry into a fresh block. */ if (dp->i_offset & (DIRBLKSIZ - 1)) - panic("ufs_direnter2: newblk"); - auio.uio_offset = dp->i_offset; + panic("ufs_direnter: newblk"); + flags = B_CLRBUF; + if (!DOINGSOFTDEP(dvp)) + flags |= B_SYNC; + if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, + cr, flags, &bp)) != 0) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); + return (error); + } + dp->i_size = dp->i_offset + DIRBLKSIZ; + dp->i_flag |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(dvp, (u_long)dp->i_size); dirp->d_reclen = DIRBLKSIZ; - auio.uio_resid = newentrysize; - aiov.iov_len = newentrysize; - aiov.iov_base = (caddr_t)dirp; - auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_rw = UIO_WRITE; - auio.uio_segflg = UIO_SYSSPACE; - auio.uio_procp = (struct proc *)0; - error = VOP_WRITE(dvp, &auio, IO_SYNC, cr); - if (DIRBLKSIZ > - VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) - /* XXX should grow with balloc() */ - panic("ufs_direnter2: frag size"); - else if (!error) { - dp->i_size = roundup2(dp->i_size, DIRBLKSIZ); - dp->i_flag |= IN_CHANGE; + blkoff = dp->i_offset & + (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); + bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); + if (DOINGSOFTDEP(dvp)) { + /* + * Ensure that the entire newly allocated block is a + * valid directory so that future growth within the + * block does not have to ensure that the block is + * written before the inode. + */ + blkoff += DIRBLKSIZ; + while (blkoff < bp->b_bcount) { + ((struct direct *) + (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; + blkoff += DIRBLKSIZ; + } + softdep_setup_directory_add(bp, dp, dp->i_offset, + dirp->d_ino, newdirbp); + bdwrite(bp); + } else { + error = VOP_BWRITE(bp); } + ret = UFS_UPDATE(dvp, &time, &time, !DOINGSOFTDEP(dvp)); + if (error == 0) + return (ret); return (error); } /* - * If dp->i_count is non-zero, then namei found space - * for the new entry in the range dp->i_offset to - * dp->i_offset + dp->i_count in the directory. - * To use this space, we may have to compact the entries located - * there, by copying them together towards the beginning of the - * block, leaving the free space in one usable chunk at the end. + * If dp->i_count is non-zero, then namei found space for the new + * entry in the range dp->i_offset to dp->i_offset + dp->i_count + * in the directory. To use this space, we may have to compact + * the entries located there, by copying them together towards the + * beginning of the block, leaving the free space in one usable + * chunk at the end. 
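A stand-alone model of the compaction invariant just described: within the region namei identified, each record's slack is its reclen minus the space its live entry needs (or all of reclen for a deleted entry), and packing live entries toward the front leaves exactly that sum in one chunk at the tail. The real loop that follows also moves the bytes, via bcopy() or, under soft updates, the new softdep_change_directoryentry_offset() hook so the dependency code can track each moved entry. Types here are stand-ins:

struct dent { unsigned ino; int reclen; int size; };  /* size = DIRSIZ */

static int
compact_free(const struct dent *e, int n)
{
        int i, freebytes = 0;

        for (i = 0; i < n; i++)
                freebytes += e[i].reclen - (e[i].ino ? e[i].size : 0);
        return (freebytes);     /* coalesced at the tail after packing */
}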
*/ /* * Increase size of directory if entry eats into new space. * This should never push the size past a new multiple of * DIRBLKSIZE. * * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. */ if (dp->i_offset + dp->i_count > dp->i_size) dp->i_size = dp->i_offset + dp->i_count; /* * Get the block containing the space for the new directory entry. */ error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); - if (error) + if (error) { + if (DOINGSOFTDEP(dvp) && newdirbp != NULL) + bdwrite(newdirbp); return (error); + } /* * Find space for the new entry. In the simple case, the entry at * offset base will have the space. If it does not, then namei * arranged that compacting the region dp->i_offset to - * dp->i_offset + dp->i_count would yield the - * space. + * dp->i_offset + dp->i_count would yield the space. */ ep = (struct direct *)dirbuf; dsize = DIRSIZ(OFSFMT(dvp), ep); spacefree = ep->d_reclen - dsize; for (loc = ep->d_reclen; loc < dp->i_count; ) { nep = (struct direct *)(dirbuf + loc); if (ep->d_ino) { /* trim the existing slot */ ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } else { /* overwrite; nothing there; header is ours */ spacefree += dsize; } dsize = DIRSIZ(OFSFMT(dvp), nep); spacefree += nep->d_reclen - dsize; loc += nep->d_reclen; - bcopy((caddr_t)nep, (caddr_t)ep, dsize); + if (DOINGSOFTDEP(dvp)) + softdep_change_directoryentry_offset(dp, dirbuf, + (caddr_t)nep, (caddr_t)ep, dsize); + else + bcopy((caddr_t)nep, (caddr_t)ep, dsize); } /* * Update the pointer fields in the previous entry (if any), * copy in the new entry, and write out the block. */ if (ep->d_ino == 0 || (ep->d_ino == WINO && bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { if (spacefree + dsize < newentrysize) - panic("ufs_direnter2: compact1"); + panic("ufs_direnter: compact1"); dirp->d_reclen = spacefree + dsize; } else { if (spacefree < newentrysize) - panic("ufs_direnter2: compact2"); + panic("ufs_direnter: compact2"); dirp->d_reclen = spacefree; ep->d_reclen = dsize; ep = (struct direct *)((char *)ep + dsize); } bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); - if (dvp->v_mount->mnt_flag & MNT_ASYNC) { + if (DOINGSOFTDEP(dvp)) { + softdep_setup_directory_add(bp, dp, + dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp); bdwrite(bp); - error = 0; } else { - error = bowrite(bp); + if (dvp->v_mount->mnt_flag & MNT_ASYNC) { + bdwrite(bp); + error = 0; + } else { + error = bowrite(bp); + } } dp->i_flag |= IN_CHANGE | IN_UPDATE; - if (!error && dp->i_endoff && dp->i_endoff < dp->i_size) - error = UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); + /* + * If all went well, and the directory can be shortened, proceed + * with the truncation. Note that we have to unlock the inode for + * the entry that we just entered, as the truncation may need to + * lock other inodes which can lead to deadlock if we also hold a + * lock on the newly entered node. + */ + if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { + if (tvp != NULL) + VOP_UNLOCK(tvp, 0, p); + (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); + if (tvp != NULL) + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); + } return (error); } /* * Remove a directory entry after a call to namei, using * the parameters which it left in nameidata. The entry * dp->i_offset contains the offset into the directory of the * entry to be eliminated. The dp->i_count field contains the * size of the previous record in the directory. 
If this * is 0, the first entry is being deleted, so we need only * zero the inode number to mark the entry as free. If the * entry is not the first in the directory, we must reclaim * the space of the now empty record by adding the record size * to the size of the previous entry. */ int -ufs_dirremove(dvp, cnp) +ufs_dirremove(dvp, ip, flags, isrmdir) struct vnode *dvp; - struct componentname *cnp; + struct inode *ip; + int flags; + int isrmdir; { - register struct inode *dp; + struct inode *dp; struct direct *ep; struct buf *bp; int error; dp = VTOI(dvp); - if (cnp->cn_flags & DOWHITEOUT) { + if (flags & DOWHITEOUT) { /* * Whiteout entry: set d_ino to WINO. */ if (error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) return (error); ep->d_ino = WINO; ep->d_type = DT_WHT; - error = VOP_BWRITE(bp); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); + goto out; } + if ((error = UFS_BLKATOFF(dvp, + (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) + return (error); if (dp->i_count == 0) { /* * First entry in block: set d_ino to zero. */ +#if 0 error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); +#endif ep->d_ino = 0; - error = bowrite(bp); - dp->i_flag |= IN_CHANGE | IN_UPDATE; - return (error); + } else { + /* + * Collapse new free space into previous entry. + */ + ep->d_reclen += dp->i_reclen; } +out: + if (ip) { + ip->i_effnlink--; + ip->i_flag |= IN_CHANGE; + } + if (DOINGSOFTDEP(dvp)) { + if (ip) + softdep_setup_remove(bp, dp, ip, isrmdir); + bdwrite(bp); + } else { + if (ip) + ip->i_nlink--; + error = bowrite(bp); /* maybe this should be as below? */ + } +#if 0 /* * Collapse new free space into previous entry. */ error = UFS_BLKATOFF(dvp, (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp); if (error) return (error); ep->d_reclen += dp->i_reclen; if (dvp->v_mount->mnt_flag & MNT_ASYNC) { bdwrite(bp); error = 0; } else { error = bowrite(bp); } +#endif dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } /* * Rewrite an existing directory entry to point at the inode * supplied. The parameters describing the directory entry are * set up by a call to namei. */ int -ufs_dirrewrite(dp, ip, cnp) - struct inode *dp, *ip; - struct componentname *cnp; +ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) + struct inode *dp, *oip; + ino_t newinum; + int newtype; + int isrmdir; { struct buf *bp; struct direct *ep; struct vnode *vdp = ITOV(dp); int error; error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); if (error) return (error); - ep->d_ino = ip->i_number; + ep->d_ino = newinum; if (!OFSFMT(vdp)) - ep->d_type = IFTODT(ip->i_mode); - if (vdp->v_mount->mnt_flag & MNT_ASYNC) { + ep->d_type = newtype; + oip->i_effnlink--; + oip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vdp)) { + softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); bdwrite(bp); - error = 0; } else { - error = bowrite(bp); + oip->i_nlink--; + if (vdp->v_mount->mnt_flag & MNT_ASYNC) { + bdwrite(bp); + error = 0; + } else { + error = bowrite(bp); + } } dp->i_flag |= IN_CHANGE | IN_UPDATE; return (error); } /* * Check if a directory is empty or not. * Inode supplied must be locked. * * Using a struct dirtemplate here is not precisely * what we want, but better than using a struct direct. * * NB: does not handle corrupted directories. 
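The dirremove/dirrewrite changes above introduce the two link counts the soft-updates code maintains: i_effnlink is the in-core truth and drops the moment the name goes away, while i_nlink is the on-disk count and, under soft updates, is decremented later by the dependency machinery once the directory block is safely on disk. A sketch of that split with a stand-in structure:

struct xinode { int i_effnlink; int i_nlink; };

static void
remove_name(struct xinode *ip, int softdep)
{
        ip->i_effnlink--;               /* immediate, in-core */
        if (!softdep)
                ip->i_nlink--;          /* otherwise deferred to softdep */
}

This is also why the lookup change earlier in the patch tests dp->i_effnlink rather than dp->i_nlink: the directory may already be logically removed even though its on-disk count has not yet caught up.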
*/ int ufs_dirempty(ip, parentino, cred) register struct inode *ip; ino_t parentino; struct ucred *cred; { register off_t off; struct dirtemplate dbuf; register struct direct *dp = (struct direct *)&dbuf; int error, count, namlen; #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) for (off = 0; off < ip->i_size; off += dp->d_reclen) { error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0); /* * Since we read MINDIRSIZ, residual must * be 0 unless we're at end of file. */ if (error || count != 0) return (0); /* avoid infinite loops */ if (dp->d_reclen == 0) return (0); /* skip empty entries */ if (dp->d_ino == 0 || dp->d_ino == WINO) continue; /* accept only "." and ".." */ # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(ITOV(ip))) namlen = dp->d_type; else namlen = dp->d_namlen; # else namlen = dp->d_namlen; # endif if (namlen > 2) return (0); if (dp->d_name[0] != '.') return (0); /* * At this point namlen must be 1 or 2. * 1 implies ".", 2 implies ".." if second * char is also "." */ - if (namlen == 1) + if (namlen == 1 && dp->d_ino == ip->i_number) continue; if (dp->d_name[1] == '.' && dp->d_ino == parentino) continue; return (0); } return (1); } /* * Check if source directory is in the path of the target directory. * Target is supplied locked, source is unlocked. * The target is always vput before returning. */ int ufs_checkpath(source, target, cred) struct inode *source, *target; struct ucred *cred; { struct vnode *vp; int error, rootino, namlen; struct dirtemplate dirbuf; vp = ITOV(target); if (target->i_number == source->i_number) { error = EEXIST; goto out; } rootino = ROOTINO; error = 0; if (target->i_number == rootino) goto out; for (;;) { if (vp->v_type != VDIR) { error = ENOTDIR; break; } error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, cred, (int *)0, (struct proc *)0); if (error != 0) break; # if (BYTE_ORDER == LITTLE_ENDIAN) if (OFSFMT(vp)) namlen = dirbuf.dotdot_type; else namlen = dirbuf.dotdot_namlen; # else namlen = dirbuf.dotdot_namlen; # endif if (namlen != 2 || dirbuf.dotdot_name[0] != '.' || dirbuf.dotdot_name[1] != '.') { error = ENOTDIR; break; } if (dirbuf.dotdot_ino == source->i_number) { error = EINVAL; break; } if (dirbuf.dotdot_ino == rootino) break; vput(vp); error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp); if (error) { vp = NULL; break; } } out: if (error == ENOTDIR) printf("checkpath: .. not a directory\n"); if (vp != NULL) vput(vp); return (error); } Index: head/sys/ufs/ufs/ufs_quota.c =================================================================== --- head/sys/ufs/ufs/ufs_quota.c (revision 34265) +++ head/sys/ufs/ufs/ufs_quota.c (revision 34266) @@ -1,941 +1,945 @@ /* * Copyright (c) 1982, 1986, 1990, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Robert Elz at The University of Melbourne. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_quota.c 8.5 (Berkeley) 5/20/95 - * $Id: ufs_quota.c,v 1.18 1998/02/06 12:14:18 eivind Exp $ + * $Id: ufs_quota.c,v 1.19 1998/02/09 06:11:12 eivind Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_DQUOT, "UFS quota", "UFS quota entries"); /* * Quota name to error message mapping. */ static char *quotatypes[] = INITQFNAMES; static int chkdqchg __P((struct inode *, long, struct ucred *, int)); static int chkiqchg __P((struct inode *, long, struct ucred *, int)); static int dqget __P((struct vnode *, u_long, struct ufsmount *, int, struct dquot **)); static int dqsync __P((struct vnode *, struct dquot *)); static void dqflush __P((struct vnode *)); #ifdef DIAGNOSTIC static void dqref __P((struct dquot *)); static void chkdquot __P((struct inode *)); #endif /* * Set up the quotas for an inode. * * This routine completely defines the semantics of quotas. * If other criteria are to be used to establish quotas, the * MAXQUOTAS value in quota.h should be increased, and the * additional dquots set up here. */ int getinoquota(ip) register struct inode *ip; { struct ufsmount *ump; struct vnode *vp = ITOV(ip); int error; ump = VFSTOUFS(vp->v_mount); /* * Set up the user quota based on file uid. * EINVAL means that quotas are not enabled. */ if (ip->i_dquot[USRQUOTA] == NODQUOT && (error = dqget(vp, ip->i_uid, ump, USRQUOTA, &ip->i_dquot[USRQUOTA])) && error != EINVAL) return (error); /* * Set up the group quota based on file gid. * EINVAL means that quotas are not enabled. */ if (ip->i_dquot[GRPQUOTA] == NODQUOT && (error = dqget(vp, ip->i_gid, ump, GRPQUOTA, &ip->i_dquot[GRPQUOTA])) && error != EINVAL) return (error); return (0); } /* * Update disk usage, and take corrective action.
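One detail of the routine that follows is worth calling out: when usage shrinks (change < 0), chkdq() applies the delta unconditionally but clamps the counter at zero, so a stale or corrupted counter cannot wedge the user; only growth goes through the limit check. A sketch of that shrink path:

static long
apply_shrink(long curblocks, long change)       /* change < 0 */
{
        long ncur = curblocks + change;

        return (ncur >= 0 ? ncur : 0);          /* never go negative */
}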
*/ int chkdq(ip, change, cred, flags) register struct inode *ip; long change; struct ucred *cred; int flags; { register struct dquot *dq; register int i; int ncurblocks, error; #ifdef DIAGNOSTIC if ((flags & CHOWN) == 0) chkdquot(ip); #endif if (change == 0) return (0); if (change < 0) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "chkdq1", 0); } ncurblocks = dq->dq_curblocks + change; if (ncurblocks >= 0) dq->dq_curblocks = ncurblocks; else dq->dq_curblocks = 0; dq->dq_flags &= ~DQ_BLKS; dq->dq_flags |= DQ_MOD; } return (0); } if ((flags & FORCE) == 0 && cred->cr_uid != 0) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; error = chkdqchg(ip, change, cred, i); if (error) return (error); } } for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "chkdq2", 0); } dq->dq_curblocks += change; dq->dq_flags |= DQ_MOD; } return (0); } /* * Check for a valid change to a users allocation. * Issue an error message if appropriate. */ static int chkdqchg(ip, change, cred, type) struct inode *ip; long change; struct ucred *cred; int type; { register struct dquot *dq = ip->i_dquot[type]; long ncurblocks = dq->dq_curblocks + change; /* * If user would exceed their hard limit, disallow space allocation. */ if (ncurblocks >= dq->dq_bhardlimit && dq->dq_bhardlimit) { if ((dq->dq_flags & DQ_BLKS) == 0 && ip->i_uid == cred->cr_uid) { uprintf("\n%s: write failed, %s disk limit reached\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type]); dq->dq_flags |= DQ_BLKS; } return (EDQUOT); } /* * If user is over their soft limit for too long, disallow space * allocation. Reset time limit as they cross their soft limit. */ if (ncurblocks >= dq->dq_bsoftlimit && dq->dq_bsoftlimit) { if (dq->dq_curblocks < dq->dq_bsoftlimit) { dq->dq_btime = time.tv_sec + VFSTOUFS(ITOV(ip)->v_mount)->um_btime[type]; if (ip->i_uid == cred->cr_uid) uprintf("\n%s: warning, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "disk quota exceeded"); return (0); } if (time.tv_sec > dq->dq_btime) { if ((dq->dq_flags & DQ_BLKS) == 0 && ip->i_uid == cred->cr_uid) { uprintf("\n%s: write failed, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "disk quota exceeded for too long"); dq->dq_flags |= DQ_BLKS; } return (EDQUOT); } } return (0); } /* * Check the inode limit, applying corrective action. 
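chkdqchg() above (and chkiqchg() below, which is its inode-count twin) implements the classic soft/hard limit semantics: a hard limit denies outright, while first crossing the soft limit merely starts a grace timer, and only an expired timer turns the soft limit into a denial. A condensed, self-contained version of that decision:

#include <errno.h>
#ifndef EDQUOT
#define EDQUOT 69               /* fallback value for non-BSD hosts */
#endif

static int
quota_check(long ncur, long cur, long soft, long hard,
    long now, long *graceend, long graceperiod)
{
        if (hard && ncur >= hard)
                return (EDQUOT);
        if (soft && ncur >= soft) {
                if (cur < soft) {               /* just crossed soft */
                        *graceend = now + graceperiod;
                        return (0);             /* allowed, timer armed */
                }
                if (now > *graceend)
                        return (EDQUOT);        /* grace expired */
        }
        return (0);
}

Resetting the timer only at the crossing, not on every allocation, is what keeps a user from staying over the soft limit indefinitely by trickling in writes.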
*/ int chkiq(ip, change, cred, flags) register struct inode *ip; long change; struct ucred *cred; int flags; { register struct dquot *dq; register int i; int ncurinodes, error; #ifdef DIAGNOSTIC if ((flags & CHOWN) == 0) chkdquot(ip); #endif if (change == 0) return (0); if (change < 0) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "chkiq1", 0); } ncurinodes = dq->dq_curinodes + change; if (ncurinodes >= 0) dq->dq_curinodes = ncurinodes; else dq->dq_curinodes = 0; dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; } return (0); } if ((flags & FORCE) == 0 && cred->cr_uid != 0) { for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; error = chkiqchg(ip, change, cred, i); if (error) return (error); } } for (i = 0; i < MAXQUOTAS; i++) { if ((dq = ip->i_dquot[i]) == NODQUOT) continue; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "chkiq2", 0); } dq->dq_curinodes += change; dq->dq_flags |= DQ_MOD; } return (0); } /* * Check for a valid change to a users allocation. * Issue an error message if appropriate. */ static int chkiqchg(ip, change, cred, type) struct inode *ip; long change; struct ucred *cred; int type; { register struct dquot *dq = ip->i_dquot[type]; long ncurinodes = dq->dq_curinodes + change; /* * If user would exceed their hard limit, disallow inode allocation. */ if (ncurinodes >= dq->dq_ihardlimit && dq->dq_ihardlimit) { if ((dq->dq_flags & DQ_INODS) == 0 && ip->i_uid == cred->cr_uid) { uprintf("\n%s: write failed, %s inode limit reached\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type]); dq->dq_flags |= DQ_INODS; } return (EDQUOT); } /* * If user is over their soft limit for too long, disallow inode * allocation. Reset time limit as they cross their soft limit. */ if (ncurinodes >= dq->dq_isoftlimit && dq->dq_isoftlimit) { if (dq->dq_curinodes < dq->dq_isoftlimit) { dq->dq_itime = time.tv_sec + VFSTOUFS(ITOV(ip)->v_mount)->um_itime[type]; if (ip->i_uid == cred->cr_uid) uprintf("\n%s: warning, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "inode quota exceeded"); return (0); } if (time.tv_sec > dq->dq_itime) { if ((dq->dq_flags & DQ_INODS) == 0 && ip->i_uid == cred->cr_uid) { uprintf("\n%s: write failed, %s %s\n", ITOV(ip)->v_mount->mnt_stat.f_mntonname, quotatypes[type], "inode quota exceeded for too long"); dq->dq_flags |= DQ_INODS; } return (EDQUOT); } } return (0); } #ifdef DIAGNOSTIC /* * On filesystems with quotas enabled, it is an error for a file to change * size and not to have a dquot structure associated with it. */ static void chkdquot(ip) register struct inode *ip; { struct ufsmount *ump = VFSTOUFS(ITOV(ip)->v_mount); register int i; for (i = 0; i < MAXQUOTAS; i++) { if (ump->um_quotas[i] == NULLVP || (ump->um_qflags[i] & (QTF_OPENING|QTF_CLOSING))) continue; if (ip->i_dquot[i] == NODQUOT) { vprint("chkdquot: missing dquot", ITOV(ip)); panic("chkdquot: missing dquot"); } } } #endif /* * Code to process quotactl commands. */ /* * Q_QUOTAON - set up a quota file for a particular file system. 
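The mount-vnode scans in quotaon(), quotaoff(), and qsync() below all share one shape, and this patch adds the same guard to each: skip vnodes whose type is still VNON (not yet fully initialized) and restart from the head whenever the list changes underneath the scan. A stand-alone model of the idiom, with all locking abstracted into the two callbacks:

#include <stddef.h>

struct xvnode { struct xvnode *next; int type; };
#define XVNON 0                 /* stand-in for the kernel's VNON */

static void
scan_vnodes(struct xvnode *head, int (*work)(struct xvnode *),
    int (*list_changed)(void))
{
again:
        for (struct xvnode *vp = head; vp != NULL; vp = vp->next) {
                if (vp->type == XVNON)
                        continue;       /* not fully set up yet */
                if (work(vp) != 0)
                        break;
                if (list_changed())
                        goto again;     /* list moved; start over */
        }
}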
*/ int quotaon(p, mp, type, fname) struct proc *p; struct mount *mp; register int type; caddr_t fname; { struct ufsmount *ump = VFSTOUFS(mp); struct vnode *vp, **vpp; struct vnode *nextvp; struct dquot *dq; int error; struct nameidata nd; vpp = &ump->um_quotas[type]; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, fname, p); error = vn_open(&nd, FREAD|FWRITE, 0); if (error) return (error); vp = nd.ni_vp; VOP_UNLOCK(vp, 0, p); if (vp->v_type != VREG) { (void) vn_close(vp, FREAD|FWRITE, p->p_ucred, p); return (EACCES); } if (*vpp != vp) quotaoff(p, mp, type); ump->um_qflags[type] |= QTF_OPENING; mp->mnt_flag |= MNT_QUOTA; vp->v_flag |= VSYSTEM; *vpp = vp; /* * Save the credential of the process that turned on quotas. * Set up the time limits for this quota. */ crhold(p->p_ucred); ump->um_cred[type] = p->p_ucred; ump->um_btime[type] = MAX_DQ_TIME; ump->um_itime[type] = MAX_IQ_TIME; if (dqget(NULLVP, 0, ump, type, &dq) == 0) { if (dq->dq_btime > 0) ump->um_btime[type] = dq->dq_btime; if (dq->dq_itime > 0) ump->um_itime[type] = dq->dq_itime; dqrele(NULLVP, dq); } /* * Search vnodes associated with this mount point, * adding references to quota file being opened. * NB: only need to add dquot's for inodes being modified. */ again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { nextvp = vp->v_mntvnodes.le_next; - if (vp->v_writecount == 0) + if (vp->v_type == VNON || vp->v_writecount == 0) continue; if (vget(vp, LK_EXCLUSIVE, p)) goto again; error = getinoquota(VTOI(vp)); if (error) { vput(vp); break; } vput(vp); if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) goto again; } ump->um_qflags[type] &= ~QTF_OPENING; if (error) quotaoff(p, mp, type); return (error); } /* * Q_QUOTAOFF - turn off disk quotas for a filesystem. */ int quotaoff(p, mp, type) struct proc *p; struct mount *mp; register int type; { struct vnode *vp; struct vnode *qvp, *nextvp; struct ufsmount *ump = VFSTOUFS(mp); struct dquot *dq; struct inode *ip; int error; if ((qvp = ump->um_quotas[type]) == NULLVP) return (0); ump->um_qflags[type] |= QTF_CLOSING; /* * Search vnodes associated with this mount point, * deleting any references to quota file being closed. */ again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { nextvp = vp->v_mntvnodes.le_next; + if (vp->v_type == VNON) + continue; if (vget(vp, LK_EXCLUSIVE, p)) goto again; ip = VTOI(vp); dq = ip->i_dquot[type]; ip->i_dquot[type] = NODQUOT; dqrele(vp, dq); vput(vp); if (vp->v_mntvnodes.le_next != nextvp || vp->v_mount != mp) goto again; } dqflush(qvp); qvp->v_flag &= ~VSYSTEM; error = vn_close(qvp, FREAD|FWRITE, p->p_ucred, p); ump->um_quotas[type] = NULLVP; crfree(ump->um_cred[type]); ump->um_cred[type] = NOCRED; ump->um_qflags[type] &= ~QTF_CLOSING; for (type = 0; type < MAXQUOTAS; type++) if (ump->um_quotas[type] != NULLVP) break; if (type == MAXQUOTAS) mp->mnt_flag &= ~MNT_QUOTA; return (error); } /* * Q_GETQUOTA - return current values in a dqblk structure. */ int getquota(mp, id, type, addr) struct mount *mp; u_long id; int type; caddr_t addr; { struct dquot *dq; int error; error = dqget(NULLVP, id, VFSTOUFS(mp), type, &dq); if (error) return (error); error = copyout((caddr_t)&dq->dq_dqb, addr, sizeof (struct dqblk)); dqrele(NULLVP, dq); return (error); } /* * Q_SETQUOTA - assign an entire dqblk structure. 
*/ int setquota(mp, id, type, addr) struct mount *mp; u_long id; int type; caddr_t addr; { register struct dquot *dq; struct dquot *ndq; struct ufsmount *ump = VFSTOUFS(mp); struct dqblk newlim; int error; error = copyin(addr, (caddr_t)&newlim, sizeof (struct dqblk)); if (error) return (error); error = dqget(NULLVP, id, ump, type, &ndq); if (error) return (error); dq = ndq; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "setqta", 0); } /* * Copy all but the current values. * Reset time limit if previously had no soft limit or were * under it, but now have a soft limit and are over it. */ newlim.dqb_curblocks = dq->dq_curblocks; newlim.dqb_curinodes = dq->dq_curinodes; if (dq->dq_id != 0) { newlim.dqb_btime = dq->dq_btime; newlim.dqb_itime = dq->dq_itime; } if (newlim.dqb_bsoftlimit && dq->dq_curblocks >= newlim.dqb_bsoftlimit && (dq->dq_bsoftlimit == 0 || dq->dq_curblocks < dq->dq_bsoftlimit)) newlim.dqb_btime = time.tv_sec + ump->um_btime[type]; if (newlim.dqb_isoftlimit && dq->dq_curinodes >= newlim.dqb_isoftlimit && (dq->dq_isoftlimit == 0 || dq->dq_curinodes < dq->dq_isoftlimit)) newlim.dqb_itime = time.tv_sec + ump->um_itime[type]; dq->dq_dqb = newlim; if (dq->dq_curblocks < dq->dq_bsoftlimit) dq->dq_flags &= ~DQ_BLKS; if (dq->dq_curinodes < dq->dq_isoftlimit) dq->dq_flags &= ~DQ_INODS; if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) dq->dq_flags |= DQ_FAKE; else dq->dq_flags &= ~DQ_FAKE; dq->dq_flags |= DQ_MOD; dqrele(NULLVP, dq); return (0); } /* * Q_SETUSE - set current inode and block usage. */ int setuse(mp, id, type, addr) struct mount *mp; u_long id; int type; caddr_t addr; { register struct dquot *dq; struct ufsmount *ump = VFSTOUFS(mp); struct dquot *ndq; struct dqblk usage; int error; error = copyin(addr, (caddr_t)&usage, sizeof (struct dqblk)); if (error) return (error); error = dqget(NULLVP, id, ump, type, &ndq); if (error) return (error); dq = ndq; while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+1, "setuse", 0); } /* * Reset time limit if have a soft limit and were * previously under it, but are now over it. */ if (dq->dq_bsoftlimit && dq->dq_curblocks < dq->dq_bsoftlimit && usage.dqb_curblocks >= dq->dq_bsoftlimit) dq->dq_btime = time.tv_sec + ump->um_btime[type]; if (dq->dq_isoftlimit && dq->dq_curinodes < dq->dq_isoftlimit && usage.dqb_curinodes >= dq->dq_isoftlimit) dq->dq_itime = time.tv_sec + ump->um_itime[type]; dq->dq_curblocks = usage.dqb_curblocks; dq->dq_curinodes = usage.dqb_curinodes; if (dq->dq_curblocks < dq->dq_bsoftlimit) dq->dq_flags &= ~DQ_BLKS; if (dq->dq_curinodes < dq->dq_isoftlimit) dq->dq_flags &= ~DQ_INODS; dq->dq_flags |= DQ_MOD; dqrele(NULLVP, dq); return (0); } /* * Q_SYNC - sync quota files to disk. */ int qsync(mp) struct mount *mp; { struct ufsmount *ump = VFSTOUFS(mp); struct proc *p = curproc; /* XXX */ struct vnode *vp, *nextvp; struct dquot *dq; int i, error; /* * Check if the mount point has any quotas. * If not, simply return. */ for (i = 0; i < MAXQUOTAS; i++) if (ump->um_quotas[i] != NULLVP) break; if (i == MAXQUOTAS) return (0); /* * Search vnodes associated with this mount point, * synchronizing any modified dquot structures. 
*/ simple_lock(&mntvnode_slock); again: for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nextvp) { if (vp->v_mount != mp) goto again; nextvp = vp->v_mntvnodes.le_next; + if (vp->v_type == VNON) + continue; simple_lock(&vp->v_interlock); simple_unlock(&mntvnode_slock); error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p); if (error) { simple_lock(&mntvnode_slock); if (error == ENOENT) goto again; continue; } for (i = 0; i < MAXQUOTAS; i++) { dq = VTOI(vp)->i_dquot[i]; if (dq != NODQUOT && (dq->dq_flags & DQ_MOD)) dqsync(vp, dq); } vput(vp); simple_lock(&mntvnode_slock); if (vp->v_mntvnodes.le_next != nextvp) goto again; } simple_unlock(&mntvnode_slock); return (0); } /* * Code pertaining to management of the in-core dquot data structures. */ #define DQHASH(dqvp, id) \ (&dqhashtbl[((((int)(dqvp)) >> 8) + id) & dqhash]) static LIST_HEAD(dqhash, dquot) *dqhashtbl; static u_long dqhash; /* * Dquot free list. */ #define DQUOTINC 5 /* minimum free dquots desired */ static TAILQ_HEAD(dqfreelist, dquot) dqfreelist; static long numdquot, desireddquot = DQUOTINC; /* * Initialize the quota system. */ void dqinit() { dqhashtbl = hashinit(desiredvnodes, M_DQUOT, &dqhash); TAILQ_INIT(&dqfreelist); } /* * Obtain a dquot structure for the specified identifier and quota file * reading the information from the file if necessary. */ static int dqget(vp, id, ump, type, dqp) struct vnode *vp; u_long id; register struct ufsmount *ump; register int type; struct dquot **dqp; { struct proc *p = curproc; /* XXX */ struct dquot *dq; struct dqhash *dqh; struct vnode *dqvp; struct iovec aiov; struct uio auio; int error; dqvp = ump->um_quotas[type]; if (dqvp == NULLVP || (ump->um_qflags[type] & QTF_CLOSING)) { *dqp = NODQUOT; return (EINVAL); } /* * Check the cache first. */ dqh = DQHASH(dqvp, id); for (dq = dqh->lh_first; dq; dq = dq->dq_hash.le_next) { if (dq->dq_id != id || dq->dq_ump->um_quotas[dq->dq_type] != dqvp) continue; /* * Cache hit with no references. Take * the structure off the free list. */ if (dq->dq_cnt == 0) TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); DQREF(dq); *dqp = dq; return (0); } /* * Not in cache, allocate a new one. */ if (dqfreelist.tqh_first == NODQUOT && numdquot < MAXQUOTAS * desiredvnodes) desireddquot += DQUOTINC; if (numdquot < desireddquot) { dq = (struct dquot *)malloc(sizeof *dq, M_DQUOT, M_WAITOK); bzero((char *)dq, sizeof *dq); numdquot++; } else { if ((dq = dqfreelist.tqh_first) == NULL) { tablefull("dquot"); *dqp = NODQUOT; return (EUSERS); } if (dq->dq_cnt || (dq->dq_flags & DQ_MOD)) panic("dqget: free dquot isn't"); TAILQ_REMOVE(&dqfreelist, dq, dq_freelist); LIST_REMOVE(dq, dq_hash); } /* * Initialize the contents of the dquot structure. 
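The DQHASH() macro above buckets cached dquots by mixing the quota-file vnode pointer with the user or group id; the shift discards the low pointer bits that are always zero for aligned allocations, and dqhash is the power-of-two mask hashinit() returned. A stand-alone version of the same computation:

#include <stdint.h>

static unsigned long
dq_bucket(const void *dqvp, unsigned long id, unsigned long mask)
{
        return ((((uintptr_t)dqvp >> 8) + id) & mask);
}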
*/ if (vp != dqvp) vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); LIST_INSERT_HEAD(dqh, dq, dq_hash); DQREF(dq); dq->dq_flags = DQ_LOCK; dq->dq_id = id; dq->dq_ump = ump; dq->dq_type = type; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = (caddr_t)&dq->dq_dqb; aiov.iov_len = sizeof (struct dqblk); auio.uio_resid = sizeof (struct dqblk); auio.uio_offset = (off_t)(id * sizeof (struct dqblk)); auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_procp = (struct proc *)0; error = VOP_READ(dqvp, &auio, 0, ump->um_cred[type]); if (auio.uio_resid == sizeof(struct dqblk) && error == 0) bzero((caddr_t)&dq->dq_dqb, sizeof(struct dqblk)); if (vp != dqvp) VOP_UNLOCK(dqvp, 0, p); if (dq->dq_flags & DQ_WANT) wakeup((caddr_t)dq); dq->dq_flags = 0; /* * I/O error in reading quota file, release * quota structure and reflect problem to caller. */ if (error) { LIST_REMOVE(dq, dq_hash); dqrele(vp, dq); *dqp = NODQUOT; return (error); } /* * Check for no limit to enforce. * Initialize time values if necessary. */ if (dq->dq_isoftlimit == 0 && dq->dq_bsoftlimit == 0 && dq->dq_ihardlimit == 0 && dq->dq_bhardlimit == 0) dq->dq_flags |= DQ_FAKE; if (dq->dq_id != 0) { if (dq->dq_btime == 0) dq->dq_btime = time.tv_sec + ump->um_btime[type]; if (dq->dq_itime == 0) dq->dq_itime = time.tv_sec + ump->um_itime[type]; } *dqp = dq; return (0); } #ifdef DIAGNOSTIC /* * Obtain a reference to a dquot. */ static void dqref(dq) struct dquot *dq; { dq->dq_cnt++; } #endif /* * Release a reference to a dquot. */ void dqrele(vp, dq) struct vnode *vp; register struct dquot *dq; { if (dq == NODQUOT) return; if (dq->dq_cnt > 1) { dq->dq_cnt--; return; } if (dq->dq_flags & DQ_MOD) (void) dqsync(vp, dq); if (--dq->dq_cnt > 0) return; TAILQ_INSERT_TAIL(&dqfreelist, dq, dq_freelist); } /* * Update the disk quota in the quota file. */ static int dqsync(vp, dq) struct vnode *vp; struct dquot *dq; { struct proc *p = curproc; /* XXX */ struct vnode *dqvp; struct iovec aiov; struct uio auio; int error; if (dq == NODQUOT) panic("dqsync: dquot"); if ((dq->dq_flags & DQ_MOD) == 0) return (0); if ((dqvp = dq->dq_ump->um_quotas[dq->dq_type]) == NULLVP) panic("dqsync: file"); if (vp != dqvp) vn_lock(dqvp, LK_EXCLUSIVE | LK_RETRY, p); while (dq->dq_flags & DQ_LOCK) { dq->dq_flags |= DQ_WANT; (void) tsleep((caddr_t)dq, PINOD+2, "dqsync", 0); if ((dq->dq_flags & DQ_MOD) == 0) { if (vp != dqvp) VOP_UNLOCK(dqvp, 0, p); return (0); } } dq->dq_flags |= DQ_LOCK; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = (caddr_t)&dq->dq_dqb; aiov.iov_len = sizeof (struct dqblk); auio.uio_resid = sizeof (struct dqblk); auio.uio_offset = (off_t)(dq->dq_id * sizeof (struct dqblk)); auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_procp = (struct proc *)0; error = VOP_WRITE(dqvp, &auio, 0, dq->dq_ump->um_cred[dq->dq_type]); if (auio.uio_resid && error == 0) error = EIO; if (dq->dq_flags & DQ_WANT) wakeup((caddr_t)dq); dq->dq_flags &= ~(DQ_MOD|DQ_LOCK|DQ_WANT); if (vp != dqvp) VOP_UNLOCK(dqvp, 0, p); return (error); } /* * Flush all entries from the cache for a particular vnode. */ static void dqflush(vp) register struct vnode *vp; { register struct dquot *dq, *nextdq; struct dqhash *dqh; /* * Move all dquot's that used to refer to this quota * file off their hash chains (they will eventually * fall off the head of the free list and be re-used). 
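The release discipline in dqrele() above is worth spelling out, since it explains the dqflush() pass that follows: a dquot whose reference count reaches zero is not freed, it is flushed if modified and then parked at the tail of the free list while remaining hashed, so a later dqget() can revive it cheaply. A sketch with stand-in types:

struct cdq { int cnt; int mod; };

static void
release(struct cdq *dq, void (*sync)(struct cdq *),
    void (*park_tail)(struct cdq *))
{
        if (dq->cnt > 1) {              /* not the last reference */
                dq->cnt--;
                return;
        }
        if (dq->mod)
                sync(dq);               /* may sleep; still referenced */
        if (--dq->cnt > 0)
                return;                 /* revived while we slept */
        park_tail(dq);                  /* stays hashed, reusable */
}

Because parked entries stay hashed, closing a quota file must walk the table and unhash any dquot still pointing at it, which is exactly what dqflush() does.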
*/ for (dqh = &dqhashtbl[dqhash]; dqh >= dqhashtbl; dqh--) { for (dq = dqh->lh_first; dq; dq = nextdq) { nextdq = dq->dq_hash.le_next; if (dq->dq_ump->um_quotas[dq->dq_type] != vp) continue; if (dq->dq_cnt) panic("dqflush: stray dquot"); LIST_REMOVE(dq, dq_hash); dq->dq_ump = (struct ufsmount *)0; } } } Index: head/sys/ufs/ufs/ufs_readwrite.c =================================================================== --- head/sys/ufs/ufs/ufs_readwrite.c (revision 34265) +++ head/sys/ufs/ufs/ufs_readwrite.c (revision 34266) @@ -1,609 +1,609 @@ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 - * $Id: ufs_readwrite.c,v 1.43 1998/02/26 06:39:50 msmith Exp $ + * $Id: ufs_readwrite.c,v 1.44 1998/03/07 21:36:42 dyson Exp $ */ #define BLKSIZE(a, b, c) blksize(a, b, c) #define FS struct fs #define I_FS i_fs #define READ ffs_read #define READ_S "ffs_read" #define WRITE ffs_write #define WRITE_S "ffs_write" #include #include #include #include #include #include #include /* * Vnode op for reading. 
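Before the read routine below, a note on the per-iteration arithmetic it repeats: the current offset is split into a logical block number and an offset within that block, and the transfer size is the distance to the block end clamped by both the caller's residual count and the bytes left in the file. A self-contained version of that computation, with bsize standing in for fs_bsize:

#include <stdio.h>

static long
xfer_size(long bsize, long offset, long resid, long bytesinfile)
{
        long blkoffset = offset % bsize;        /* cf. blkoff() */
        long xfersize = bsize - blkoffset;      /* to end of block */

        if (resid < xfersize)
                xfersize = resid;
        if (bytesinfile < xfersize)
                xfersize = bytesinfile;
        return (xfersize);
}

int
main(void)
{
        /* 8k blocks: a 100-byte read at offset 8190 crosses a block */
        printf("%ld\n", xfer_size(8192, 8190, 100, 1000000)); /* 2 */
        return (0);
}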
*/ /* ARGSUSED */ int READ(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp; register struct inode *ip; register struct uio *uio; register FS *fs; struct buf *bp; ufs_daddr_t lbn, nextlbn; off_t bytesinfile; long size, xfersize, blkoffset; int error; u_short mode; int seqcount; int ioflag; vm_object_t object; vp = ap->a_vp; seqcount = ap->a_ioflag >> 16; ip = VTOI(vp); mode = ip->i_mode; uio = ap->a_uio; ioflag = ap->a_ioflag; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("%s: mode", READ_S); if (vp->v_type == VLNK) { if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) panic("%s: short symlink", READ_S); } else if (vp->v_type != VREG && vp->v_type != VDIR) panic("%s: type %d", READ_S, vp->v_type); #endif fs = ip->I_FS; if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize) return (EFBIG); object = vp->v_object; bytesinfile = ip->i_size - uio->uio_offset; if (bytesinfile <= 0) { return 0; } if (object) vm_object_reference(object); #if 1 if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) { int nread, toread; toread = uio->uio_resid; if (toread > bytesinfile) toread = bytesinfile; if (toread >= PAGE_SIZE) { error = uioread(toread, uio, object, &nread); if ((uio->uio_resid == 0) || (error != 0)) { if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) ip->i_flag |= IN_ACCESS; if (object) vm_object_vndeallocate(object); return error; } } } #endif for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) break; #if 1 if ((ioflag & IO_VMIO) == 0 && (vfs_ioopt > 1) && object) { int nread, toread; toread = uio->uio_resid; if (toread > bytesinfile) toread = bytesinfile; if (toread >= PAGE_SIZE) { error = uioread(toread, uio, object, &nread); if ((uio->uio_resid == 0) || (error != 0)) { if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) ip->i_flag |= IN_ACCESS; if (object) vm_object_vndeallocate(object); return error; } if (nread > 0) { continue; } } } #endif lbn = lblkno(fs, uio->uio_offset); nextlbn = lbn + 1; size = BLKSIZE(fs, ip, lbn); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (bytesinfile < xfersize) xfersize = bytesinfile; if (lblktosize(fs, nextlbn) >= ip->i_size) error = bread(vp, lbn, size, NOCRED, &bp); else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) error = cluster_read(vp, ip->i_size, lbn, size, NOCRED, uio->uio_resid, seqcount, &bp); else if (lbn - 1 == vp->v_lastr) { int nextsize = BLKSIZE(fs, ip, nextlbn); error = breadn(vp, lbn, size, &nextlbn, &nextsize, 1, NOCRED, &bp); } else error = bread(vp, lbn, size, NOCRED, &bp); if (error) { brelse(bp); bp = NULL; break; } vp->v_lastr = lbn; /* * We should only get non-zero b_resid when an I/O error * has occurred, which should cause us to break above. * However, if the short read did not cause an error, * then we want to ensure that we do not uiomove bad * or uninitialized data. 
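Each pass of the read loop above sizes its copy with pure arithmetic: lblkno() and blkoff() split the file offset into a logical block and an intra-block offset, and the transfer is the remainder of the block clamped by both the caller's residual count and end of file. A self-contained sketch of that clamping, with a fixed 8K block standing in for fs_bsize:

#include <stdio.h>

#define BSIZE 8192L	/* stand-in for fs->fs_bsize */

/* Compute one iteration's transfer size, as in the READ loop. */
static long
xfersize(long offset, long resid, long filesize)
{
	long lbn = offset / BSIZE;		/* lblkno() */
	long blkoffset = offset % BSIZE;	/* blkoff() */
	long xfer = BSIZE - blkoffset;		/* rest of this block */
	long bytesinfile = filesize - offset;

	if (resid < xfer)
		xfer = resid;			/* caller wants less */
	if (bytesinfile < xfer)
		xfer = bytesinfile;		/* EOF caps the copy */
	printf("lbn %ld, blkoffset %ld -> xfersize %ld\n",
	    lbn, blkoffset, xfer);
	return (xfer);
}

int
main(void)
{
	xfersize(12000, 100000, 20000);	/* mid-block, EOF-limited */
	xfersize(0, 512, 20000);	/* resid-limited */
	return (0);
}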
*/ size -= bp->b_resid; if (size < xfersize) { if (size == 0) break; xfersize = size; } if (vfs_ioopt && object && (bp->b_flags & B_VMIO) && ((blkoffset & PAGE_MASK) == 0) && ((xfersize & PAGE_MASK) == 0)) { error = uiomoveco((char *)bp->b_data + blkoffset, (int)xfersize, uio, object); } else { error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); } if (error) break; if (ioflag & IO_VMIO) { bp->b_flags |= B_RELBUF; brelse(bp); } else { bqrelse(bp); } } if (bp != NULL) { if (ioflag & IO_VMIO) { bp->b_flags |= B_RELBUF; brelse(bp); } else { bqrelse(bp); } } if (object) vm_object_vndeallocate(object); if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) ip->i_flag |= IN_ACCESS; return (error); } /* * Vnode op for writing. */ int WRITE(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp; register struct uio *uio; register struct inode *ip; register FS *fs; struct buf *bp; struct proc *p; ufs_daddr_t lbn; off_t osize; int blkoffset, error, extended, flags, ioflag, resid, size, xfersize; struct timeval tv; vm_object_t object; extended = 0; ioflag = ap->a_ioflag; uio = ap->a_uio; vp = ap->a_vp; ip = VTOI(vp); object = vp->v_object; if (object) vm_object_reference(object); #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("%s: mode", WRITE_S); #endif switch (vp->v_type) { case VREG: if (ioflag & IO_APPEND) uio->uio_offset = ip->i_size; if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) { if (object) vm_object_vndeallocate(object); return (EPERM); } /* FALLTHROUGH */ case VLNK: break; case VDIR: if ((ioflag & IO_SYNC) == 0) panic("%s: nonsync dir write", WRITE_S); break; default: panic("%s: type", WRITE_S); } fs = ip->I_FS; if (uio->uio_offset < 0 || (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) { if (object) vm_object_vndeallocate(object); return (EFBIG); } /* * Maybe this should be above the vnode op call, but so long as * file servers have no limits, I don't think it matters. */ p = uio->uio_procp; if (vp->v_type == VREG && p && uio->uio_offset + uio->uio_resid > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) { psignal(p, SIGXFSZ); if (object) vm_object_vndeallocate(object); return (EFBIG); } resid = uio->uio_resid; osize = ip->i_size; flags = ioflag & IO_SYNC ? B_SYNC : 0; if (object && (object->flags & OBJ_OPT)) { vm_freeze_copyopts(object, OFF_TO_IDX(uio->uio_offset), OFF_TO_IDX(uio->uio_offset + uio->uio_resid + PAGE_MASK)); } for (error = 0; uio->uio_resid > 0;) { lbn = lblkno(fs, uio->uio_offset); blkoffset = blkoff(fs, uio->uio_offset); xfersize = fs->fs_bsize - blkoffset; if (uio->uio_resid < xfersize) xfersize = uio->uio_resid; if (uio->uio_offset + xfersize > ip->i_size) vnode_pager_setsize(vp, uio->uio_offset + xfersize); if (fs->fs_bsize > xfersize) flags |= B_CLRBUF; else flags &= ~B_CLRBUF; - - error = ffs_balloc(ip, - lbn, blkoffset + xfersize, ap->a_cred, &bp, flags); - if (error) +/* XXX is uio->uio_offset the right thing here? 
*/ + error = VOP_BALLOC(vp, uio->uio_offset, xfersize, + ap->a_cred, flags, &bp); + if (error != 0) break; if (uio->uio_offset + xfersize > ip->i_size) { ip->i_size = uio->uio_offset + xfersize; extended = 1; } size = BLKSIZE(fs, ip, lbn) - bp->b_resid; if (size < xfersize) xfersize = size; error = uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); if (ioflag & IO_VMIO) bp->b_flags |= B_RELBUF; if (ioflag & IO_SYNC) { (void)bwrite(bp); } else if (xfersize + blkoffset == fs->fs_bsize) { if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { bp->b_flags |= B_CLUSTEROK; cluster_write(bp, ip->i_size); } else { bawrite(bp); } } else { bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } if (error || xfersize == 0) break; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * If we successfully wrote any data, and we are not the superuser * we clear the setuid and setgid bits as a precaution against * tampering. */ if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) ip->i_mode &= ~(ISUID | ISGID); if (error) { if (ioflag & IO_UNIT) { (void)UFS_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred, uio->uio_procp); uio->uio_offset -= resid - uio->uio_resid; uio->uio_resid = resid; } } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) { gettime(&tv); error = UFS_UPDATE(vp, &tv, &tv, 1); } if (!error) VN_POLLEVENT(vp, POLLWRITE | (extended ? POLLEXTEND : 0)); if (object) vm_object_vndeallocate(object); return (error); } /* * get page routine */ int ffs_getpages(ap) struct vop_getpages_args *ap; { off_t foff, physoffset; int i, size, bsize; struct vnode *dp, *vp; vm_object_t obj; vm_pindex_t pindex, firstindex; vm_page_t m, mreq; int bbackwards, bforwards; int pbackwards, pforwards; int firstpage; int reqlblkno; daddr_t reqblkno; int poff; int pcount; int rtval; int pagesperblock; pcount = round_page(ap->a_count) / PAGE_SIZE; mreq = ap->a_m[ap->a_reqpage]; firstindex = ap->a_m[0]->pindex; /* * if ANY DEV_BSIZE blocks are valid on a large filesystem block * then, the entire page is valid -- */ if (mreq->valid) { mreq->valid = VM_PAGE_BITS_ALL; for (i = 0; i < pcount; i++) { if (i != ap->a_reqpage) { vm_page_free(ap->a_m[i]); } } return VM_PAGER_OK; } vp = ap->a_vp; obj = vp->v_object; bsize = vp->v_mount->mnt_stat.f_iosize; pindex = mreq->pindex; foff = IDX_TO_OFF(pindex) /* + ap->a_offset should be zero */; if (firstindex == 0) vp->v_lastr = 0; if ((obj->behavior != OBJ_RANDOM) && ((firstindex != 0) && (firstindex <= vp->v_lastr) && ((firstindex + pcount) > vp->v_lastr)) || (obj->behavior == OBJ_SEQUENTIAL)) { struct uio auio; struct iovec aiov; int error; for (i = 0; i < pcount; i++) { m = ap->a_m[i]; vm_page_activate(m); m->busy++; m->flags &= ~PG_BUSY; } auio.uio_iov = &aiov; auio.uio_iovcnt = 1; aiov.iov_base = 0; aiov.iov_len = MAXBSIZE; auio.uio_resid = MAXBSIZE; auio.uio_offset = foff; auio.uio_segflg = UIO_NOCOPY; auio.uio_rw = UIO_READ; auio.uio_procp = curproc; error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16), curproc->p_ucred); for (i = 0; i < pcount; i++) { m = ap->a_m[i]; m->busy--; if ((m != mreq) && (m->wire_count == 0) && (m->hold_count == 0) && (m->valid == 0) && (m->busy == 0) && (m->flags & PG_BUSY) == 0) { m->flags |= PG_BUSY; vm_page_free(m); } else if (m == mreq) { while (m->flags & PG_BUSY) { vm_page_sleep(m, "ffspwt", NULL); } m->flags |= PG_BUSY; vp->v_lastr = m->pindex + 1; } else { if (m->wire_count == 0) { if (m->busy || (m->flags & PG_MAPPED) || (m->flags & (PG_WANTED | PG_BUSY)) == PG_WANTED) { vm_page_activate(m); } else { vm_page_deactivate(m); } } 
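The WRITE loop above releases each buffer according to how much of the block it filled: IO_SYNC forces an immediate bwrite(), a completely filled block is pushed right away (clustered when the mount allows it), and a partially filled block is left as a delayed write in the hope it will be completed later. That policy, reduced to a small decision function with illustrative names:

#include <stdio.h>

#define BSIZE 8192

enum flush { FLUSH_SYNC, FLUSH_ASYNC_FULL, FLUSH_DELAYED };

/*
 * Mirror of the WRITE loop's buffer-release policy: synchronous
 * writes go out immediately, a completely filled block is pushed
 * (possibly clustered) right away, and a partial block stays
 * dirty in the cache via a delayed write.
 */
static enum flush
flush_policy(int io_sync, int blkoffset, int xfersize)
{
	if (io_sync)
		return (FLUSH_SYNC);		/* bwrite() */
	if (blkoffset + xfersize == BSIZE)
		return (FLUSH_ASYNC_FULL);	/* cluster_write()/bawrite() */
	return (FLUSH_DELAYED);			/* bdwrite() */
}

int
main(void)
{
	printf("%d %d %d\n",
	    flush_policy(1, 0, 100),		/* FLUSH_SYNC */
	    flush_policy(0, 0, BSIZE),		/* FLUSH_ASYNC_FULL */
	    flush_policy(0, 0, 100));		/* FLUSH_DELAYED */
	return (0);
}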
vp->v_lastr = m->pindex + 1; } } if (mreq->valid == 0) return VM_PAGER_ERROR; mreq->valid = VM_PAGE_BITS_ALL; return VM_PAGER_OK; } /* * foff is the file offset of the required page * reqlblkno is the logical block that contains the page * poff is the index of the page into the logical block */ reqlblkno = foff / bsize; poff = (foff % bsize) / PAGE_SIZE; if ( VOP_BMAP( vp, reqlblkno, &dp, &reqblkno, &bforwards, &bbackwards) || (reqblkno == -1)) { for(i = 0; i < pcount; i++) { if (i != ap->a_reqpage) vm_page_free(ap->a_m[i]); } if (reqblkno == -1) { if ((mreq->flags & PG_ZERO) == 0) vm_page_zero_fill(mreq); mreq->dirty = 0; mreq->valid = VM_PAGE_BITS_ALL; return VM_PAGER_OK; } else { return VM_PAGER_ERROR; } } physoffset = (off_t)reqblkno * DEV_BSIZE + poff * PAGE_SIZE; pagesperblock = bsize / PAGE_SIZE; /* * find the first page that is contiguous... * note that pbackwards is the number of pages that are contiguous * backwards. */ firstpage = 0; if (ap->a_count) { pbackwards = poff + bbackwards * pagesperblock; if (ap->a_reqpage > pbackwards) { firstpage = ap->a_reqpage - pbackwards; for(i=0;i<firstpage;i++) vm_page_free(ap->a_m[i]); } /* * pforwards is the number of pages that are contiguous * after the current page. */ pforwards = (pagesperblock - (poff + 1)) + bforwards * pagesperblock; if (pforwards < (pcount - (ap->a_reqpage + 1))) { for( i = ap->a_reqpage + pforwards + 1; i < pcount; i++) vm_page_free(ap->a_m[i]); pcount = ap->a_reqpage + pforwards + 1; } /* * number of pages for I/O corrected for the non-contig pages at * the beginning of the array. */ pcount -= firstpage; } /* * calculate the size of the transfer */ size = pcount * PAGE_SIZE; vp->v_lastr = mreq->pindex + pcount; if ((IDX_TO_OFF(ap->a_m[firstpage]->pindex) + size) > obj->un_pager.vnp.vnp_size) size = obj->un_pager.vnp.vnp_size - foff; physoffset -= foff; rtval = VOP_GETPAGES(dp, &ap->a_m[firstpage], size, (ap->a_reqpage - firstpage), physoffset); return (rtval); } /* * put page routine * * XXX By default, wimp out... note that a_offset is ignored (and always * XXX has been). */ int ffs_putpages(ap) struct vop_putpages_args *ap; { return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, ap->a_sync, ap->a_rtvals); } Index: head/sys/ufs/ufs/ufs_vnops.c =================================================================== --- head/sys/ufs/ufs/ufs_vnops.c (revision 34265) +++ head/sys/ufs/ufs/ufs_vnops.c (revision 34266) @@ -1,2245 +1,2255 @@ /* * Copyright (c) 1982, 1986, 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3.
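ffs_getpages() above trims the caller's page array down to the run that is physically contiguous on disk: VOP_BMAP() reports how many whole blocks are contiguous before and after the requested block, and pbackwards/pforwards convert those counts into pages. A user-space rendition of the trimming arithmetic, with illustrative page and block sizes:

#include <stdio.h>

#define PAGE_SIZE 4096
#define BSIZE 8192	/* stand-in for f_iosize */

/*
 * Given the index of the requested page within its fs block (poff),
 * BMAP's contiguous-block counts before/after it, and the page
 * array geometry, compute the [firstpage, firstpage + pcount)
 * window that one contiguous device transfer can satisfy.
 */
static void
trim_run(int poff, int bbackwards, int bforwards, int reqpage, int pcount)
{
	int pagesperblock = BSIZE / PAGE_SIZE;
	int firstpage = 0;
	int pbackwards = poff + bbackwards * pagesperblock;
	int pforwards = (pagesperblock - (poff + 1)) +
	    bforwards * pagesperblock;

	if (reqpage > pbackwards)	/* leading pages not contiguous */
		firstpage = reqpage - pbackwards;
	if (pforwards < pcount - (reqpage + 1))	/* trailing pages */
		pcount = reqpage + pforwards + 1;
	pcount -= firstpage;
	printf("read %d pages starting at array slot %d\n",
	    pcount, firstpage);
}

int
main(void)
{
	trim_run(0, 0, 1, 4, 16);	/* one extra contiguous block after */
	return (0);
}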
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 - * $Id: ufs_vnops.c,v 1.77 1998/02/06 12:14:19 eivind Exp $ + * $Id: ufs_vnops.c,v 1.78 1998/02/09 06:11:14 eivind Exp $ */ #include "opt_quota.h" #include "opt_suiddir.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include +#include #include #include #include #include #include #include #include static int ufs_abortop __P((struct vop_abortop_args *)); static int ufs_access __P((struct vop_access_args *)); static int ufs_advlock __P((struct vop_advlock_args *)); static int ufs_chmod __P((struct vnode *, int, struct ucred *, struct proc *)); static int ufs_chown __P((struct vnode *, uid_t, gid_t, struct ucred *, struct proc *)); static int ufs_close __P((struct vop_close_args *)); static int ufs_create __P((struct vop_create_args *)); static int ufs_getattr __P((struct vop_getattr_args *)); static int ufs_link __P((struct vop_link_args *)); static int ufs_makeinode __P((int mode, struct vnode *, struct vnode **, struct componentname *)); static int ufs_missingop __P((struct vop_generic_args *ap)); static int ufs_mkdir __P((struct vop_mkdir_args *)); static int ufs_mknod __P((struct vop_mknod_args *)); static int ufs_mmap __P((struct vop_mmap_args *)); static int ufs_open __P((struct vop_open_args *)); static int ufs_pathconf __P((struct vop_pathconf_args *)); static int ufs_print __P((struct vop_print_args *)); static int ufs_readdir __P((struct vop_readdir_args *)); static int ufs_readlink __P((struct vop_readlink_args *)); static int ufs_remove __P((struct vop_remove_args *)); static int ufs_rename __P((struct vop_rename_args *)); static int ufs_rmdir __P((struct vop_rmdir_args *)); static int ufs_setattr __P((struct vop_setattr_args *)); static int ufs_strategy __P((struct vop_strategy_args *)); static int ufs_symlink __P((struct vop_symlink_args *)); static int ufs_whiteout __P((struct vop_whiteout_args *)); static int ufsfifo_close __P((struct vop_close_args *)); static int ufsfifo_read __P((struct vop_read_args *)); static int ufsfifo_write __P((struct vop_write_args *)); static int ufsspec_close __P((struct vop_close_args *)); static int ufsspec_read __P((struct vop_read_args *)); static int ufsspec_write __P((struct vop_write_args *)); union _qcvt { int64_t qcvt; int32_t val[2]; }; 
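The _qcvt union that closes the declarations above exists so the two 32-bit halves of a 64-bit quantity can be addressed by word index; the SETHIGH/SETLOW macros defined next use it to splice one word into i_modrev without shifts or masks. A standalone demonstration; the word indexes are hard-coded little-endian here, where the kernel takes _QUAD_HIGHWORD/_QUAD_LOWWORD from its endian headers:

#include <stdint.h>
#include <stdio.h>

union qcvt {
	int64_t qcvt;
	int32_t val[2];
};

/* Little-endian word order; the kernel derives these indexes. */
#define QUAD_LOWWORD  0
#define QUAD_HIGHWORD 1

#define SETHIGH(q, h) do {			\
	union qcvt tmp;				\
	tmp.qcvt = (q);				\
	tmp.val[QUAD_HIGHWORD] = (h);		\
	(q) = tmp.qcvt;				\
} while (0)

#define SETLOW(q, l) do {			\
	union qcvt tmp;				\
	tmp.qcvt = (q);				\
	tmp.val[QUAD_LOWWORD] = (l);		\
	(q) = tmp.qcvt;				\
} while (0)

int
main(void)
{
	int64_t modrev = 0;

	/* As in ufs_vinit(): seconds in the high word, scaled usecs low. */
	SETHIGH(modrev, 889000000);
	SETLOW(modrev, 123456 * 4294);
	printf("modrev = %lld\n", (long long)modrev);
	return (0);
}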
#define SETHIGH(q, h) { \ union _qcvt tmp; \ tmp.qcvt = (q); \ tmp.val[_QUAD_HIGHWORD] = (h); \ (q) = tmp.qcvt; \ } #define SETLOW(q, l) { \ union _qcvt tmp; \ tmp.qcvt = (q); \ tmp.val[_QUAD_LOWWORD] = (l); \ (q) = tmp.qcvt; \ } /* + * A virgin directory (no blushing please). + */ +static struct dirtemplate mastertemplate = { + 0, 12, DT_DIR, 1, ".", + 0, DIRBLKSIZ - 12, DT_DIR, 2, ".." +}; +static struct odirtemplate omastertemplate = { + 0, 12, 1, ".", + 0, DIRBLKSIZ - 12, 2, ".." +}; + +/* * Create a regular file */ int ufs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { int error; error = ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), ap->a_dvp, ap->a_vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); return (0); } /* * Mknod vnode call */ /* ARGSUSED */ int ufs_mknod(ap) struct vop_mknod_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode **vpp = ap->a_vpp; struct inode *ip; int error; error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); ip = VTOI(*vpp); ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; if (vap->va_rdev != VNOVAL) { /* * Want to be able to use this to make badblock * inodes, so don't truncate the dev number. */ ip->i_rdev = vap->va_rdev; } /* * Remove inode so that it will be reloaded by VFS_VGET and * checked to see if it is an alias of an existing entry in * the inode cache. */ vput(*vpp); (*vpp)->v_type = VNON; vgone(*vpp); *vpp = 0; return (0); } /* * Open called. * * Nothing to do. */ /* ARGSUSED */ int ufs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { /* * Files marked append-only must be opened for appending. */ if ((VTOI(ap->a_vp)->i_flags & APPEND) && (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) return (EPERM); return (0); } /* * Close called. * * Update the times on the inode. */ /* ARGSUSED */ int ufs_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) ITIMES(ip, &time, &time); simple_unlock(&vp->v_interlock); return (0); } int ufs_access(ap) struct vop_access_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; mode_t mask, mode = ap->a_mode; register gid_t *gp; int i; #ifdef QUOTA int error; #endif /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ if (mode & VWRITE) { switch (vp->v_type) { case VDIR: case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); #ifdef QUOTA if (error = getinoquota(ip)) return (error); #endif break; + default: + break; } } /* If immutable bit set, nobody gets to write it. */ if ((mode & VWRITE) && (ip->i_flags & IMMUTABLE)) return (EPERM); /* Otherwise, user id 0 always gets access. */ if (cred->cr_uid == 0) return (0); mask = 0; /* Otherwise, check the owner. 
*/ if (cred->cr_uid == ip->i_uid) { if (mode & VEXEC) mask |= S_IXUSR; if (mode & VREAD) mask |= S_IRUSR; if (mode & VWRITE) mask |= S_IWUSR; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check the groups. */ for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) if (ip->i_gid == *gp) { if (mode & VEXEC) mask |= S_IXGRP; if (mode & VREAD) mask |= S_IRGRP; if (mode & VWRITE) mask |= S_IWGRP; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* Otherwise, check everyone else. */ if (mode & VEXEC) mask |= S_IXOTH; if (mode & VREAD) mask |= S_IROTH; if (mode & VWRITE) mask |= S_IWOTH; return ((ip->i_mode & mask) == mask ? 0 : EACCES); } /* ARGSUSED */ int ufs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); register struct vattr *vap = ap->a_vap; ITIMES(ip, &time, &time); /* * Copy from inode table */ vap->va_fsid = ip->i_dev; vap->va_fileid = ip->i_number; vap->va_mode = ip->i_mode & ~IFMT; - vap->va_nlink = ip->i_nlink; + vap->va_nlink = ip->i_effnlink; vap->va_uid = ip->i_uid; vap->va_gid = ip->i_gid; vap->va_rdev = (dev_t)ip->i_rdev; vap->va_size = ip->i_din.di_size; vap->va_atime.tv_sec = ip->i_atime; vap->va_atime.tv_nsec = ip->i_atimensec; vap->va_mtime.tv_sec = ip->i_mtime; vap->va_mtime.tv_nsec = ip->i_mtimensec; vap->va_ctime.tv_sec = ip->i_ctime; vap->va_ctime.tv_nsec = ip->i_ctimensec; vap->va_flags = ip->i_flags; vap->va_gen = ip->i_gen; /* this doesn't belong here */ if (vp->v_type == VBLK) vap->va_blocksize = BLKDEV_IOSIZE; else if (vp->v_type == VCHR) vap->va_blocksize = MAXBSIZE; else vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); vap->va_type = vp->v_type; vap->va_filerev = ip->i_modrev; return (0); } /* * Set attribute vnode op. called from several syscalls */ int ufs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vattr *vap = ap->a_vap; struct vnode *vp = ap->a_vp; struct inode *ip = VTOI(vp); struct ucred *cred = ap->a_cred; struct proc *p = ap->a_p; struct timeval atimeval, mtimeval; int error; /* * Check for unsettable attributes. */ if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { return (EINVAL); } if (vap->va_flags != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != ip->i_uid && (error = suser(cred, &p->p_acflag))) return (error); if (cred->cr_uid == 0) { if ((ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) && securelevel > 0) return (EPERM); ip->i_flags = vap->va_flags; } else { if (ip->i_flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) || (vap->va_flags & UF_SETTABLE) != vap->va_flags) return (EPERM); ip->i_flags &= SF_SETTABLE; ip->i_flags |= (vap->va_flags & UF_SETTABLE); } ip->i_flag |= IN_CHANGE; if (vap->va_flags & (IMMUTABLE | APPEND)) return (0); } if (ip->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* * Go through the fields and update iff not VNOVAL. 
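ufs_access() above settles a request against exactly one permission class: owner if the uid matches, otherwise the first matching group, otherwise "other"; the requested VREAD/VWRITE/VEXEC bits are translated into that class's mode bits and tested in a single comparison. The same logic as a user-space function; the VREAD/VWRITE/VEXEC values below are local stand-ins, not the kernel's:

#include <stdio.h>
#include <sys/stat.h>

#define VREAD  4
#define VWRITE 2
#define VEXEC  1

/* Returns 0 if granted, -1 if denied; cf. ufs_access(). */
static int
access_check(mode_t fmode, unsigned fuid, unsigned fgid,
    unsigned uid, const unsigned *groups, int ngroups, int req)
{
	mode_t mask = 0;
	int i;

	if (uid == 0)
		return (0);		/* root always wins here */
	if (uid == fuid) {		/* owner class */
		if (req & VEXEC)  mask |= S_IXUSR;
		if (req & VREAD)  mask |= S_IRUSR;
		if (req & VWRITE) mask |= S_IWUSR;
		return ((fmode & mask) == mask ? 0 : -1);
	}
	for (i = 0; i < ngroups; i++)	/* group class */
		if (groups[i] == fgid) {
			if (req & VEXEC)  mask |= S_IXGRP;
			if (req & VREAD)  mask |= S_IRGRP;
			if (req & VWRITE) mask |= S_IWGRP;
			return ((fmode & mask) == mask ? 0 : -1);
		}
	if (req & VEXEC)  mask |= S_IXOTH;	/* other class */
	if (req & VREAD)  mask |= S_IROTH;
	if (req & VWRITE) mask |= S_IWOTH;
	return ((fmode & mask) == mask ? 0 : -1);
}

int
main(void)
{
	unsigned groups[] = { 100 };

	/* group member asking to read a 0640 file: granted (0) */
	printf("%d\n", access_check(0640, 1000, 100,
	    1001, groups, 1, VREAD));
	return (0);
}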
*/ if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred, p)) return (error); } if (vap->va_size != VNOVAL) { /* * Disallow write attempts on read-only file systems; * unless the file is a socket, fifo, or a block or * character device resident on the file system. */ switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); break; + default: + break; } if (error = UFS_TRUNCATE(vp, vap->va_size, 0, cred, p)) return (error); } ip = VTOI(vp); if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); if (cred->cr_uid != ip->i_uid && (error = suser(cred, &p->p_acflag)) && ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || (error = VOP_ACCESS(vp, VWRITE, cred, p)))) return (error); if (vap->va_atime.tv_sec != VNOVAL) ip->i_flag |= IN_ACCESS; if (vap->va_mtime.tv_sec != VNOVAL) ip->i_flag |= IN_CHANGE | IN_UPDATE; atimeval.tv_sec = vap->va_atime.tv_sec; atimeval.tv_usec = vap->va_atime.tv_nsec / 1000; mtimeval.tv_sec = vap->va_mtime.tv_sec; mtimeval.tv_usec = vap->va_mtime.tv_nsec / 1000; - error = UFS_UPDATE(vp, &atimeval, &mtimeval, 1); + error = UFS_UPDATE(vp, &atimeval, &mtimeval, 0); if (error) return (error); } error = 0; if (vap->va_mode != (mode_t)VNOVAL) { if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); error = ufs_chmod(vp, (int)vap->va_mode, cred, p); } VN_POLLEVENT(vp, POLLATTRIB); return (error); } /* * Change the mode on a file. * Inode must be locked before calling. */ static int ufs_chmod(vp, mode, cred, p) register struct vnode *vp; register int mode; register struct ucred *cred; struct proc *p; { register struct inode *ip = VTOI(vp); int error; if (cred->cr_uid != ip->i_uid) { error = suser(cred, &p->p_acflag); if (error) return (error); } if (cred->cr_uid) { if (vp->v_type != VDIR && (mode & S_ISTXT)) return (EFTYPE); if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) return (EPERM); } ip->i_mode &= ~ALLPERMS; ip->i_mode |= (mode & ALLPERMS); ip->i_flag |= IN_CHANGE; return (0); } /* * Perform chown operation on inode ip; * inode must be locked prior to call. */ static int ufs_chown(vp, uid, gid, cred, p) register struct vnode *vp; uid_t uid; gid_t gid; struct ucred *cred; struct proc *p; { register struct inode *ip = VTOI(vp); uid_t ouid; gid_t ogid; int error = 0; #ifdef QUOTA register int i; long change; #endif if (uid == (uid_t)VNOVAL) uid = ip->i_uid; if (gid == (gid_t)VNOVAL) gid = ip->i_gid; /* * If we don't own the file, are trying to change the owner * of the file, or are not a member of the target group, * the caller must be superuser or the call fails. 
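ufs_chmod() above combines two privilege checks with a careful bit splice: a non-root caller may not set the sticky bit on a non-directory (EFTYPE) nor the setgid bit for a group it does not belong to (EPERM), and the update touches only the ALLPERMS bits, never the IFMT type bits. A sketch with flags standing in for vnode and credential state; EFTYPE is the BSD errno used here:

#include <errno.h>
#include <stdio.h>
#include <sys/stat.h>

#define ALLPERMS (S_ISUID|S_ISGID|S_ISVTX|S_IRWXU|S_IRWXG|S_IRWXO)

/* cf. ufs_chmod(); is_dir/in_group stand in for vnode/cred state. */
static int
chmod_check(mode_t *imode, mode_t newmode, int is_root, int is_dir,
    int in_group)
{
	if (!is_root) {
		if (!is_dir && (newmode & S_ISVTX))
			return (EFTYPE);	/* sticky only on dirs */
		if (!in_group && (newmode & S_ISGID))
			return (EPERM);		/* can't grant foreign gid */
	}
	/* Splice in permission bits only; file type is untouched. */
	*imode = (*imode & ~ALLPERMS) | (newmode & ALLPERMS);
	return (0);
}

int
main(void)
{
	mode_t m = S_IFREG | 0644;

	printf("%d\n", chmod_check(&m, 0755, 0, 0, 1));	 /* 0: ok */
	printf("%o\n", (unsigned)(m & ALLPERMS));	 /* 755 */
	printf("%d\n", chmod_check(&m, 01644, 0, 0, 1)); /* EFTYPE */
	return (0);
}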
*/ if ((cred->cr_uid != ip->i_uid || uid != ip->i_uid || (gid != ip->i_gid && !groupmember((gid_t)gid, cred))) && (error = suser(cred, &p->p_acflag))) return (error); ogid = ip->i_gid; ouid = ip->i_uid; #ifdef QUOTA if (error = getinoquota(ip)) return (error); if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } change = ip->i_blocks; (void) chkdq(ip, -change, cred, CHOWN); (void) chkiq(ip, -1, cred, CHOWN); for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } #endif ip->i_gid = gid; ip->i_uid = uid; #ifdef QUOTA if ((error = getinoquota(ip)) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } if ((error = chkdq(ip, change, cred, CHOWN)) == 0) { if ((error = chkiq(ip, 1, cred, CHOWN)) == 0) goto good; else (void) chkdq(ip, -change, cred, CHOWN|FORCE); } for (i = 0; i < MAXQUOTAS; i++) { dqrele(vp, ip->i_dquot[i]); ip->i_dquot[i] = NODQUOT; } } ip->i_gid = ogid; ip->i_uid = ouid; if (getinoquota(ip) == 0) { if (ouid == uid) { dqrele(vp, ip->i_dquot[USRQUOTA]); ip->i_dquot[USRQUOTA] = NODQUOT; } if (ogid == gid) { dqrele(vp, ip->i_dquot[GRPQUOTA]); ip->i_dquot[GRPQUOTA] = NODQUOT; } (void) chkdq(ip, change, cred, FORCE|CHOWN); (void) chkiq(ip, 1, cred, FORCE|CHOWN); (void) getinoquota(ip); } return (error); good: if (getinoquota(ip)) panic("ufs_chown: lost quota"); #endif /* QUOTA */ ip->i_flag |= IN_CHANGE; if (cred->cr_uid != 0 && (ouid != uid || ogid != gid)) ip->i_mode &= ~(ISUID | ISGID); return (0); } /* * Mmap a file * * NB Currently unsupported. */ /* ARGSUSED */ int ufs_mmap(ap) struct vop_mmap_args /* { struct vnode *a_vp; int a_fflags; struct ucred *a_cred; struct proc *a_p; } */ *ap; { return (EINVAL); } int ufs_remove(ap) struct vop_remove_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct inode *ip; struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; int error; ip = VTOI(vp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(dvp)->i_flags & APPEND)) { error = EPERM; goto out; } - error = ufs_dirremove(dvp, ap->a_cnp); - if (error == 0) { - ip->i_nlink--; - ip->i_flag |= IN_CHANGE; - } + error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0); VN_POLLEVENT(vp, POLLNLINK); VN_POLLEVENT(dvp, POLLWRITE); out: if (dvp == vp) vrele(vp); else vput(vp); vput(dvp); return (error); } /* * link vnode call */ int ufs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; struct proc *p = cnp->cn_proc; struct inode *ip; struct timeval tv; + struct direct newdir; int error; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_link: no name"); #endif if (tdvp->v_mount != vp->v_mount) { VOP_ABORTOP(tdvp, cnp); error = EXDEV; goto out2; } if (tdvp != vp && (error = vn_lock(vp, LK_EXCLUSIVE, p))) { VOP_ABORTOP(tdvp, cnp); goto out2; } ip = VTOI(vp); if ((nlink_t)ip->i_nlink >= LINK_MAX) { VOP_ABORTOP(tdvp, cnp); error = EMLINK; goto out1; } if (ip->i_flags & (IMMUTABLE | APPEND)) { VOP_ABORTOP(tdvp, cnp); error = EPERM; goto out1; } + ip->i_effnlink++; ip->i_nlink++; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(vp)) + softdep_increase_linkcnt(ip); gettime(&tv); - error 
= UFS_UPDATE(vp, &tv, &tv, 1); + error = UFS_UPDATE(vp, &tv, &tv, !DOINGSOFTDEP(vp)); if (!error) { - error = ufs_direnter(ip, tdvp, cnp); + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL); } if (error) { + ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; } zfree(namei_zone, cnp->cn_pnbuf); out1: if (tdvp != vp) VOP_UNLOCK(vp, 0, p); out2: VN_POLLEVENT(vp, POLLNLINK); VN_POLLEVENT(tdvp, POLLWRITE); vput(tdvp); return (error); } /* * whiteout vnode call */ int ufs_whiteout(ap) struct vop_whiteout_args /* { struct vnode *a_dvp; struct componentname *a_cnp; int a_flags; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct direct newdir; int error = 0; switch (ap->a_flags) { case LOOKUP: /* 4.4 format directories support whiteout operations */ if (dvp->v_mount->mnt_maxsymlinklen > 0) return (0); return (EOPNOTSUPP); case CREATE: /* create a new directory whiteout */ #ifdef DIAGNOSTIC if ((cnp->cn_flags & SAVENAME) == 0) panic("ufs_whiteout: missing name"); if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif newdir.d_ino = WINO; newdir.d_namlen = cnp->cn_namelen; bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1); newdir.d_type = DT_WHT; - error = ufs_direnter2(dvp, &newdir, cnp->cn_cred, cnp->cn_proc); + error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL); break; case DELETE: /* remove an existing directory whiteout */ #ifdef DIAGNOSTIC if (dvp->v_mount->mnt_maxsymlinklen <= 0) panic("ufs_whiteout: old format filesystem"); #endif cnp->cn_flags &= ~DOWHITEOUT; - error = ufs_dirremove(dvp, cnp); + error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0); break; + default: + panic("ufs_whiteout: unknown op"); } if (cnp->cn_flags & HASBUF) { zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_flags &= ~HASBUF; } return (error); } /* * Rename system call. * rename("foo", "bar"); * is essentially * unlink("bar"); * link("foo", "bar"); * unlink("foo"); * but ``atomically''. Can't do full commit without saving state in the * inode on disk which isn't feasible at this time. Best we can do is * always guarantee the target exists. * * Basic algorithm is: * * 1) Bump link count on source while we're linking it to the * target. This also ensure the inode won't be deleted out * from underneath us while we work (it may be truncated by * a concurrent `trunc' or `open' for creation). * 2) Link source to destination. If destination already exists, * delete it first. * 3) Unlink source reference to inode if still around. If a * directory was moved and the parent of the destination * is different from the source, patch the ".." entry in the * directory. */ int ufs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *tvp = ap->a_tvp; register struct vnode *tdvp = ap->a_tdvp; struct vnode *fvp = ap->a_fvp; struct vnode *fdvp = ap->a_fdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; struct proc *p = fcnp->cn_proc; struct inode *ip, *xp, *dp; - struct dirtemplate dirbuf; + struct direct newdir; struct timeval tv; int doingdirectory = 0, oldparent = 0, newparent = 0; int error = 0; - u_char namlen; #ifdef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("ufs_rename: no name"); #endif /* * Check for cross-device rename. 
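ufs_link() above shows the ordering pattern this commit introduces throughout: bump both i_nlink (the on-disk count) and the new i_effnlink (entries actually in existence), push the inode to disk first, synchronously unless soft updates will order the writes, and only then add the directory entry, undoing both counters if that fails. The skeleton, with hypothetical stand-ins for UFS_UPDATE() and ufs_direnter():

#include <stdio.h>

struct xinode {
	int nlink;	/* on-disk link count */
	int effnlink;	/* in-core count: entries actually present */
};

/* Stand-ins for UFS_UPDATE() and ufs_direnter(); either may fail. */
static int update_inode(struct xinode *ip, int waitfor) { (void)ip; (void)waitfor; return (0); }
static int enter_name(const char *name) { return (name[0] ? 0 : -1); }

static int
do_link(struct xinode *ip, const char *name, int softdep)
{
	int error;

	ip->effnlink++;
	ip->nlink++;
	/*
	 * Without soft updates the inode must reach disk (waitfor=1)
	 * before the name that references it becomes visible.
	 */
	error = update_inode(ip, !softdep);
	if (error == 0)
		error = enter_name(name);
	if (error) {		/* roll back both counters */
		ip->effnlink--;
		ip->nlink--;
	}
	return (error);
}

int
main(void)
{
	struct xinode ino = { 1, 1 };

	printf("%d nlink=%d\n", do_link(&ino, "b", 0), ino.nlink);
	return (0);
}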
*/ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; abortit: VOP_ABORTOP(tdvp, tcnp); /* XXX, why not in NFS? */ if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); VOP_ABORTOP(fdvp, fcnp); /* XXX, why not in NFS? */ vrele(fdvp); vrele(fvp); return (error); } if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (VTOI(tdvp)->i_flags & APPEND))) { error = EPERM; goto abortit; } /* * Check if just deleting a link name or if we've lost a race. * If another process completes the same rename after we've looked * up the source and have blocked looking up the target, then the * source and target inodes may be identical now although the * names were never linked. */ if (fvp == tvp) { if (fvp->v_type == VDIR) { /* * Linked directories are impossible, so we must * have lost the race. Pretend that the rename * completed before the lookup. */ #ifdef UFS_RENAME_DEBUG printf("ufs_rename: fvp == tvp for directories\n"); #endif error = ENOENT; goto abortit; } /* Release destination completely. */ VOP_ABORTOP(tdvp, tcnp); vput(tdvp); vput(tvp); /* * Delete source. There is another race now that everything * is unlocked, but this doesn't cause any new complications. * Relookup() may find a file that is unrelated to the * original one, or it may fail. Too bad. */ vrele(fdvp); vrele(fvp); fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); fcnp->cn_nameiop = DELETE; VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp == NULL) { #ifdef UFS_RENAME_DEBUG printf("ufs_rename: from name disappeared\n"); #endif return (ENOENT); } return (VOP_REMOVE(fdvp, fvp, fcnp)); } if (error = vn_lock(fvp, LK_EXCLUSIVE, p)) goto abortit; dp = VTOI(fdvp); ip = VTOI(fvp); if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || (dp->i_flags & APPEND)) { VOP_UNLOCK(fvp, 0, p); error = EPERM; goto abortit; } if ((ip->i_mode & IFMT) == IFDIR) { /* * Avoid ".", "..", and aliases of "." for obvious reasons. */ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || (ip->i_flag & IN_RENAME)) { VOP_UNLOCK(fvp, 0, p); error = EINVAL; goto abortit; } ip->i_flag |= IN_RENAME; oldparent = dp->i_number; doingdirectory++; } VN_POLLEVENT(fdvp, POLLWRITE); vrele(fdvp); /* * When the target exists, both the directory * and target vnodes are returned locked. */ dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); /* * 1) Bump link count while we're moving stuff * around. If we crash somewhere before * completing our work, the link count * may be wrong, but correctable. */ + ip->i_effnlink++; ip->i_nlink++; ip->i_flag |= IN_CHANGE; + if (DOINGSOFTDEP(fvp)) + softdep_increase_linkcnt(ip); gettime(&tv); - if (error = UFS_UPDATE(fvp, &tv, &tv, 1)) { + if (error = UFS_UPDATE(fvp, &tv, &tv, !DOINGSOFTDEP(fvp))) { VOP_UNLOCK(fvp, 0, p); goto bad; } /* * If ".." must be changed (ie the directory gets a new * parent) then the source directory must not be in the * directory heirarchy above the target, as this would * orphan everything below the source directory. Also * the user must have write permission in the source so * as to be able to change "..". We must repeat the call * to namei, as the parent directory is unlocked by the * call to checkpath(). 
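ufs_checkpath(), called shortly after this comment, prevents a rename from moving a directory beneath itself: starting at the proposed new parent it follows ".." links toward the root, and if it meets the source directory the rename would orphan the entire subtree. The walk, reduced to an array of parent inode numbers:

#include <stdio.h>

#define ROOT 2		/* ROOTINO */

/* parent[i] = inode number of directory i's ".." entry */
static int
checkpath(int source, int target, const int *parent, int nino)
{
	int ino = target;

	while (ino != ROOT) {
		if (ino == source)
			return (-1);	/* would orphan the subtree: EINVAL */
		if (ino <= 0 || ino >= nino)
			return (-1);	/* corrupt ".." chain */
		ino = parent[ino];	/* follow ".." one level up */
	}
	return (0);
}

int
main(void)
{
	/* 2 is root; dir 3 lives under 2; dir 4 lives under 3 */
	int parent[] = { 0, 0, 2, 2, 3 };

	printf("%d\n", checkpath(3, 4, parent, 5));	/* -1: 3 above 4 */
	printf("%d\n", checkpath(4, 3, parent, 5));	/* 0: legal */
	return (0);
}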
*/ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); VOP_UNLOCK(fvp, 0, p); if (oldparent != dp->i_number) newparent = dp->i_number; if (doingdirectory && newparent) { if (error) /* write access check above */ goto bad; if (xp != NULL) vput(tvp); error = ufs_checkpath(ip, dp, tcnp->cn_cred); if (error) goto out; if ((tcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost to startdir"); VREF(tdvp); error = relookup(tdvp, &tvp, tcnp); if (error) goto out; vrele(tdvp); dp = VTOI(tdvp); xp = NULL; if (tvp) xp = VTOI(tvp); } /* * 2) If target doesn't exist, link the target * to the source and unlink the source. * Otherwise, rewrite the target directory * entry to reference the source inode and * expunge the original entry's existence. */ if (xp == NULL) { if (dp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Account for ".." in new directory. * When source and destination have the same * parent we don't fool with the link count. */ if (doingdirectory && newparent) { if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto bad; } + dp->i_effnlink++; dp->i_nlink++; dp->i_flag |= IN_CHANGE; - error = UFS_UPDATE(tdvp, &tv, &tv, 1); + if (DOINGSOFTDEP(tdvp)) + softdep_increase_linkcnt(dp); + error = UFS_UPDATE(tdvp, &tv, &tv, !DOINGSOFTDEP(tdvp)); if (error) goto bad; } - error = ufs_direnter(ip, tdvp, tcnp); + ufs_makedirentry(ip, tcnp, &newdir); + error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL); if (error) { if (doingdirectory && newparent) { + dp->i_effnlink--; dp->i_nlink--; dp->i_flag |= IN_CHANGE; (void)UFS_UPDATE(tdvp, &tv, &tv, 1); } goto bad; } VN_POLLEVENT(tdvp, POLLWRITE); vput(tdvp); } else { if (xp->i_dev != dp->i_dev || xp->i_dev != ip->i_dev) panic("ufs_rename: EXDEV"); /* * Short circuit rename(foo, foo). */ if (xp->i_number == ip->i_number) panic("ufs_rename: same file"); /* * If the parent directory is "sticky", then the user must * own the parent directory, or the destination of the rename, * otherwise the destination may not be changed (except by * root). This implements append-only directories. */ if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && tcnp->cn_cred->cr_uid != dp->i_uid && xp->i_uid != tcnp->cn_cred->cr_uid) { error = EPERM; goto bad; } /* * Target must be empty if a directory and have no links * to it. Also, ensure source and target are compatible * (both directories, or both not directories). */ if ((xp->i_mode&IFMT) == IFDIR) { - if (! ufs_dirempty - (xp, dp->i_number, tcnp->cn_cred) || - xp->i_nlink > 2) { + if ((xp->i_effnlink > 2) || + !ufs_dirempty(xp, dp->i_number, tcnp->cn_cred)) { error = ENOTEMPTY; goto bad; } if (!doingdirectory) { error = ENOTDIR; goto bad; } cache_purge(tdvp); } else if (doingdirectory) { error = EISDIR; goto bad; } - error = ufs_dirrewrite(dp, ip, tcnp); + error = ufs_dirrewrite(dp, xp, ip->i_number, + IFTODT(ip->i_mode), doingdirectory); if (error) goto bad; - /* - * If the target directory is in the same - * directory as the source directory, - * decrement the link count on the parent - * of the target directory. - */ - if (doingdirectory && !newparent) { - dp->i_nlink--; + if (doingdirectory) { + dp->i_effnlink--; dp->i_flag |= IN_CHANGE; + xp->i_effnlink--; + xp->i_flag |= IN_CHANGE; } VN_POLLEVENT(tdvp, POLLWRITE); - vput(tdvp); - /* - * Adjust the link count of the target to - * reflect the dirrewrite above. If this is - * a directory it is empty and there are - * no links to it, so we can squash the inode and - * any space associated with it. 
We disallowed - * renaming over top of a directory with links to - * it above, as the remaining link would point to - * a directory without "." or ".." entries. - */ - xp->i_nlink--; - if (doingdirectory) { - if (--xp->i_nlink != 0) - panic("ufs_rename: linked directory"); - error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC, - tcnp->cn_cred, tcnp->cn_proc); + if (doingdirectory && !DOINGSOFTDEP(tvp)) { + /* + * Truncate inode. The only stuff left in the directory + * is "." and "..". The "." reference is inconsequential + * since we are quashing it. We have removed the "." + * reference and the reference in the parent directory, + * but there may be other hard links. The soft + * dependency code will arrange to do these operations + * after the parent directory entry has been deleted on + * disk, so when running with that code we avoid doing + * them now. + */ + dp->i_nlink--; + xp->i_nlink--; + if ((error = UFS_TRUNCATE(tvp, (off_t)0, IO_SYNC, + tcnp->cn_cred, tcnp->cn_proc)) != 0) + goto bad; } - xp->i_flag |= IN_CHANGE; - VN_POLLEVENT(tvp, POLLNLINK); + vput(tdvp); + VN_POLLEVENT(tvp, POLLNLINK); /* XXX this right? */ vput(tvp); xp = NULL; } /* * 3) Unlink the source. */ fcnp->cn_flags &= ~MODMASK; fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; if ((fcnp->cn_flags & SAVESTART) == 0) panic("ufs_rename: lost from startdir"); VREF(fdvp); error = relookup(fdvp, &fvp, fcnp); if (error == 0) vrele(fdvp); if (fvp != NULL) { xp = VTOI(fvp); dp = VTOI(fdvp); } else { /* * From name has disappeared. */ if (doingdirectory) panic("ufs_rename: lost dir entry"); vrele(ap->a_fvp); return (0); } /* * Ensure that the directory entry still exists and has not * changed while the new name has been entered. If the source is * a file then the entry may have been unlinked or renamed. In * either case there is no further work to be done. If the source - * is a directory then it cannot have been rmdir'ed; its link - * count of three would cause a rmdir to fail with ENOTEMPTY. - * The IN_RENAME flag ensures that it cannot be moved by another - * rename. + * is a directory then it cannot have been rmdir'ed; the IN_RENAME + * flag ensures that it cannot be moved by another rename or removed + * by a rmdir. */ if (xp != ip) { if (doingdirectory) panic("ufs_rename: lost dir entry"); } else { /* * If the source is a directory with a * new parent, the link count of the old * parent directory must be decremented * and ".." set to point to the new parent. */ if (doingdirectory && newparent) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; - error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, - sizeof (struct dirtemplate), (off_t)0, - UIO_SYSSPACE, IO_NODELOCKED, - tcnp->cn_cred, (int *)0, (struct proc *)0); - if (error == 0) { -# if (BYTE_ORDER == LITTLE_ENDIAN) - if (fvp->v_mount->mnt_maxsymlinklen <= 0) - namlen = dirbuf.dotdot_type; - else - namlen = dirbuf.dotdot_namlen; -# else - namlen = dirbuf.dotdot_namlen; -# endif - if (namlen != 2 || - dirbuf.dotdot_name[0] != '.' 
|| - dirbuf.dotdot_name[1] != '.') { - ufs_dirbad(xp, (doff_t)12, - "rename: mangled dir"); - } else { - dirbuf.dotdot_ino = newparent; - (void) vn_rdwr(UIO_WRITE, fvp, - (caddr_t)&dirbuf, - sizeof (struct dirtemplate), - (off_t)0, UIO_SYSSPACE, - IO_NODELOCKED|IO_SYNC, - tcnp->cn_cred, (int *)0, - (struct proc *)0); - cache_purge(fdvp); - } - } + xp->i_offset = mastertemplate.dot_reclen; + ufs_dirrewrite(xp, dp, newparent, DT_DIR, 0); + cache_purge(fdvp); } - error = ufs_dirremove(fdvp, fcnp); - if (!error) { - xp->i_nlink--; - xp->i_flag |= IN_CHANGE; - } + error = ufs_dirremove(fdvp, xp, fcnp->cn_flags, 0); xp->i_flag &= ~IN_RENAME; } if (dp) vput(fdvp); if (xp) vput(fvp); vrele(ap->a_fvp); return (error); bad: if (xp) vput(ITOV(xp)); vput(ITOV(dp)); out: if (doingdirectory) ip->i_flag &= ~IN_RENAME; if (vn_lock(fvp, LK_EXCLUSIVE, p) == 0) { + ip->i_effnlink--; ip->i_nlink--; ip->i_flag |= IN_CHANGE; ip->i_flag &= ~IN_RENAME; vput(fvp); } else vrele(fvp); return (error); } /* - * A virgin directory (no blushing please). - */ -static struct dirtemplate mastertemplate = { - 0, 12, DT_DIR, 1, { '.', 0 }, - 0, DIRBLKSIZ - 12, DT_DIR, 2, { '.', '.', 0 } -}; -static struct odirtemplate omastertemplate = { - 0, 12, 1, { '.', 0 }, - 0, DIRBLKSIZ - 12, 2, { '.', '.', 0 } -}; - -/* * Mkdir system call */ int ufs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { register struct vnode *dvp = ap->a_dvp; register struct vattr *vap = ap->a_vap; register struct componentname *cnp = ap->a_cnp; register struct inode *ip, *dp; struct vnode *tvp; + struct buf *bp; struct dirtemplate dirtemplate, *dtp; + struct direct newdir; struct timeval tv; int error, dmode; #ifdef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("ufs_mkdir: no name"); #endif dp = VTOI(dvp); if ((nlink_t)dp->i_nlink >= LINK_MAX) { error = EMLINK; goto out; } dmode = vap->va_mode & 0777; dmode |= IFDIR; /* * Must simulate part of ufs_makeinode here to acquire the inode, * but not have it entered in the parent directory. The entry is * made later after writing "." and ".." entries. */ error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp); if (error) goto out; ip = VTOI(tvp); ip->i_gid = dp->i_gid; #ifdef SUIDDIR { #ifdef QUOTA struct ucred ucred, *ucp; ucp = cnp->cn_cred; #endif /* * If we are hacking owners here, (only do this where told to) * and we are not giving it TOO root, (would subvert quotas) * then go ahead and give it to the other user. * The new directory also inherits the SUID bit. * If user's UID and dir UID are the same, * 'give it away' so that the SUID is still forced on. */ if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) && (dp->i_mode & ISUID) && dp->i_uid) { dmode |= ISUID; ip->i_uid = dp->i_uid; #ifdef QUOTA if (dp->i_uid != cnp->cn_cred->cr_uid) { /* * Make sure the correct user gets charged * for the space. * Make a dummy credential for the victim. * XXX This seems to never be accessed out of * our context so a stack variable is ok.
*/ ucred.cr_ref = 1; ucred.cr_uid = ip->i_uid; ucred.cr_ngroups = 1; ucred.cr_groups[0] = dp->i_gid; ucp = &ucred; } #endif } else ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, ucp, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); vput(dvp); return (error); } #endif } #else /* !SUIDDIR */ ip->i_uid = cnp->cn_cred->cr_uid; #ifdef QUOTA if ((error = getinoquota(ip)) || (error = chkiq(ip, 1, cnp->cn_cred, 0))) { zfree(namei_zone, cnp->cn_pnbuf); UFS_VFREE(tvp, ip->i_number, dmode); vput(tvp); vput(dvp); return (error); } #endif #endif /* !SUIDDIR */ ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; ip->i_mode = dmode; tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ + ip->i_effnlink = 2; ip->i_nlink = 2; + if (DOINGSOFTDEP(tvp)) + softdep_increase_linkcnt(ip); if (cnp->cn_flags & ISWHITEOUT) ip->i_flags |= UF_OPAQUE; - gettime(&tv); - error = UFS_UPDATE(tvp, &tv, &tv, 1); /* - * Bump link count in parent directory - * to reflect work done below. Should - * be done before reference is created - * so reparation is possible if we crash. + * Bump link count in parent directory to reflect work done below. + * Should be done before reference is created so cleanup is + * possible if we crash. */ + dp->i_effnlink++; dp->i_nlink++; dp->i_flag |= IN_CHANGE; - error = UFS_UPDATE(dvp, &tv, &tv, 1); + if (DOINGSOFTDEP(dvp)) + softdep_increase_linkcnt(dp); + gettime(&tv); + error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(dvp)); if (error) goto bad; - /* Initialize directory with "." and ".." from static template. */ + /* + * Initialize directory with "." and ".." from static template. + */ if (dvp->v_mount->mnt_maxsymlinklen > 0 ) dtp = &mastertemplate; else dtp = (struct dirtemplate *)&omastertemplate; dirtemplate = *dtp; dirtemplate.dot_ino = ip->i_number; dirtemplate.dotdot_ino = dp->i_number; - error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, - sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE, - IO_NODELOCKED|IO_SYNC, cnp->cn_cred, (int *)0, (struct proc *)0); - if (error) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; + if ((error = VOP_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred, + B_CLRBUF, &bp)) != 0) goto bad; + ip->i_size = DIRBLKSIZ; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + vnode_pager_setsize(tvp, (u_long)ip->i_size); + bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate); + if ((error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(tvp))) != 0) { + (void)VOP_BWRITE(bp); + goto bad; } - if (DIRBLKSIZ > VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) - panic("ufs_mkdir: blksize"); /* XXX should grow with balloc() */ - else { - ip->i_size = DIRBLKSIZ; - ip->i_flag |= IN_CHANGE; - } - - /* Directory set up, now install it's entry in the parent directory. */ - error = ufs_direnter(ip, dvp, cnp); - if (error) { - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; - } - VN_POLLEVENT(dvp, POLLWRITE); -bad: + VN_POLLEVENT(dvp, POLLWRITE); /* XXX right place? */ /* - * No need to do an explicit VOP_TRUNCATE here, vrele will do this - * for us because we set the link count to 0. + * Directory set up, now install it's entry in the parent directory. + * + * If we are not doing soft dependencies, then we must write out the + * buffer containing the new directory body before entering the new + * name in the parent. 
If we are doing soft dependencies, then the + * buffer containing the new directory body will be passed to and + * released in the soft dependency code after the code has attached + * an appropriate ordering dependency to the buffer which ensures that + * the buffer is written before the new name is written in the parent. */ - if (error) { + if (!DOINGSOFTDEP(dvp) && ((error = VOP_BWRITE(bp)) != 0)) + goto bad; + ufs_makedirentry(ip, cnp, &newdir); + error = ufs_direnter(dvp, tvp, &newdir, cnp, bp); + +bad: + if (error == 0) { + *ap->a_vpp = tvp; + } else { + dp->i_effnlink--; + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + /* + * No need to do an explicit VOP_TRUNCATE here, vrele will + * do this for us because we set the link count to 0. + */ + ip->i_effnlink = 0; ip->i_nlink = 0; ip->i_flag |= IN_CHANGE; vput(tvp); - } else - *ap->a_vpp = tvp; + } out: zfree(namei_zone, cnp->cn_pnbuf); vput(dvp); return (error); } /* * Rmdir system call. */ int ufs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct inode *ip, *dp; int error; ip = VTOI(vp); dp = VTOI(dvp); /* - * Verify the directory is empty (and valid). - * (Rmdir ".." won't be valid since - * ".." will contain a reference to - * the current directory and thus be - * non-empty.) + * Do not remove a directory that is in the process of being renamed. + * Verify the directory is empty (and valid). Rmdir ".." will not be + * valid since ".." will contain a reference to the current directory + * and thus be non-empty. */ error = 0; - if (ip->i_nlink != 2 || + if (ip->i_flag & IN_RENAME) { + error = EINVAL; + goto out; + } + if (ip->i_effnlink != 2 || !ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) { error = ENOTEMPTY; goto out; } if ((dp->i_flags & APPEND) || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { error = EPERM; goto out; } /* * Delete reference to directory before purging * inode. If we crash in between, the directory * will be reattached to lost+found, */ - error = ufs_dirremove(dvp, cnp); + error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1); if (error) goto out; VN_POLLEVENT(dvp, POLLWRITE|POLLNLINK); - dp->i_nlink--; - dp->i_flag |= IN_CHANGE; cache_purge(dvp); - vput(dvp); - dvp = NULL; /* - * Truncate inode. The only stuff left - * in the directory is "." and "..". The - * "." reference is inconsequential since - * we're quashing it. The ".." reference - * has already been adjusted above. We've - * removed the "." reference and the reference - * in the parent directory, but there may be - * other hard links so decrement by 2 and - * worry about them later. + * Truncate inode. The only stuff left in the directory is "." and + * "..". The "." reference is inconsequential since we are quashing + * it. We have removed the "." reference and the reference in the + * parent directory, but there may be other hard links. So, + * ufs_dirremove will set the UF_IMMUTABLE flag to ensure that no + * new entries are made. The soft dependency code will arrange to + * do these operations after the parent directory entry has been + * deleted on disk, so when running with that code we avoid doing + * them now. 
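The effect of the new i_effnlink field is easiest to see as an ordering question: in-core counts drop as soon as the operation logically happens, while with soft updates the on-disk i_nlink changes and the truncation wait until the parent's directory block is known to be on disk. A toy walk-through of the two schedules; the step names are purely illustrative, not the softdep interface:

#include <stdio.h>

static void step(const char *s) { printf("  %s\n", s); }

static void
mkdir_order(int softdep)
{
	printf("mkdir (%s):\n", softdep ? "softdep" : "sync");
	step("allocate inode, effnlink = nlink = 2");
	/*
	 * The block holding "." and ".." must reach disk before any
	 * name makes the new directory findable.
	 */
	step(softdep ? "queue child block, dependency -> parent entry"
		     : "bwrite child \".\"/\"..\" block");
	step("enter name in parent directory");
}

static void
rmdir_order(int softdep)
{
	printf("rmdir (%s):\n", softdep ? "softdep" : "sync");
	step("remove name from parent, effnlink counts drop now");
	if (softdep)
		step("nlink decrements + truncate deferred until "
		    "parent entry is on disk");
	else
		step("nlink decrements, synchronous truncate to 0");
}

int
main(void)
{
	mkdir_order(0); mkdir_order(1);
	rmdir_order(0); rmdir_order(1);
	return (0);
}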
*/ - ip->i_nlink -= 2; - error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, - cnp->cn_proc); - cache_purge(ITOV(ip)); - VN_POLLEVENT(vp, POLLNLINK); + dp->i_effnlink--; + dp->i_flag |= IN_CHANGE; + ip->i_effnlink--; + ip->i_flag |= IN_CHANGE; + if (!DOINGSOFTDEP(vp)) { + dp->i_nlink--; + ip->i_nlink--; + error = UFS_TRUNCATE(vp, (off_t)0, IO_SYNC, cnp->cn_cred, + cnp->cn_proc); + } + cache_purge(vp); out: - if (dvp) - vput(dvp); + vput(dvp); + VN_POLLEVENT(vp, POLLNLINK); vput(vp); return (error); } /* * symlink -- make a symbolic link */ int ufs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { register struct vnode *vp, **vpp = ap->a_vpp; register struct inode *ip; int len, error; error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, vpp, ap->a_cnp); if (error) return (error); VN_POLLEVENT(ap->a_dvp, POLLWRITE); vp = *vpp; len = strlen(ap->a_target); if (len < vp->v_mount->mnt_maxsymlinklen) { ip = VTOI(vp); bcopy(ap->a_target, (char *)ip->i_shortlink, len); ip->i_size = len; ip->i_flag |= IN_CHANGE | IN_UPDATE; } else error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, UIO_SYSSPACE, IO_NODELOCKED, ap->a_cnp->cn_cred, (int *)0, (struct proc *)0); vput(vp); return (error); } /* * Vnode op for reading directories. * * The routine below assumes that the on-disk format of a directory * is the same as that defined by . If the on-disk * format changes, then it will be necessary to do a conversion * from the on-disk format that read returns to the format defined * by . */ int ufs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; int *ncookies; u_long **a_cookies; } */ *ap; { register struct uio *uio = ap->a_uio; int error; size_t count, lost; off_t off; if (ap->a_ncookies != NULL) /* * Ensure that the block is aligned. The caller can use * the cookies to determine where in the block to start. */ uio->uio_offset &= ~(DIRBLKSIZ - 1); off = uio->uio_offset; count = uio->uio_resid; /* Make sure we don't return partial entries. 
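ufs_symlink() above stores short targets directly in the inode: a target shorter than mnt_maxsymlinklen fits in the area normally holding the block pointers (i_shortlink, 60 bytes for the 15 pointers of a UFS1 dinode), so readlink needs no data-block I/O at all; longer targets go through the ordinary write path. The decision in miniature:

#include <stdio.h>
#include <string.h>

#define MAXSYMLINKLEN 60	/* space of 15 32-bit block pointers */

struct slink {
	char shortlink[MAXSYMLINKLEN];	/* overlays di_db/di_ib */
	long size;
	int  uses_data_block;
};

static void
make_symlink(struct slink *ip, const char *target)
{
	size_t len = strlen(target);

	if (len < MAXSYMLINKLEN) {	/* fast path: no data block */
		memcpy(ip->shortlink, target, len);
		ip->size = (long)len;
		ip->uses_data_block = 0;
	} else {			/* slow path: vn_rdwr() write */
		ip->size = (long)len;
		ip->uses_data_block = 1;
	}
}

int
main(void)
{
	struct slink ip;

	make_symlink(&ip, "/usr/local");
	printf("inline: %d, size %ld\n", !ip.uses_data_block, ip.size);
	return (0);
}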
*/ count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); if (count <= 0) return (EINVAL); lost = uio->uio_resid - count; uio->uio_resid = count; uio->uio_iov->iov_len = count; # if (BYTE_ORDER == LITTLE_ENDIAN) if (ap->a_vp->v_mount->mnt_maxsymlinklen > 0) { error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); } else { struct dirent *dp, *edp; struct uio auio; struct iovec aiov; caddr_t dirbuf; int readcnt; u_char tmp; auio = *uio; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; aiov.iov_len = count; MALLOC(dirbuf, caddr_t, count, M_TEMP, M_WAITOK); aiov.iov_base = dirbuf; error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); if (error == 0) { readcnt = count - auio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; for (dp = (struct dirent *)dirbuf; dp < edp; ) { tmp = dp->d_namlen; dp->d_namlen = dp->d_type; dp->d_type = tmp; if (dp->d_reclen > 0) { dp = (struct dirent *) ((char *)dp + dp->d_reclen); } else { error = EIO; break; } } if (dp >= edp) error = uiomove(dirbuf, readcnt, uio); } FREE(dirbuf, M_TEMP); } # else error = VOP_READ(ap->a_vp, uio, 0, ap->a_cred); # endif if (!error && ap->a_ncookies != NULL) { struct dirent* dpStart; struct dirent* dpEnd; struct dirent* dp; int ncookies; u_long *cookies; u_long *cookiep; if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) panic("ufs_readdir: unexpected uio from NFS server"); dpStart = (struct dirent *) (uio->uio_iov->iov_base - (uio->uio_offset - off)); dpEnd = (struct dirent *) uio->uio_iov->iov_base; for (dp = dpStart, ncookies = 0; dp < dpEnd; dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) ncookies++; MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, M_WAITOK); for (dp = dpStart, cookiep = cookies; dp < dpEnd; dp = (struct dirent *)((caddr_t) dp + dp->d_reclen)) { off += dp->d_reclen; *cookiep++ = (u_long) off; } *ap->a_ncookies = ncookies; *ap->a_cookies = cookies; } uio->uio_resid += lost; if (ap->a_eofflag) *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset; return (error); } /* * Return target name of a symbolic link */ int ufs_readlink(ap) struct vop_readlink_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct inode *ip = VTOI(vp); int isize; isize = ip->i_size; if ((isize < vp->v_mount->mnt_maxsymlinklen) || (ip->i_din.di_blocks == 0)) { /* XXX - for old fastlink support */ uiomove((char *)ip->i_shortlink, isize, ap->a_uio); return (0); } return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); } /* * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually * done. If a buffer has been saved in anticipation of a CREATE, delete it. */ /* ARGSUSED */ int ufs_abortop(ap) struct vop_abortop_args /* { struct vnode *a_dvp; struct componentname *a_cnp; } */ *ap; { if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) zfree(namei_zone, ap->a_cnp->cn_pnbuf); return (0); } /* * Calculate the logical to physical mapping if not done already, * then call the device strategy routine. 
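The cookie pass at the end of ufs_readdir() relies on each entry recording its own length: one walk over the returned block counts the entries, and a second emits, for every entry, the directory offset just past it, which is what an NFS client hands back to resume the listing. Reduced to plain structs:

#include <stdio.h>

struct dent {
	unsigned short reclen;	/* d_reclen: bytes this entry occupies */
};

/* Emit one cookie (the offset just past each entry); returns count. */
static int
gen_cookies(const struct dent *dp, int n, long off, unsigned long *cookies)
{
	int i;

	for (i = 0; i < n; i++) {
		off += dp[i].reclen;	/* offset just past entry i */
		cookies[i] = (unsigned long)off;
	}
	return (n);
}

int
main(void)
{
	struct dent block[] = { { 12 }, { 16 }, { 484 } };	/* 512 total */
	unsigned long cookies[3];
	int i, n = gen_cookies(block, 3, 0, cookies);

	for (i = 0; i < n; i++)
		printf("cookie[%d] = %lu\n", i, cookies[i]);
	return (0);
}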
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 */
int
ufs_strategy(ap)
	struct vop_strategy_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = bp->b_vp;
	register struct inode *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		panic("ufs_strategy: spec");
	if (bp->b_blkno == bp->b_lblkno) {
		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno,
		    NULL, NULL);
		if (error) {
			bp->b_error = error;
			bp->b_flags |= B_ERROR;
			biodone(bp);
			return (error);
		}
		if ((long)bp->b_blkno == -1)
			vfs_bio_clrbuf(bp);
	}
	if ((long)bp->b_blkno == -1) {
		biodone(bp);
		return (0);
	}
	vp = ip->i_devvp;
	bp->b_dev = vp->v_rdev;
	VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
	return (0);
}

/*
 * Print out the contents of an inode.
 */
int
ufs_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct inode *ip = VTOI(vp);

	printf("tag VT_UFS, ino %ld, on dev %d, %d", ip->i_number,
	    major(ip->i_dev), minor(ip->i_dev));
	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);
	lockmgr_printinfo(&ip->i_lock);
	printf("\n");
	return (0);
}

/*
 * Read wrapper for special devices.
 */
int
ufsspec_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{

	/*
	 * Set access flag.
	 */
	if (!(ap->a_vp->v_mount->mnt_flag & MNT_NOATIME))
		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_read), ap));
}

/*
 * Write wrapper for special devices.
 */
int
ufsspec_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{

	/*
	 * Set update and change flags.
	 */
	VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_write), ap));
}

/*
 * Close wrapper for special devices.
 *
 * Update the times on the inode then do device close.
 */
int
ufsspec_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);

	simple_lock(&vp->v_interlock);
	if (ap->a_vp->v_usecount > 1)
		ITIMES(ip, &time, &time);
	simple_unlock(&vp->v_interlock);
	return (VOCALL (spec_vnodeop_p, VOFFSET(vop_close), ap));
}

/*
 * Read wrapper for fifos.
 */
int
ufsfifo_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{

	/*
	 * Set access flag.
	 */
	if (!(ap->a_vp->v_mount->mnt_flag & MNT_NOATIME))
		VTOI(ap->a_vp)->i_flag |= IN_ACCESS;
	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_read), ap));
}

/*
 * Write wrapper for fifos.
 */
int
ufsfifo_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{

	/*
	 * Set update and change flags.
	 */
	VTOI(ap->a_vp)->i_flag |= IN_CHANGE | IN_UPDATE;
	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_write), ap));
}

/*
 * Close wrapper for fifos.
 *
 * Update the times on the inode then do the fifo close.
 */
int
ufsfifo_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct inode *ip = VTOI(vp);

	simple_lock(&vp->v_interlock);
	if (ap->a_vp->v_usecount > 1)
		ITIMES(ip, &time, &time);
	simple_unlock(&vp->v_interlock);
	return (VOCALL (fifo_vnodeop_p, VOFFSET(vop_close), ap));
}
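The ufsspec_*() and ufsfifo_*() routines above all follow one shape: touch the
inode flags, then forward the call unchanged to the generic layer. A hedged
userland sketch of that wrapper pattern; the names (struct node, generic_read,
wrapped_read) and the flag value are invented for illustration.

#include <stdio.h>

#define IN_ACCESS 0x01			/* invented flag value */

struct node {
	int flags;
};

/* The "generic" layer the wrapper delegates to. */
static int
generic_read(struct node *np, char *buf, int len)
{
	(void)np; (void)buf;
	printf("generic_read: %d bytes requested\n", len);
	return (0);
}

/*
 * Wrapper: record the access, then forward the call unchanged,
 * mirroring ufsspec_read() -> VOCALL(spec_vnodeop_p, ...).
 */
static int
wrapped_read(struct node *np, char *buf, int len)
{
	np->flags |= IN_ACCESS;		/* side effect added by the wrapper */
	return (generic_read(np, buf, len));
}

int
main(void)
{
	struct node n = { 0 };
	char buf[16];

	wrapped_read(&n, buf, sizeof(buf));
	printf("flags after read: %#x\n", n.flags);
	return (0);
}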
/*
 * Return POSIX pathconf information applicable to ufs filesystems.
 */
int
ufs_pathconf(ap)
	struct vop_pathconf_args /* {
		struct vnode *a_vp;
		int a_name;
		int *a_retval;
	} */ *ap;
{

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = LINK_MAX;
		return (0);
	case _PC_NAME_MAX:
		*ap->a_retval = NAME_MAX;
		return (0);
	case _PC_PATH_MAX:
		*ap->a_retval = PATH_MAX;
		return (0);
	case _PC_PIPE_BUF:
		*ap->a_retval = PIPE_BUF;
		return (0);
	case _PC_CHOWN_RESTRICTED:
		*ap->a_retval = 1;
		return (0);
	case _PC_NO_TRUNC:
		*ap->a_retval = 1;
		return (0);
	default:
		return (EINVAL);
	}
	/* NOTREACHED */
}

/*
 * Advisory record locking support
 */
int
ufs_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t  a_id;
		int  a_op;
		struct flock *a_fl;
		int  a_flags;
	} */ *ap;
{
	register struct inode *ip = VTOI(ap->a_vp);

	return (lf_advlock(ap, &(ip->i_lockf), ip->i_size));
}

/*
 * Initialize the vnode associated with a new inode, handle aliased
 * vnodes.
 */
int
ufs_vinit(mntp, specops, fifoops, vpp)
	struct mount *mntp;
	vop_t **specops;
	vop_t **fifoops;
	struct vnode **vpp;
{
	struct inode *ip;
	struct vnode *vp, *nvp;

	vp = *vpp;
	ip = VTOI(vp);
	switch(vp->v_type = IFTOVT(ip->i_mode)) {
	case VCHR:
	case VBLK:
		vp->v_op = specops;
		nvp = checkalias(vp, ip->i_rdev, mntp);
		if (nvp) {
			/*
			 * Discard unneeded vnode, but save its inode.
			 * Note that the lock is carried over in the inode
			 * to the replacement vnode.
			 */
			nvp->v_data = vp->v_data;
			vp->v_data = NULL;
			vp->v_op = spec_vnodeop_p;
			vrele(vp);
			vgone(vp);
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			ip->i_vnode = vp;
		}
		break;
	case VFIFO:
		vp->v_op = fifoops;
		break;
	default:
		break;
	}
	if (ip->i_number == ROOTINO)
-	        vp->v_flag |= VROOT;
+		vp->v_flag |= VROOT;
	/*
	 * Initialize modrev times
	 */
	SETHIGH(ip->i_modrev, mono_time.tv_sec);
	SETLOW(ip->i_modrev, mono_time.tv_usec * 4294);
	*vpp = vp;
	return (0);
}
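ufs_vinit() above keys off IFTOVT(ip->i_mode) to pick the vnode type and the
matching op vector. A minimal sketch of that mode-bits-to-type mapping,
assuming the standard S_IFMT encodings from <sys/stat.h>; the enum values
here are invented stand-ins for the kernel's vtype.

#include <stdio.h>
#include <sys/stat.h>	/* S_IFMT, S_IFCHR, ... */

enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD };

/* Map the IFMT bits of an inode mode to a vnode type, as IFTOVT() does. */
static enum vtype
mode_to_vtype(mode_t mode)
{
	switch (mode & S_IFMT) {
	case S_IFREG:	return (VREG);
	case S_IFDIR:	return (VDIR);
	case S_IFBLK:	return (VBLK);
	case S_IFCHR:	return (VCHR);
	case S_IFLNK:	return (VLNK);
	case S_IFSOCK:	return (VSOCK);
	case S_IFIFO:	return (VFIFO);
	default:	return (VBAD);
	}
}

int
main(void)
{
	printf("S_IFCHR -> %d (VCHR)\n", mode_to_vtype(S_IFCHR));
	printf("S_IFIFO -> %d (VFIFO)\n", mode_to_vtype(S_IFIFO));
	return (0);
}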
/*
 * Allocate a new inode.
 */
int
ufs_makeinode(mode, dvp, vpp, cnp)
	int mode;
	struct vnode *dvp;
	struct vnode **vpp;
	struct componentname *cnp;
{
	register struct inode *ip, *pdir;
+	struct direct newdir;
	struct timeval tv;
	struct vnode *tvp;
	int error;

	pdir = VTOI(dvp);
#ifdef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("ufs_makeinode: no name");
#endif
	*vpp = NULL;
	if ((mode & IFMT) == 0)
		mode |= IFREG;

	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
	if (error) {
		zfree(namei_zone, cnp->cn_pnbuf);
		vput(dvp);
		return (error);
	}
	ip = VTOI(tvp);
	ip->i_gid = pdir->i_gid;
#ifdef SUIDDIR
	{
#ifdef QUOTA
		struct ucred ucred, *ucp;
		ucp = cnp->cn_cred;
#endif
		/*
		 * If we are not the owner of the directory,
		 * and we are hacking owners here, (only do this where told to)
		 * and we are not giving it TO root, (would subvert quotas)
		 * then go ahead and give it to the other user.
		 * Note that this drops off the execute bits for security.
		 */
		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
		    (pdir->i_mode & ISUID) &&
		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
			ip->i_uid = pdir->i_uid;
			mode &= ~07111;
#ifdef QUOTA
			/*
			 * Make sure the correct user gets charged
			 * for the space.
			 * Quickly knock up a dummy credential for the victim.
			 * XXX This seems to never be accessed out of our
			 * context so a stack variable is ok.
			 */
			ucred.cr_ref = 1;
			ucred.cr_uid = ip->i_uid;
			ucred.cr_ngroups = 1;
			ucred.cr_groups[0] = pdir->i_gid;
			ucp = &ucred;
#endif
		} else
			ip->i_uid = cnp->cn_cred->cr_uid;

#ifdef QUOTA
		if ((error = getinoquota(ip)) ||
		    (error = chkiq(ip, 1, ucp, 0))) {
			zfree(namei_zone, cnp->cn_pnbuf);
			UFS_VFREE(tvp, ip->i_number, mode);
			vput(tvp);
			vput(dvp);
			return (error);
		}
#endif
	}
#else	/* !SUIDDIR */
	ip->i_uid = cnp->cn_cred->cr_uid;
#ifdef QUOTA
	if ((error = getinoquota(ip)) ||
	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
		zfree(namei_zone, cnp->cn_pnbuf);
		UFS_VFREE(tvp, ip->i_number, mode);
		vput(tvp);
		vput(dvp);
		return (error);
	}
#endif
#endif	/* !SUIDDIR */
	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
	ip->i_mode = mode;
	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
+	ip->i_effnlink = 1;
	ip->i_nlink = 1;
+	if (DOINGSOFTDEP(tvp))
+		softdep_increase_linkcnt(ip);
	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
	    suser(cnp->cn_cred, NULL))
		ip->i_mode &= ~ISGID;

	if (cnp->cn_flags & ISWHITEOUT)
		ip->i_flags |= UF_OPAQUE;

	/*
	 * Make sure inode goes to disk before directory entry.
	 */
	gettime(&tv);
-	error = UFS_UPDATE(tvp, &tv, &tv, 1);
+	error = UFS_UPDATE(tvp, &tv, &tv, !DOINGSOFTDEP(tvp));
	if (error)
		goto bad;
-	error = ufs_direnter(ip, dvp, cnp);
+	ufs_makedirentry(ip, cnp, &newdir);
+	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL);
	if (error)
		goto bad;
	if ((cnp->cn_flags & SAVESTART) == 0)
		zfree(namei_zone, cnp->cn_pnbuf);
	vput(dvp);
	*vpp = tvp;
	return (0);

bad:
	/*
	 * Write error occurred trying to update the inode
	 * or the directory so must deallocate the inode.
	 */
	zfree(namei_zone, cnp->cn_pnbuf);
	vput(dvp);
+	ip->i_effnlink = 0;
	ip->i_nlink = 0;
	ip->i_flag |= IN_CHANGE;
	vput(tvp);
	return (error);
}

static int
ufs_missingop(ap)
	struct vop_generic_args *ap;
{

	panic("no vop function for %s in ufs child", ap->a_desc->vdesc_name);
	return (EOPNOTSUPP);
}
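The vnodeopv_entry_desc tables that follow pair an operation descriptor with a
handler, with vop_default_desc catching everything not listed; ufs_missingop()
is wired in where a child filesystem must supply its own routine. A hedged
sketch of that lookup-with-default scheme; the table layout and all names
(struct op_entry, op_lookup) are invented for illustration.

#include <stdio.h>
#include <string.h>

typedef int (*op_fn)(void);

static int op_default(void) { printf("default handler\n"); return (0); }
static int op_read(void)    { printf("read handler\n");    return (0); }

struct op_entry {
	const char *name;	/* stands in for a vnodeop_desc pointer */
	op_fn	    fn;
};

/* Table in the style of ufs_vnodeop_entries[]: explicit ops, NULL sentinel. */
static struct op_entry ops[] = {
	{ "read",	op_read },
	{ NULL,		NULL },
};

static op_fn
op_lookup(const char *name)
{
	struct op_entry *ep;

	for (ep = ops; ep->name != NULL; ep++)
		if (strcmp(ep->name, name) == 0)
			return (ep->fn);
	return (op_default);	/* the vop_default_desc equivalent */
}

int
main(void)
{
	op_lookup("read")();	/* hits the explicit entry */
	op_lookup("write")();	/* falls through to the default */
	return (0);
}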
/*
 * Global vfs data structures for ufs.
 */
static vop_t **ufs_vnodeop_p;
static struct vnodeopv_entry_desc ufs_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_defaultop },
	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
	{ &vop_read_desc,		(vop_t *) ufs_missingop },
	{ &vop_reallocblks_desc,	(vop_t *) ufs_missingop },
	{ &vop_write_desc,		(vop_t *) ufs_missingop },
	{ &vop_abortop_desc,		(vop_t *) ufs_abortop },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_advlock_desc,		(vop_t *) ufs_advlock },
	{ &vop_bmap_desc,		(vop_t *) ufs_bmap },
	{ &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
	{ &vop_close_desc,		(vop_t *) ufs_close },
	{ &vop_create_desc,		(vop_t *) ufs_create },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
	{ &vop_link_desc,		(vop_t *) ufs_link },
	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
	{ &vop_lookup_desc,		(vop_t *) vfs_cache_lookup },
	{ &vop_mkdir_desc,		(vop_t *) ufs_mkdir },
	{ &vop_mknod_desc,		(vop_t *) ufs_mknod },
	{ &vop_mmap_desc,		(vop_t *) ufs_mmap },
	{ &vop_open_desc,		(vop_t *) ufs_open },
	{ &vop_pathconf_desc,		(vop_t *) ufs_pathconf },
	{ &vop_poll_desc,		(vop_t *) vop_stdpoll },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_readdir_desc,		(vop_t *) ufs_readdir },
	{ &vop_readlink_desc,		(vop_t *) ufs_readlink },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_remove_desc,		(vop_t *) ufs_remove },
	{ &vop_rename_desc,		(vop_t *) ufs_rename },
	{ &vop_rmdir_desc,		(vop_t *) ufs_rmdir },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
	{ &vop_strategy_desc,		(vop_t *) ufs_strategy },
	{ &vop_symlink_desc,		(vop_t *) ufs_symlink },
	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
	{ &vop_whiteout_desc,		(vop_t *) ufs_whiteout },
	{ NULL, NULL }
};
static struct vnodeopv_desc ufs_vnodeop_opv_desc =
	{ &ufs_vnodeop_p, ufs_vnodeop_entries };

static vop_t **ufs_specop_p;
static struct vnodeopv_entry_desc ufs_specop_entries[] = {
	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_close_desc,		(vop_t *) ufsspec_close },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_read_desc,		(vop_t *) ufsspec_read },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
	{ &vop_write_desc,		(vop_t *) ufsspec_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc ufs_specop_opv_desc =
	{ &ufs_specop_p, ufs_specop_entries };

static vop_t **ufs_fifoop_p;
static struct vnodeopv_entry_desc ufs_fifoop_entries[] = {
	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) ufs_missingop },
	{ &vop_access_desc,		(vop_t *) ufs_access },
	{ &vop_close_desc,		(vop_t *) ufsfifo_close },
	{ &vop_getattr_desc,		(vop_t *) ufs_getattr },
	{ &vop_inactive_desc,		(vop_t *) ufs_inactive },
	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
	{ &vop_print_desc,		(vop_t *) ufs_print },
	{ &vop_read_desc,		(vop_t *) ufsfifo_read },
	{ &vop_reclaim_desc,		(vop_t *) ufs_reclaim },
	{ &vop_setattr_desc,		(vop_t *) ufs_setattr },
	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
	{ &vop_write_desc,		(vop_t *) ufsfifo_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc ufs_fifoop_opv_desc =
	{ &ufs_fifoop_p, ufs_fifoop_entries };

VNODEOP_SET(ufs_vnodeop_opv_desc);
VNODEOP_SET(ufs_specop_opv_desc);
VNODEOP_SET(ufs_fifoop_opv_desc);

int
ufs_vnoperate(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
	} */ *ap;
{
	return (VOCALL(ufs_vnodeop_p, ap->a_desc->vdesc_offset, ap));
}

int
ufs_vnoperatefifo(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
	} */ *ap;
{
	return (VOCALL(ufs_fifoop_p, ap->a_desc->vdesc_offset, ap));
}

int
ufs_vnoperatespec(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
	} */ *ap;
{
	return (VOCALL(ufs_specop_p, ap->a_desc->vdesc_offset, ap));
}
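ufs_vnoperate() and its spec/fifo siblings are trampolines: each call carries
a descriptor whose vdesc_offset indexes the op vector compiled from the tables
above. A minimal sketch of that offset-based dispatch under those assumptions;
the enum, the vector contents, and vnoperate() here are all invented.

#include <stdio.h>

typedef int (*vop_t)(void);

enum { VOFF_OPEN, VOFF_CLOSE, VOFF_MAX };	/* stand-ins for vdesc_offset */

static int my_open(void)  { printf("open\n");  return (0); }
static int my_close(void) { printf("close\n"); return (0); }

/* The compiled op vector, as the opv tables would be flattened at init time. */
static vop_t opv[VOFF_MAX] = { my_open, my_close };

/* Trampoline in the style of ufs_vnoperate(): index the vector and call. */
static int
vnoperate(int vdesc_offset)
{
	return (opv[vdesc_offset]());	/* VOCALL(opv, offset, ap) analogue */
}

int
main(void)
{
	vnoperate(VOFF_OPEN);
	vnoperate(VOFF_CLOSE);
	return (0);
}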