Index: head/bin/pax/gen_subs.c
===================================================================
--- head/bin/pax/gen_subs.c	(revision 311521)
+++ head/bin/pax/gen_subs.c	(revision 311522)
@@ -1,396 +1,397 @@
 /*-
  * Copyright (c) 1992 Keith Muller.
  * Copyright (c) 1992, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Keith Muller of the University of California, San Diego.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)gen_subs.c	8.1 (Berkeley) 5/31/93";
 #endif
 #endif /* not lint */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/types.h>
 #include <sys/time.h>
 #include <sys/stat.h>
 #include <langinfo.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <string.h>
 #include "pax.h"
 #include "extern.h"
 
 /*
  * a collection of general purpose subroutines used by pax
  */
 
 /*
  * constants used by ls_list() when printing out archive members
  */
 #define MODELEN 20
 #define DATELEN 64
 #define SIXMONTHS	 ((365 / 2) * 86400)
 #define CURFRMTM	"%b %e %H:%M"
 #define OLDFRMTM	"%b %e  %Y"
 #define CURFRMTD	"%e %b %H:%M"
 #define OLDFRMTD	"%e %b  %Y"
 
 static int d_first = -1;
 
 /*
  * ls_list()
  *	list the members of an archive in ls format
  */
 
 void
 ls_list(ARCHD *arcn, time_t now, FILE *fp)
 {
 	struct stat *sbp;
 	char f_mode[MODELEN];
 	char f_date[DATELEN];
 	const char *timefrmt;
 
 	/*
 	 * if not verbose, just print the file name
 	 */
 	if (!vflag) {
 		(void)fprintf(fp, "%s\n", arcn->name);
 		(void)fflush(fp);
 		return;
 	}
 
 	if (d_first < 0)
 		d_first = (*nl_langinfo(D_MD_ORDER) == 'd');
 	/*
 	 * user wants long mode
 	 */
 	sbp = &(arcn->sb);
 	strmode(sbp->st_mode, f_mode);
 
 	/*
 	 * time format based on age compared to the time pax was started.
 	 */
 	if ((sbp->st_mtime + SIXMONTHS) <= now)
 		timefrmt = d_first ? OLDFRMTD : OLDFRMTM;
 	else
 		timefrmt = d_first ? CURFRMTD : CURFRMTM;
 
 	/*
 	 * print file mode, link count, uid, gid and time
 	 */
 	if (strftime(f_date,DATELEN,timefrmt,localtime(&(sbp->st_mtime))) == 0)
 		f_date[0] = '\0';
-	(void)fprintf(fp, "%s%2u %-12s %-12s ", f_mode, sbp->st_nlink,
+	(void)fprintf(fp, "%s%2ju %-12s %-12s ", f_mode,
+		(uintmax_t)sbp->st_nlink,
 		name_uid(sbp->st_uid, 1), name_gid(sbp->st_gid, 1));
 
 	/*
 	 * print device id's for devices, or sizes for other nodes
 	 */
 	if ((arcn->type == PAX_CHR) || (arcn->type == PAX_BLK))
 #		ifdef NET2_STAT
 		(void)fprintf(fp, "%4u,%4u ", MAJOR(sbp->st_rdev),
 		    MINOR(sbp->st_rdev));
 #		else
 		(void)fprintf(fp, "%4lu,%4lu ", (unsigned long)MAJOR(sbp->st_rdev),
 		    (unsigned long)MINOR(sbp->st_rdev));
 #		endif
 	else {
 #		ifdef NET2_STAT
 		(void)fprintf(fp, "%9lu ", sbp->st_size);
 #		else
 		(void)fprintf(fp, "%9ju ", (uintmax_t)sbp->st_size);
 #		endif
 	}
 
 	/*
 	 * print name and link info for hard and soft links
 	 */
 	(void)fprintf(fp, "%s %s", f_date, arcn->name);
 	if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG))
 		(void)fprintf(fp, " == %s\n", arcn->ln_name);
 	else if (arcn->type == PAX_SLK)
 		(void)fprintf(fp, " => %s\n", arcn->ln_name);
 	else
 		(void)putc('\n', fp);
 	(void)fflush(fp);
 	return;
 }
 
 /*
  * tty_ls()
  * 	print a short summary of file to tty.
  */
 
 void
 ls_tty(ARCHD *arcn)
 {
 	char f_date[DATELEN];
 	char f_mode[MODELEN];
 	const char *timefrmt;
 
 	if (d_first < 0)
 		d_first = (*nl_langinfo(D_MD_ORDER) == 'd');
 
 	if ((arcn->sb.st_mtime + SIXMONTHS) <= time(NULL))
 		timefrmt = d_first ? OLDFRMTD : OLDFRMTM;
 	else
 		timefrmt = d_first ? CURFRMTD : CURFRMTM;
 
 	/*
 	 * convert time to string, and print
 	 */
 	if (strftime(f_date, DATELEN, timefrmt,
 	    localtime(&(arcn->sb.st_mtime))) == 0)
 		f_date[0] = '\0';
 	strmode(arcn->sb.st_mode, f_mode);
 	tty_prnt("%s%s %s\n", f_mode, f_date, arcn->name);
 	return;
 }
 
 /*
  * l_strncpy()
  *	copy src to dest up to len chars (stopping at first '\0').
  *	when src is shorter than len, pads to len with '\0'. 
  * Return:
  *	number of chars copied. (Note this is a real performance win over
  *	doing a strncpy(), a strlen(), and then a possible memset())
  */
 
 int
 l_strncpy(char *dest, const char *src, int len)
 {
 	char *stop;
 	char *start;
 
 	stop = dest + len;
 	start = dest;
 	while ((dest < stop) && (*src != '\0'))
 		*dest++ = *src++;
 	len = dest - start;
 	while (dest < stop)
 		*dest++ = '\0';
 	return(len);
 }
 
 /*
  * asc_ul()
  *	convert hex/octal character string into a u_long. We do not have to
  *	check for overflow! (the headers in all supported formats are not large
  *	enough to create an overflow).
  *	NOTE: strings passed to us are NOT TERMINATED.
  * Return:
  *	unsigned long value
  */
 
 u_long
 asc_ul(char *str, int len, int base)
 {
 	char *stop;
 	u_long tval = 0;
 
 	stop = str + len;
 
 	/*
 	 * skip over leading blanks and zeros
 	 */
 	while ((str < stop) && ((*str == ' ') || (*str == '0')))
 		++str;
 
 	/*
 	 * for each valid digit, shift running value (tval) over to next digit
 	 * and add next digit
 	 */
 	if (base == HEX) {
 		while (str < stop) {
 			if ((*str >= '0') && (*str <= '9'))
 				tval = (tval << 4) + (*str++ - '0');
 			else if ((*str >= 'A') && (*str <= 'F'))
 				tval = (tval << 4) + 10 + (*str++ - 'A');
 			else if ((*str >= 'a') && (*str <= 'f'))
 				tval = (tval << 4) + 10 + (*str++ - 'a');
 			else
 				break;
 		}
 	} else {
  		while ((str < stop) && (*str >= '0') && (*str <= '7'))
 			tval = (tval << 3) + (*str++ - '0');
 	}
 	return(tval);
 }
 
 /*
  * ul_asc()
  *	convert an unsigned long into an hex/oct ascii string. pads with LEADING
  *	ascii 0's to fill string completely
  *	NOTE: the string created is NOT TERMINATED.
  */
 
 int
 ul_asc(u_long val, char *str, int len, int base)
 {
 	char *pt;
 	u_long digit;
 
 	/*
 	 * WARNING str is not '\0' terminated by this routine
 	 */
 	pt = str + len - 1;
 
 	/*
 	 * do a tailwise conversion (start at right most end of string to place
 	 * least significant digit). Keep shifting until conversion value goes
 	 * to zero (all digits were converted)
 	 */
 	if (base == HEX) {
 		while (pt >= str) {
 			if ((digit = (val & 0xf)) < 10)
 				*pt-- = '0' + (char)digit;
 			else
 				*pt-- = 'a' + (char)(digit - 10);
 			if ((val = (val >> 4)) == (u_long)0)
 				break;
 		}
 	} else {
 		while (pt >= str) {
 			*pt-- = '0' + (char)(val & 0x7);
 			if ((val = (val >> 3)) == (u_long)0)
 				break;
 		}
 	}
 
 	/*
 	 * pad with leading ascii ZEROS. We return -1 if we ran out of space.
 	 */
 	while (pt >= str)
 		*pt-- = '0';
 	if (val != (u_long)0)
 		return(-1);
 	return(0);
 }
 
 #ifndef NET2_STAT
 /*
  * asc_uqd()
  *	convert hex/octal character string into a u_quad_t. We do not have to
  *	check for overflow! (the headers in all supported formats are not large
  *	enough to create an overflow).
  *	NOTE: strings passed to us are NOT TERMINATED.
  * Return:
  *	u_quad_t value
  */
 
 u_quad_t
 asc_uqd(char *str, int len, int base)
 {
 	char *stop;
 	u_quad_t tval = 0;
 
 	stop = str + len;
 
 	/*
 	 * skip over leading blanks and zeros
 	 */
 	while ((str < stop) && ((*str == ' ') || (*str == '0')))
 		++str;
 
 	/*
 	 * for each valid digit, shift running value (tval) over to next digit
 	 * and add next digit
 	 */
 	if (base == HEX) {
 		while (str < stop) {
 			if ((*str >= '0') && (*str <= '9'))
 				tval = (tval << 4) + (*str++ - '0');
 			else if ((*str >= 'A') && (*str <= 'F'))
 				tval = (tval << 4) + 10 + (*str++ - 'A');
 			else if ((*str >= 'a') && (*str <= 'f'))
 				tval = (tval << 4) + 10 + (*str++ - 'a');
 			else
 				break;
 		}
 	} else {
  		while ((str < stop) && (*str >= '0') && (*str <= '7'))
 			tval = (tval << 3) + (*str++ - '0');
 	}
 	return(tval);
 }
 
 /*
  * uqd_asc()
  *	convert an u_quad_t into a hex/oct ascii string. pads with LEADING
  *	ascii 0's to fill string completely
  *	NOTE: the string created is NOT TERMINATED.
  */
 
 int
 uqd_asc(u_quad_t val, char *str, int len, int base)
 {
 	char *pt;
 	u_quad_t digit;
 
 	/*
 	 * WARNING str is not '\0' terminated by this routine
 	 */
 	pt = str + len - 1;
 
 	/*
 	 * do a tailwise conversion (start at right most end of string to place
 	 * least significant digit). Keep shifting until conversion value goes
 	 * to zero (all digits were converted)
 	 */
 	if (base == HEX) {
 		while (pt >= str) {
 			if ((digit = (val & 0xf)) < 10)
 				*pt-- = '0' + (char)digit;
 			else
 				*pt-- = 'a' + (char)(digit - 10);
 			if ((val = (val >> 4)) == (u_quad_t)0)
 				break;
 		}
 	} else {
 		while (pt >= str) {
 			*pt-- = '0' + (char)(val & 0x7);
 			if ((val = (val >> 3)) == (u_quad_t)0)
 				break;
 		}
 	}
 
 	/*
 	 * pad with leading ascii ZEROS. We return -1 if we ran out of space.
 	 */
 	while (pt >= str)
 		*pt-- = '0';
 	if (val != (u_quad_t)0)
 		return(-1);
 	return(0);
 }
 #endif
Index: head/contrib/mtree/create.c
===================================================================
--- head/contrib/mtree/create.c	(revision 311521)
+++ head/contrib/mtree/create.c	(revision 311522)
@@ -1,476 +1,477 @@
 /*	$NetBSD: create.c,v 1.73 2014/04/24 17:22:41 christos Exp $	*/
 
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if HAVE_NBTOOL_CONFIG_H
 #include "nbtool_config.h"
 #endif
 
 #include <sys/cdefs.h>
 #if defined(__RCSID) && !defined(lint)
 #if 0
 static char sccsid[] = "@(#)create.c	8.1 (Berkeley) 6/6/93";
 #else
 __RCSID("$NetBSD: create.c,v 1.73 2014/04/24 17:22:41 christos Exp $");
 #endif
 #endif /* not lint */
 
 #include <sys/param.h>
 #include <sys/stat.h>
 
 #if ! HAVE_NBTOOL_CONFIG_H
 #include <dirent.h>
 #endif
 
 #include <errno.h>
 #include <fcntl.h>
 #include <grp.h>
 #include <pwd.h>
 #include <stdio.h>
 #include <stdarg.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
 
 #ifndef NO_MD5
 #include <md5.h>
 #endif
 #ifndef NO_RMD160
 #include <rmd160.h>
 #endif
 #ifndef NO_SHA1
 #include <sha1.h>
 #endif
 #ifndef NO_SHA2
 #include <sha2.h>
 #endif
 
 #include "extern.h"
 
 #define	INDENTNAMELEN	15
 #define	MAXLINELEN	80
 
 static gid_t gid;
 static uid_t uid;
 static mode_t mode;
 static u_long flags;
 
 #ifdef __FreeBSD__
 #define	FTS_CONST const
 #else
 #define	FTS_CONST
 #endif
 
 static int	dcmp(const FTSENT *FTS_CONST *, const FTSENT *FTS_CONST *);
 static void	output(FILE *, int, int *, const char *, ...)
     __printflike(4, 5);
 static int	statd(FILE *, FTS *, FTSENT *, uid_t *, gid_t *, mode_t *,
     u_long *);
 static void	statf(FILE *, int, FTSENT *);
 
 void
 cwalk(FILE *fp)
 {
 	FTS *t;
 	FTSENT *p;
 	time_t clocktime;
 	char host[MAXHOSTNAMELEN + 1];
 	const char *user;
 	char *argv[2];
 	char  dot[] = ".";
 	int indent = 0;
 
 	argv[0] = dot;
 	argv[1] = NULL;
 
 	time(&clocktime);
 	gethostname(host, sizeof(host));
 	host[sizeof(host) - 1] = '\0';
 	if ((user = getlogin()) == NULL) {
 		struct passwd *pw;
 		user = (pw = getpwuid(getuid())) == NULL ? pw->pw_name :
 		    "<unknown>";
 	}
 
 	if (!nflag)
 		fprintf(fp,
 	    	    "#\t   user: %s\n#\tmachine: %s\n#\t   tree: %s\n"
 		    "#\t   date: %s",
 		    user, host, fullpath, ctime(&clocktime));
 
 	if ((t = fts_open(argv, ftsoptions, dcmp)) == NULL)
 		mtree_err("fts_open: %s", strerror(errno));
 	while ((p = fts_read(t)) != NULL) {
 		if (jflag)
 			indent = p->fts_level * 4;
 		if (check_excludes(p->fts_name, p->fts_path)) {
 			fts_set(t, p, FTS_SKIP);
 			continue;
 		}
 		if (!find_only(p->fts_path)) {
 			fts_set(t, p, FTS_SKIP);
 			continue;
 		}
 		switch(p->fts_info) {
 		case FTS_D:
 			if (!bflag)
 				fprintf(fp, "\n");
 			if (!nflag)
 				fprintf(fp, "# %s\n", p->fts_path);
 			statd(fp, t, p, &uid, &gid, &mode, &flags);
 			statf(fp, indent, p);
 			break;
 		case FTS_DP:
 			if (p->fts_level > 0)
 				if (!nflag)
 					fprintf(fp, "%*s# %s\n", indent, "",
 					    p->fts_path);
 			if (p->fts_level > 0 || flavor == F_FREEBSD9) {
 				fprintf(fp, "%*s..\n", indent, "");
 				if (!bflag)
 					fprintf(fp, "\n");
 			}
 			break;
 		case FTS_DNR:
 		case FTS_ERR:
 		case FTS_NS:
 			mtree_err("%s: %s",
 			    p->fts_path, strerror(p->fts_errno));
 			break;
 		default:
 			if (!dflag)
 				statf(fp, indent, p);
 			break;
 
 		}
 	}
 	fts_close(t);
 	if (sflag && keys & F_CKSUM)
 		mtree_err("%s checksum: %u", fullpath, crc_total);
 }
 
 static void
 statf(FILE *fp, int indent, FTSENT *p)
 {
 	u_int32_t len, val;
 	int fd, offset;
 	const char *name = NULL;
 #if !defined(NO_MD5) || !defined(NO_RMD160) || !defined(NO_SHA1) || !defined(NO_SHA2)
 	char *digestbuf;
 #endif
 
 	offset = fprintf(fp, "%*s%s%s", indent, "",
 	    S_ISDIR(p->fts_statp->st_mode) ? "" : "    ", vispath(p->fts_name));
 
 	if (offset > (INDENTNAMELEN + indent))
 		offset = MAXLINELEN;
 	else
 		offset += fprintf(fp, "%*s",
 		    (INDENTNAMELEN + indent) - offset, "");
 
 	if (!S_ISREG(p->fts_statp->st_mode) && (flavor == F_NETBSD6 || !dflag))
 		output(fp, indent, &offset, "type=%s",
 		    inotype(p->fts_statp->st_mode));
 	if (keys & (F_UID | F_UNAME) && p->fts_statp->st_uid != uid) {
 		if (keys & F_UNAME &&
 		    (name = user_from_uid(p->fts_statp->st_uid, 1)) != NULL)
 			output(fp, indent, &offset, "uname=%s", name);
 		if (keys & F_UID || (keys & F_UNAME && name == NULL))
 			output(fp, indent, &offset, "uid=%u",
 			    p->fts_statp->st_uid);
 	}
 	if (keys & (F_GID | F_GNAME) && p->fts_statp->st_gid != gid) {
 		if (keys & F_GNAME &&
 		    (name = group_from_gid(p->fts_statp->st_gid, 1)) != NULL)
 			output(fp, indent, &offset, "gname=%s", name);
 		if (keys & F_GID || (keys & F_GNAME && name == NULL))
 			output(fp, indent, &offset, "gid=%u",
 			    p->fts_statp->st_gid);
 	}
 	if (keys & F_MODE && (p->fts_statp->st_mode & MBITS) != mode)
 		output(fp, indent, &offset, "mode=%#o",
 		    p->fts_statp->st_mode & MBITS);
 	if (keys & F_DEV &&
 	    (S_ISBLK(p->fts_statp->st_mode) || S_ISCHR(p->fts_statp->st_mode)))
 		output(fp, indent, &offset, "device=%#jx",
 		    (uintmax_t)p->fts_statp->st_rdev);
 	if (keys & F_NLINK && p->fts_statp->st_nlink != 1)
-		output(fp, indent, &offset, "nlink=%u", p->fts_statp->st_nlink);
+		output(fp, indent, &offset, "nlink=%ju",
+		    (uintmax_t)p->fts_statp->st_nlink);
 	if (keys & F_SIZE &&
 	    (flavor == F_FREEBSD9 || S_ISREG(p->fts_statp->st_mode)))
 		output(fp, indent, &offset, "size=%ju",
 		    (uintmax_t)p->fts_statp->st_size);
 	if (keys & F_TIME)
 #if defined(BSD4_4) && !defined(HAVE_NBTOOL_CONFIG_H)
 		output(fp, indent, &offset, "time=%jd.%09ld",
 		    (intmax_t)p->fts_statp->st_mtimespec.tv_sec,
 		    p->fts_statp->st_mtimespec.tv_nsec);
 #else
 		output(fp, indent, &offset, "time=%jd.%09ld",
 		    (intmax_t)p->fts_statp->st_mtime, (long)0);
 #endif
 	if (keys & F_CKSUM && S_ISREG(p->fts_statp->st_mode)) {
 		if ((fd = open(p->fts_accpath, O_RDONLY, 0)) < 0 ||
 		    crc(fd, &val, &len))
 			mtree_err("%s: %s", p->fts_accpath, strerror(errno));
 		close(fd);
 		output(fp, indent, &offset, "cksum=%lu", (long)val);
 	}
 #ifndef NO_MD5
 	if (keys & F_MD5 && S_ISREG(p->fts_statp->st_mode)) {
 		if ((digestbuf = MD5File(p->fts_accpath, NULL)) == NULL)
 			mtree_err("%s: MD5File failed: %s", p->fts_accpath,
 			    strerror(errno));
 		output(fp, indent, &offset, "%s=%s", MD5KEY, digestbuf);
 		free(digestbuf);
 	}
 #endif	/* ! NO_MD5 */
 #ifndef NO_RMD160
 	if (keys & F_RMD160 && S_ISREG(p->fts_statp->st_mode)) {
 		if ((digestbuf = RMD160File(p->fts_accpath, NULL)) == NULL)
 			mtree_err("%s: RMD160File failed: %s", p->fts_accpath,
 			    strerror(errno));
 		output(fp, indent, &offset, "%s=%s", RMD160KEY, digestbuf);
 		free(digestbuf);
 	}
 #endif	/* ! NO_RMD160 */
 #ifndef NO_SHA1
 	if (keys & F_SHA1 && S_ISREG(p->fts_statp->st_mode)) {
 		if ((digestbuf = SHA1File(p->fts_accpath, NULL)) == NULL)
 			mtree_err("%s: SHA1File failed: %s", p->fts_accpath,
 			    strerror(errno));
 		output(fp, indent, &offset, "%s=%s", SHA1KEY, digestbuf);
 		free(digestbuf);
 	}
 #endif	/* ! NO_SHA1 */
 #ifndef NO_SHA2
 	if (keys & F_SHA256 && S_ISREG(p->fts_statp->st_mode)) {
 		if ((digestbuf = SHA256_File(p->fts_accpath, NULL)) == NULL)
 			mtree_err("%s: SHA256_File failed: %s", p->fts_accpath,
 			    strerror(errno));
 		output(fp, indent, &offset, "%s=%s", SHA256KEY, digestbuf);
 		free(digestbuf);
 	}
 #ifdef SHA384_BLOCK_LENGTH
 	if (keys & F_SHA384 && S_ISREG(p->fts_statp->st_mode)) {
 		if ((digestbuf = SHA384_File(p->fts_accpath, NULL)) == NULL)
 			mtree_err("%s: SHA384_File failed: %s", p->fts_accpath,
 			    strerror(errno));
 		output(fp, indent, &offset, "%s=%s", SHA384KEY, digestbuf);
 		free(digestbuf);
 	}
 #endif
 	if (keys & F_SHA512 && S_ISREG(p->fts_statp->st_mode)) {
 		if ((digestbuf = SHA512_File(p->fts_accpath, NULL)) == NULL)
 			mtree_err("%s: SHA512_File failed: %s", p->fts_accpath,
 			    strerror(errno));
 		output(fp, indent, &offset, "%s=%s", SHA512KEY, digestbuf);
 		free(digestbuf);
 	}
 #endif	/* ! NO_SHA2 */
 	if (keys & F_SLINK &&
 	    (p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE))
 		output(fp, indent, &offset, "link=%s",
 		    vispath(rlink(p->fts_accpath)));
 #if HAVE_STRUCT_STAT_ST_FLAGS
 	if (keys & F_FLAGS && p->fts_statp->st_flags != flags) {
 		char *str = flags_to_string(p->fts_statp->st_flags, "none");
 		output(fp, indent, &offset, "flags=%s", str);
 		free(str);
 	}
 #endif
 	putchar('\n');
 }
 
 /* XXX
  * FLAGS2INDEX will fail once the user and system settable bits need more
  * than one byte, respectively.
  */
 #define FLAGS2INDEX(x)  (((x >> 8) & 0x0000ff00) | (x & 0x000000ff))
 
 #define	MTREE_MAXGID	5000
 #define	MTREE_MAXUID	5000
 #define	MTREE_MAXMODE	(MBITS + 1)
 #if HAVE_STRUCT_STAT_ST_FLAGS
 #define	MTREE_MAXFLAGS  (FLAGS2INDEX(CH_MASK) + 1)   /* 1808 */
 #else
 #define MTREE_MAXFLAGS	1
 #endif
 #define	MTREE_MAXS 16
 
 static int
 statd(FILE *fp, FTS *t, FTSENT *parent, uid_t *puid, gid_t *pgid, mode_t *pmode,
     u_long *pflags)
 {
 	FTSENT *p;
 	gid_t sgid;
 	uid_t suid;
 	mode_t smode;
 	u_long sflags = 0;
 	const char *name = NULL;
 	gid_t savegid;
 	uid_t saveuid;
 	mode_t savemode;
 	u_long saveflags;
 	u_short maxgid, maxuid, maxmode, maxflags;
 	u_short g[MTREE_MAXGID], u[MTREE_MAXUID],
 		m[MTREE_MAXMODE], f[MTREE_MAXFLAGS];
 	static int first = 1;
 
 	savegid = *pgid;
 	saveuid = *puid;
 	savemode = *pmode;
 	saveflags = *pflags;
 	if ((p = fts_children(t, 0)) == NULL) {
 		if (errno)
 			mtree_err("%s: %s", RP(parent), strerror(errno));
 		return (1);
 	}
 
 	memset(g, 0, sizeof(g));
 	memset(u, 0, sizeof(u));
 	memset(m, 0, sizeof(m));
 	memset(f, 0, sizeof(f));
 
 	maxuid = maxgid = maxmode = maxflags = 0;
 	for (; p; p = p->fts_link) {
 		if (flavor == F_NETBSD6 || !dflag ||
 		    (dflag && S_ISDIR(p->fts_statp->st_mode))) {
 			smode = p->fts_statp->st_mode & MBITS;
 			if (smode < MTREE_MAXMODE && ++m[smode] > maxmode) {
 				savemode = smode;
 				maxmode = m[smode];
 			}
 			sgid = p->fts_statp->st_gid;
 			if (sgid < MTREE_MAXGID && ++g[sgid] > maxgid) {
 				savegid = sgid;
 				maxgid = g[sgid];
 			}
 			suid = p->fts_statp->st_uid;
 			if (suid < MTREE_MAXUID && ++u[suid] > maxuid) {
 				saveuid = suid;
 				maxuid = u[suid];
 			}
 
 #if HAVE_STRUCT_STAT_ST_FLAGS
 			sflags = FLAGS2INDEX(p->fts_statp->st_flags);
 			if (sflags < MTREE_MAXFLAGS && ++f[sflags] > maxflags) {
 				saveflags = p->fts_statp->st_flags;
 				maxflags = f[sflags];
 			}
 #endif
 		}
 	}
 	/*
 	 * If the /set record is the same as the last one we do not need to
 	 * output a new one.  So first we check to see if anything changed.
 	 * Note that we always output a /set record for the first directory.
 	 */
 	if (((keys & (F_UNAME | F_UID)) && (*puid != saveuid)) ||
 	    ((keys & (F_GNAME | F_GID)) && (*pgid != savegid)) ||
 	    ((keys & F_MODE) && (*pmode != savemode)) || 
 	    ((keys & F_FLAGS) && (*pflags != saveflags)) ||
 	    first) {
 		first = 0;
 		if (flavor != F_NETBSD6 && dflag)
 			fprintf(fp, "/set type=dir");
 		else
 			fprintf(fp, "/set type=file");
 		if (keys & (F_UID | F_UNAME)) {
 			if (keys & F_UNAME &&
 			    (name = user_from_uid(saveuid, 1)) != NULL)
 				fprintf(fp, " uname=%s", name);
 			if (keys & F_UID || (keys & F_UNAME && name == NULL))
 				fprintf(fp, " uid=%lu", (u_long)saveuid);
 		}
 		if (keys & (F_GID | F_GNAME)) {
 			if (keys & F_GNAME &&
 			    (name = group_from_gid(savegid, 1)) != NULL)
 				fprintf(fp, " gname=%s", name);
 			if (keys & F_GID || (keys & F_GNAME && name == NULL))
 				fprintf(fp, " gid=%lu", (u_long)savegid);
 		}
 		if (keys & F_MODE)
 			fprintf(fp, " mode=%#lo", (u_long)savemode);
 		if (keys & F_NLINK)
 			fprintf(fp, " nlink=1");
 		if (keys & F_FLAGS) {
 			char *str = flags_to_string(saveflags, "none");
 			fprintf(fp, " flags=%s", str);
 			free(str);
 		}
 		fprintf(fp, "\n");
 		*puid = saveuid;
 		*pgid = savegid;
 		*pmode = savemode;
 		*pflags = saveflags;
 	}
 	return (0);
 }
 
 /*
  * dcmp --
  *	used as a comparison function passed to fts_open() to control
  *	the order in which fts_read() returns results.	We make
  *	directories sort after non-directories, but otherwise sort in
  *	strcmp() order.
  *
  * Keep this in sync with nodecmp() in spec.c.
  */
 static int
 dcmp(const FTSENT *FTS_CONST *a, const FTSENT *FTS_CONST *b)
 {
 
 	if (S_ISDIR((*a)->fts_statp->st_mode)) {
 		if (!S_ISDIR((*b)->fts_statp->st_mode))
 			return (1);
 	} else if (S_ISDIR((*b)->fts_statp->st_mode))
 		return (-1);
 	return (strcmp((*a)->fts_name, (*b)->fts_name));
 }
 
 void
 output(FILE *fp, int indent, int *offset, const char *fmt, ...)
 {
 	va_list ap;
 	char buf[1024];
 
 	va_start(ap, fmt);
 	vsnprintf(buf, sizeof(buf), fmt, ap);
 	va_end(ap);
 
 	if (*offset + strlen(buf) > MAXLINELEN - 3) {
 		fprintf(fp, " \\\n%*s", INDENTNAMELEN + indent, "");
 		*offset = INDENTNAMELEN + indent;
 	}
 	*offset += fprintf(fp, " %s", buf) + 1;
 }
Index: head/contrib/mtree/spec.c
===================================================================
--- head/contrib/mtree/spec.c	(revision 311521)
+++ head/contrib/mtree/spec.c	(revision 311522)
@@ -1,852 +1,853 @@
 /*	$NetBSD: spec.c,v 1.89 2014/04/24 17:22:41 christos Exp $	*/
 
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*-
  * Copyright (c) 2001-2004 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Luke Mewburn of Wasabi Systems.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #if HAVE_NBTOOL_CONFIG_H
 #include "nbtool_config.h"
 #endif
 
 #include <sys/cdefs.h>
 #if defined(__RCSID) && !defined(lint)
 #if 0
 static char sccsid[] = "@(#)spec.c	8.2 (Berkeley) 4/28/95";
 #else
 __RCSID("$NetBSD: spec.c,v 1.89 2014/04/24 17:22:41 christos Exp $");
 #endif
 #endif /* not lint */
 
 #include <sys/param.h>
 #include <sys/stat.h>
 
 #include <assert.h>
 #include <ctype.h>
 #include <errno.h>
 #include <grp.h>
 #include <pwd.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include <vis.h>
 #include <util.h>
 
 #include "extern.h"
 #include "pack_dev.h"
 
 size_t	mtree_lineno;			/* Current spec line number */
 int	mtree_Mflag;			/* Merge duplicate entries */
 int	mtree_Wflag;			/* Don't "whack" permissions */
 int	mtree_Sflag;			/* Sort entries */
 
 static	dev_t	parsedev(char *);
 static	void	replacenode(NODE *, NODE *);
 static	void	set(char *, NODE *);
 static	void	unset(char *, NODE *);
 static	void	addchild(NODE *, NODE *);
 static	int	nodecmp(const NODE *, const NODE *);
 static	int	appendfield(FILE *, int, const char *, ...) __printflike(3, 4);
 
 #define REPLACEPTR(x,v)	do { if ((x)) free((x)); (x) = (v); } while (0)
 
 NODE *
 spec(FILE *fp)
 {
 	NODE *centry, *last, *pathparent, *cur;
 	char *p, *e, *next;
 	NODE ginfo, *root;
 	char *buf, *tname, *ntname;
 	size_t tnamelen, plen;
 
 	root = NULL;
 	centry = last = NULL;
 	tname = NULL;
 	tnamelen = 0;
 	memset(&ginfo, 0, sizeof(ginfo));
 	for (mtree_lineno = 0;
 	    (buf = fparseln(fp, NULL, &mtree_lineno, NULL,
 		FPARSELN_UNESCCOMM));
 	    free(buf)) {
 		/* Skip leading whitespace. */
 		for (p = buf; *p && isspace((unsigned char)*p); ++p)
 			continue;
 
 		/* If nothing but whitespace, continue. */
 		if (!*p)
 			continue;
 
 #ifdef DEBUG
 		fprintf(stderr, "line %lu: {%s}\n",
 		    (u_long)mtree_lineno, p);
 #endif
 		/* Grab file name, "$", "set", or "unset". */
 		next = buf;
 		while ((p = strsep(&next, " \t")) != NULL && *p == '\0')
 			continue;
 		if (p == NULL)
 			mtree_err("missing field");
 
 		if (p[0] == '/') {
 			if (strcmp(p + 1, "set") == 0)
 				set(next, &ginfo);
 			else if (strcmp(p + 1, "unset") == 0)
 				unset(next, &ginfo);
 			else
 				mtree_err("invalid specification `%s'", p);
 			continue;
 		}
 
 		if (strcmp(p, "..") == 0) {
 			/* Don't go up, if haven't gone down. */
 			if (root == NULL)
 				goto noparent;
 			if (last->type != F_DIR || last->flags & F_DONE) {
 				if (last == root)
 					goto noparent;
 				last = last->parent;
 			}
 			last->flags |= F_DONE;
 			continue;
 
 noparent:		mtree_err("no parent node");
 		}
 
 		plen = strlen(p) + 1;
 		if (plen > tnamelen) {
 			if ((ntname = realloc(tname, plen)) == NULL)
 				mtree_err("realloc: %s", strerror(errno));
 			tname = ntname;
 			tnamelen = plen;
 		}
 		if (strunvis(tname, p) == -1)
 			mtree_err("strunvis failed on `%s'", p);
 		p = tname;
 
 		pathparent = NULL;
 		if (strchr(p, '/') != NULL) {
 			cur = root;
 			for (; (e = strchr(p, '/')) != NULL; p = e+1) {
 				if (p == e)
 					continue;	/* handle // */
 				*e = '\0';
 				if (strcmp(p, ".") != 0) {
 					while (cur &&
 					    strcmp(cur->name, p) != 0) {
 						cur = cur->next;
 					}
 				}
 				if (cur == NULL || cur->type != F_DIR) {
 					mtree_err("%s: %s", tname,
 					"missing directory in specification");
 				}
 				*e = '/';
 				pathparent = cur;
 				cur = cur->child;
 			}
 			if (*p == '\0')
 				mtree_err("%s: empty leaf element", tname);
 		}
 
 		if ((centry = calloc(1, sizeof(NODE) + strlen(p))) == NULL)
 			mtree_err("%s", strerror(errno));
 		*centry = ginfo;
 		centry->lineno = mtree_lineno;
 		strcpy(centry->name, p);
 #define	MAGIC	"?*["
 		if (strpbrk(p, MAGIC))
 			centry->flags |= F_MAGIC;
 		set(next, centry);
 
 		if (root == NULL) {
 				/*
 				 * empty tree
 				 */
 			/*
 			 * Allow a bare "." root node by forcing it to
 			 * type=dir for compatibility with FreeBSD.
 			 */
 			if (strcmp(centry->name, ".") == 0 && centry->type == 0)
 				centry->type = F_DIR;
 			if (strcmp(centry->name, ".") != 0 ||
 			    centry->type != F_DIR)
 				mtree_err(
 				    "root node must be the directory `.'");
 			last = root = centry;
 			root->parent = root;
 		} else if (pathparent != NULL) {
 				/*
 				 * full path entry; add or replace
 				 */
 			centry->parent = pathparent;
 			addchild(pathparent, centry);
 			last = centry;
 		} else if (strcmp(centry->name, ".") == 0) {
 				/*
 				 * duplicate "." entry; always replace
 				 */
 			replacenode(root, centry);
 		} else if (last->type == F_DIR && !(last->flags & F_DONE)) {
 				/*
 				 * new relative child in current dir;
 				 * add or replace
 				 */
 			centry->parent = last;
 			addchild(last, centry);
 			last = centry;
 		} else {
 				/*
 				 * new relative child in parent dir
 				 * (after encountering ".." entry);
 				 * add or replace
 				 */
 			centry->parent = last->parent;
 			addchild(last->parent, centry);
 			last = centry;
 		}
 	}
 	return (root);
 }
 
 void
 free_nodes(NODE *root)
 {
 	NODE	*cur, *next;
 
 	if (root == NULL)
 		return;
 
 	next = NULL;
 	for (cur = root; cur != NULL; cur = next) {
 		next = cur->next;
 		free_nodes(cur->child);
 		REPLACEPTR(cur->slink, NULL);
 		REPLACEPTR(cur->md5digest, NULL);
 		REPLACEPTR(cur->rmd160digest, NULL);
 		REPLACEPTR(cur->sha1digest, NULL);
 		REPLACEPTR(cur->sha256digest, NULL);
 		REPLACEPTR(cur->sha384digest, NULL);
 		REPLACEPTR(cur->sha512digest, NULL);
 		REPLACEPTR(cur->tags, NULL);
 		REPLACEPTR(cur, NULL);
 	}
 }
 
 /*
  * appendfield --
  *	Like fprintf(), but output a space either before or after
  *	the regular output, according to the pathlast flag.
  */
 static int
 appendfield(FILE *fp, int pathlast, const char *fmt, ...)
 {
 	va_list ap;
 	int result;
 
 	va_start(ap, fmt);
 	if (!pathlast)
 		fprintf(fp, " ");
 	result = vprintf(fmt, ap);
 	if (pathlast)
 		fprintf(fp, " ");
 	va_end(ap);
 	return result;
 }
 
 /*
  * dump_nodes --
  *	dump the NODEs from `cur', based in the directory `dir'.
  *	if pathlast is none zero, print the path last, otherwise print
  *	it first.
  */
 void
 dump_nodes(FILE *fp, const char *dir, NODE *root, int pathlast)
 {
 	NODE	*cur;
 	char	path[MAXPATHLEN];
 	const char *name;
 	char	*str;
 	char	*p, *q;
 
 	for (cur = root; cur != NULL; cur = cur->next) {
 		if (cur->type != F_DIR && !matchtags(cur))
 			continue;
 
 		if (snprintf(path, sizeof(path), "%s%s%s",
 		    dir, *dir ? "/" : "", cur->name)
 		    >= (int)sizeof(path))
 			mtree_err("Pathname too long.");
 
 		if (!pathlast)
 			fprintf(fp, "%s", vispath(path));
 
 #define MATCHFLAG(f)	((keys & (f)) && (cur->flags & (f)))
 		if (MATCHFLAG(F_TYPE))
 			appendfield(fp, pathlast, "type=%s",
 			    nodetype(cur->type));
 		if (MATCHFLAG(F_UID | F_UNAME)) {
 			if (keys & F_UNAME &&
 			    (name = user_from_uid(cur->st_uid, 1)) != NULL)
 				appendfield(fp, pathlast, "uname=%s", name);
 			else
 				appendfield(fp, pathlast, "uid=%u",
 				    cur->st_uid);
 		}
 		if (MATCHFLAG(F_GID | F_GNAME)) {
 			if (keys & F_GNAME &&
 			    (name = group_from_gid(cur->st_gid, 1)) != NULL)
 				appendfield(fp, pathlast, "gname=%s", name);
 			else
 				appendfield(fp, pathlast, "gid=%u",
 				    cur->st_gid);
 		}
 		if (MATCHFLAG(F_MODE))
 			appendfield(fp, pathlast, "mode=%#o", cur->st_mode);
 		if (MATCHFLAG(F_DEV) &&
 		    (cur->type == F_BLOCK || cur->type == F_CHAR))
 			appendfield(fp, pathlast, "device=%#jx",
 			    (uintmax_t)cur->st_rdev);
 		if (MATCHFLAG(F_NLINK))
-			appendfield(fp, pathlast, "nlink=%d", cur->st_nlink);
+			appendfield(fp, pathlast, "nlink=%ju",
+			    (uintmax_t)cur->st_nlink);
 		if (MATCHFLAG(F_SLINK))
 			appendfield(fp, pathlast, "link=%s",
 			    vispath(cur->slink));
 		if (MATCHFLAG(F_SIZE))
 			appendfield(fp, pathlast, "size=%ju",
 			    (uintmax_t)cur->st_size);
 		if (MATCHFLAG(F_TIME))
 			appendfield(fp, pathlast, "time=%jd.%09ld",
 			    (intmax_t)cur->st_mtimespec.tv_sec,
 			    cur->st_mtimespec.tv_nsec);
 		if (MATCHFLAG(F_CKSUM))
 			appendfield(fp, pathlast, "cksum=%lu", cur->cksum);
 		if (MATCHFLAG(F_MD5))
 			appendfield(fp, pathlast, "%s=%s", MD5KEY,
 			    cur->md5digest);
 		if (MATCHFLAG(F_RMD160))
 			appendfield(fp, pathlast, "%s=%s", RMD160KEY,
 			    cur->rmd160digest);
 		if (MATCHFLAG(F_SHA1))
 			appendfield(fp, pathlast, "%s=%s", SHA1KEY,
 			    cur->sha1digest);
 		if (MATCHFLAG(F_SHA256))
 			appendfield(fp, pathlast, "%s=%s", SHA256KEY,
 			    cur->sha256digest);
 		if (MATCHFLAG(F_SHA384))
 			appendfield(fp, pathlast, "%s=%s", SHA384KEY,
 			    cur->sha384digest);
 		if (MATCHFLAG(F_SHA512))
 			appendfield(fp, pathlast, "%s=%s", SHA512KEY,
 			    cur->sha512digest);
 		if (MATCHFLAG(F_FLAGS)) {
 			str = flags_to_string(cur->st_flags, "none");
 			appendfield(fp, pathlast, "flags=%s", str);
 			free(str);
 		}
 		if (MATCHFLAG(F_IGN))
 			appendfield(fp, pathlast, "ignore");
 		if (MATCHFLAG(F_OPT))
 			appendfield(fp, pathlast, "optional");
 		if (MATCHFLAG(F_TAGS)) {
 			/* don't output leading or trailing commas */
 			p = cur->tags;
 			while (*p == ',')
 				p++;
 			q = p + strlen(p);
 			while(q > p && q[-1] == ',')
 				q--;
 			appendfield(fp, pathlast, "tags=%.*s", (int)(q - p), p);
 		}
 		puts(pathlast ? vispath(path) : "");
 
 		if (cur->child)
 			dump_nodes(fp, path, cur->child, pathlast);
 	}
 }
 
 /*
  * vispath --
  *	strsvis(3) encodes path, which must not be longer than MAXPATHLEN
  *	characters long, and returns a pointer to a static buffer containing
  *	the result.
  */
 char *
 vispath(const char *path)
 {
 	static const char extra[] = { ' ', '\t', '\n', '\\', '#', '\0' };
 	static const char extra_glob[] = { ' ', '\t', '\n', '\\', '#', '*',
 	    '?', '[', '\0' };
 	static char pathbuf[4*MAXPATHLEN + 1];
 
 	if (flavor == F_NETBSD6)
 		strsvis(pathbuf, path, VIS_CSTYLE, extra);
 	else
 		strsvis(pathbuf, path, VIS_OCTAL, extra_glob);
 	return pathbuf;
 }
 
 
 static dev_t
 parsedev(char *arg)
 {
 #define MAX_PACK_ARGS	3
 	u_long	numbers[MAX_PACK_ARGS];
 	char	*p, *ep, *dev;
 	int	argc;
 	pack_t	*pack;
 	dev_t	result;
 	const char *error = NULL;
 
 	if ((dev = strchr(arg, ',')) != NULL) {
 		*dev++='\0';
 		if ((pack = pack_find(arg)) == NULL)
 			mtree_err("unknown format `%s'", arg);
 		argc = 0;
 		while ((p = strsep(&dev, ",")) != NULL) {
 			if (*p == '\0')
 				mtree_err("missing number");
 			numbers[argc++] = strtoul(p, &ep, 0);
 			if (*ep != '\0')
 				mtree_err("invalid number `%s'",
 				    p);
 			if (argc > MAX_PACK_ARGS)
 				mtree_err("too many arguments");
 		}
 		if (argc < 2)
 			mtree_err("not enough arguments");
 		result = (*pack)(argc, numbers, &error);
 		if (error != NULL)
 			mtree_err("%s", error);
 	} else {
 		result = (dev_t)strtoul(arg, &ep, 0);
 		if (*ep != '\0')
 			mtree_err("invalid device `%s'", arg);
 	}
 	return (result);
 }
 
 static void
 replacenode(NODE *cur, NODE *new)
 {
 
 #define REPLACE(x)	cur->x = new->x
 #define REPLACESTR(x)	REPLACEPTR(cur->x,new->x)
 
 	if (cur->type != new->type) {
 		if (mtree_Mflag) {
 				/*
 				 * merge entries with different types; we
 				 * don't want children retained in this case.
 				 */
 			REPLACE(type);
 			free_nodes(cur->child);
 			cur->child = NULL;
 		} else {
 			mtree_err(
 			    "existing entry for `%s', type `%s'"
 			    " does not match type `%s'",
 			    cur->name, nodetype(cur->type),
 			    nodetype(new->type));
 		}
 	}
 
 	REPLACE(st_size);
 	REPLACE(st_mtimespec);
 	REPLACESTR(slink);
 	if (cur->slink != NULL) {
 		if ((cur->slink = strdup(new->slink)) == NULL)
 			mtree_err("memory allocation error");
 		if (strunvis(cur->slink, new->slink) == -1)
 			mtree_err("strunvis failed on `%s'", new->slink);
 		free(new->slink);
 	}
 	REPLACE(st_uid);
 	REPLACE(st_gid);
 	REPLACE(st_mode);
 	REPLACE(st_rdev);
 	REPLACE(st_flags);
 	REPLACE(st_nlink);
 	REPLACE(cksum);
 	REPLACESTR(md5digest);
 	REPLACESTR(rmd160digest);
 	REPLACESTR(sha1digest);
 	REPLACESTR(sha256digest);
 	REPLACESTR(sha384digest);
 	REPLACESTR(sha512digest);
 	REPLACESTR(tags);
 	REPLACE(lineno);
 	REPLACE(flags);
 	free(new);
 }
 
 static void
 set(char *t, NODE *ip)
 {
 	int	type, value, len;
 	gid_t	gid;
 	uid_t	uid;
 	char	*kw, *val, *md, *ep;
 	void	*m;
 
 	while ((kw = strsep(&t, "= \t")) != NULL) {
 		if (*kw == '\0')
 			continue;
 		if (strcmp(kw, "all") == 0)
 			mtree_err("invalid keyword `all'");
 		ip->flags |= type = parsekey(kw, &value);
 		if (!value)
 			/* Just set flag bit (F_IGN and F_OPT) */
 			continue;
 		while ((val = strsep(&t, " \t")) != NULL && *val == '\0')
 			continue;
 		if (val == NULL)
 			mtree_err("missing value");
 		switch (type) {
 		case F_CKSUM:
 			ip->cksum = strtoul(val, &ep, 10);
 			if (*ep)
 				mtree_err("invalid checksum `%s'", val);
 			break;
 		case F_DEV:
 			ip->st_rdev = parsedev(val);
 			break;
 		case F_FLAGS:
 			if (strcmp("none", val) == 0)
 				ip->st_flags = 0;
 			else if (string_to_flags(&val, &ip->st_flags, NULL)
 			    != 0)
 				mtree_err("invalid flag `%s'", val);
 			break;
 		case F_GID:
 			ip->st_gid = (gid_t)strtoul(val, &ep, 10);
 			if (*ep)
 				mtree_err("invalid gid `%s'", val);
 			break;
 		case F_GNAME:
 			if (mtree_Wflag)	/* don't parse if whacking */
 				break;
 			if (gid_from_group(val, &gid) == -1)
 				mtree_err("unknown group `%s'", val);
 			ip->st_gid = gid;
 			break;
 		case F_MD5:
 			if (val[0]=='0' && val[1]=='x')
 				md=&val[2];
 			else
 				md=val;
 			if ((ip->md5digest = strdup(md)) == NULL)
 				mtree_err("memory allocation error");
 			break;
 		case F_MODE:
 			if ((m = setmode(val)) == NULL)
 				mtree_err("cannot set file mode `%s' (%s)",
 				    val, strerror(errno));
 			ip->st_mode = getmode(m, 0);
 			free(m);
 			break;
 		case F_NLINK:
 			ip->st_nlink = (nlink_t)strtoul(val, &ep, 10);
 			if (*ep)
 				mtree_err("invalid link count `%s'", val);
 			break;
 		case F_RMD160:
 			if (val[0]=='0' && val[1]=='x')
 				md=&val[2];
 			else
 				md=val;
 			if ((ip->rmd160digest = strdup(md)) == NULL)
 				mtree_err("memory allocation error");
 			break;
 		case F_SHA1:
 			if (val[0]=='0' && val[1]=='x')
 				md=&val[2];
 			else
 				md=val;
 			if ((ip->sha1digest = strdup(md)) == NULL)
 				mtree_err("memory allocation error");
 			break;
 		case F_SIZE:
 			ip->st_size = (off_t)strtoll(val, &ep, 10);
 			if (*ep)
 				mtree_err("invalid size `%s'", val);
 			break;
 		case F_SLINK:
 			if ((ip->slink = strdup(val)) == NULL)
 				mtree_err("memory allocation error");
 			if (strunvis(ip->slink, val) == -1)
 				mtree_err("strunvis failed on `%s'", val);
 			break;
 		case F_TAGS:
 			len = strlen(val) + 3;	/* "," + str + ",\0" */
 			if ((ip->tags = malloc(len)) == NULL)
 				mtree_err("memory allocation error");
 			snprintf(ip->tags, len, ",%s,", val);
 			break;
 		case F_TIME:
 			ip->st_mtimespec.tv_sec =
 			    (time_t)strtoll(val, &ep, 10);
 			if (*ep != '.')
 				mtree_err("invalid time `%s'", val);
 			val = ep + 1;
 			ip->st_mtimespec.tv_nsec = strtol(val, &ep, 10);
 			if (*ep)
 				mtree_err("invalid time `%s'", val);
 			break;
 		case F_TYPE:
 			ip->type = parsetype(val);
 			break;
 		case F_UID:
 			ip->st_uid = (uid_t)strtoul(val, &ep, 10);
 			if (*ep)
 				mtree_err("invalid uid `%s'", val);
 			break;
 		case F_UNAME:
 			if (mtree_Wflag)	/* don't parse if whacking */
 				break;
 			if (uid_from_user(val, &uid) == -1)
 				mtree_err("unknown user `%s'", val);
 			ip->st_uid = uid;
 			break;
 		case F_SHA256:
 			if (val[0]=='0' && val[1]=='x')
 				md=&val[2];
 			else
 				md=val;
 			if ((ip->sha256digest = strdup(md)) == NULL)
 				mtree_err("memory allocation error");
 			break;
 		case F_SHA384:
 			if (val[0]=='0' && val[1]=='x')
 				md=&val[2];
 			else
 				md=val;
 			if ((ip->sha384digest = strdup(md)) == NULL)
 				mtree_err("memory allocation error");
 			break;
 		case F_SHA512:
 			if (val[0]=='0' && val[1]=='x')
 				md=&val[2];
 			else
 				md=val;
 			if ((ip->sha512digest = strdup(md)) == NULL)
 				mtree_err("memory allocation error");
 			break;
 		default:
 			mtree_err(
 			    "set(): unsupported key type 0x%x (INTERNAL ERROR)",
 			    type);
 			/* NOTREACHED */
 		}
 	}
 }
 
 static void
 unset(char *t, NODE *ip)
 {
 	char *p;
 
 	while ((p = strsep(&t, " \t")) != NULL) {
 		if (*p == '\0')
 			continue;
 		ip->flags &= ~parsekey(p, NULL);
 	}
 }
 
 /*
  * addchild --
  *	Add the centry node as a child of the pathparent node.	If
  *	centry is a duplicate, call replacenode().  If centry is not
  *	a duplicate, insert it into the linked list referenced by
  *	pathparent->child.  Keep the list sorted if Sflag is set.
  */
 static void
 addchild(NODE *pathparent, NODE *centry)
 {
 	NODE *samename;      /* node with the same name as centry */
 	NODE *replacepos;    /* if non-NULL, centry should replace this node */
 	NODE *insertpos;     /* if non-NULL, centry should be inserted
 			      * after this node */
 	NODE *cur;           /* for stepping through the list */
 	NODE *last;          /* the last node in the list */
 	int cmp;
 
 	samename = NULL;
 	replacepos = NULL;
 	insertpos = NULL;
 	last = NULL;
 	cur = pathparent->child;
 	if (cur == NULL) {
 		/* centry is pathparent's first and only child node so far */
 		pathparent->child = centry;
 		return;
 	}
 
 	/*
 	 * pathparent already has at least one other child, so add the
 	 * centry node to the list.
 	 *
 	 * We first scan through the list looking for an existing node
 	 * with the same name (setting samename), and also looking
 	 * for the correct position to replace or insert the new node
 	 * (setting replacepos and/or insertpos).
 	 */
 	for (; cur != NULL; last = cur, cur = cur->next) {
 		if (strcmp(centry->name, cur->name) == 0) {
 			samename = cur;
 		}
 		if (mtree_Sflag) {
 			cmp = nodecmp(centry, cur);
 			if (cmp == 0) {
 				replacepos = cur;
 			} else if (cmp > 0) {
 				insertpos = cur;
 			}
 		}
 	}
 	if (! mtree_Sflag) {
 		if (samename != NULL) {
 			/* replace node with same name */
 			replacepos = samename;
 		} else {
 			/* add new node at end of list */
 			insertpos = last;
 		}
 	}
 
 	if (samename != NULL) {
 		/*
 		 * We found a node with the same name above.  Call
 		 * replacenode(), which will either exit with an error,
 		 * or replace the information in the samename node and
 		 * free the information in the centry node.
 		 */
 		replacenode(samename, centry);
 		if (samename == replacepos) {
 			/* The just-replaced node was in the correct position */
 			return;
 		}
 		if (samename == insertpos || samename->prev == insertpos) {
 			/*
 			 * We thought the new node should be just before
 			 * or just after the replaced node, but that would
 			 * be equivalent to just retaining the replaced node.
 			 */
 			return;
 		}
 
 		/*
 		 * The just-replaced node is in the wrong position in
 		 * the list.  This can happen if sort order depends on
 		 * criteria other than the node name.
 		 *
 		 * Make centry point to the just-replaced node.	 Unlink
 		 * the just-replaced node from the list, and allow it to
 		 * be insterted in the correct position later.
 		 */
 		centry = samename;
 		if (centry->prev)
 			centry->prev->next = centry->next;
 		else {
 			/* centry->next is the new head of the list */
 			pathparent->child = centry->next;
 			assert(centry->next != NULL);
 		}
 		if (centry->next)
 			centry->next->prev = centry->prev;
 		centry->prev = NULL;
 		centry->next = NULL;
 	}
 
 	if (insertpos == NULL) {
 		/* insert centry at the beginning of the list */
 		pathparent->child->prev = centry;
 		centry->next = pathparent->child;
 		centry->prev = NULL;
 		pathparent->child = centry;
 	} else {
 		/* insert centry into the list just after insertpos */
 		centry->next = insertpos->next;
 		insertpos->next = centry;
 		centry->prev = insertpos;
 		if (centry->next)
 			centry->next->prev = centry;
 	}
 	return;
 }
 
 /*
  * nodecmp --
  *	used as a comparison function by addchild() to control the order
  *	in which entries appear within a list of sibling nodes.	 We make
  *	directories sort after non-directories, but otherwise sort in
  *	strcmp() order.
  *
  * Keep this in sync with dcmp() in create.c.
  */
 static int
 nodecmp(const NODE *a, const NODE *b)
 {
 
 	if ((a->type & F_DIR) != 0) {
 		if ((b->type & F_DIR) == 0)
 			return 1;
 	} else if ((b->type & F_DIR) != 0)
 		return -1;
 	return strcmp(a->name, b->name);
 }
Index: head/contrib/mtree/specspec.c
===================================================================
--- head/contrib/mtree/specspec.c	(revision 311521)
+++ head/contrib/mtree/specspec.c	(revision 311522)
@@ -1,273 +1,273 @@
 /*	$NetBSD: specspec.c,v 1.2 2012/10/05 01:27:29 christos Exp $	*/
 
 /*-
  * Copyright (c) 2003 Poul-Henning Kamp
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if HAVE_NBTOOL_CONFIG_H
 #include "nbtool_config.h"
 #endif
 
 #include <sys/cdefs.h>
 __RCSID("$NetBSD: specspec.c,v 1.2 2012/10/05 01:27:29 christos Exp $");
 
 #include <err.h>
 #include <grp.h>
 #include <pwd.h>
 #include <time.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include "mtree.h"
 #include "extern.h"
 
 #define FF(a, b, c, d) \
 	(((a)->flags & (c)) && ((b)->flags & (c)) && ((a)->d) != ((b)->d))
 #define FS(a, b, c, d) \
 	(((a)->flags & (c)) && ((b)->flags & (c)) && strcmp((a)->d,(b)->d))
 #define FM(a, b, c, d) \
 	(((a)->flags & (c)) && ((b)->flags & (c)) && memcmp(&(a)->d,&(b)->d, sizeof (a)->d))
 
 static void
 shownode(NODE *n, int f, char const *path)
 {
 	struct group *gr;
 	struct passwd *pw;
 
 	printf("%s%s %s", path, n->name, inotype(nodetoino(n->type)));
 	if (f & F_CKSUM)
 		printf(" cksum=%lu", n->cksum);
 	if (f & F_GID)
 		printf(" gid=%d", n->st_gid);
 	if (f & F_GNAME) {
 		gr = getgrgid(n->st_gid);
 		if (gr == NULL)
 			printf(" gid=%d", n->st_gid);
 		else
 			printf(" gname=%s", gr->gr_name);
 	}
 	if (f & F_MODE)
 		printf(" mode=%o", n->st_mode);
 	if (f & F_NLINK)
-		printf(" nlink=%d", n->st_nlink);
+		printf(" nlink=%ju", (uintmax_t)n->st_nlink);
 	if (f & F_SIZE)
 		printf(" size=%jd", (intmax_t)n->st_size);
 	if (f & F_UID)
 		printf(" uid=%d", n->st_uid);
 	if (f & F_UNAME) {
 		pw = getpwuid(n->st_uid);
 		if (pw == NULL)
 			printf(" uid=%d", n->st_uid);
 		else
 			printf(" uname=%s", pw->pw_name);
 	}
 	if (f & F_MD5)
 		printf(" %s=%s", MD5KEY, n->md5digest);
 	if (f & F_SHA1)
 		printf(" %s=%s", SHA1KEY, n->sha1digest);
 	if (f & F_RMD160)
 		printf(" %s=%s", RMD160KEY, n->rmd160digest);
 	if (f & F_SHA256)
 		printf(" %s=%s", SHA256KEY, n->sha256digest);
 	if (f & F_SHA384)
 		printf(" %s=%s", SHA384KEY, n->sha384digest);
 	if (f & F_SHA512)
 		printf(" %s=%s", SHA512KEY, n->sha512digest);
 	if (f & F_FLAGS)
 		printf(" flags=%s", flags_to_string(n->st_flags, "none"));
 	printf("\n");
 }
 
 static int
 mismatch(NODE *n1, NODE *n2, int differ, char const *path)
 {
 
 	if (n2 == NULL) {
 		shownode(n1, differ, path);
 		return (1);
 	}
 	if (n1 == NULL) {
 		printf("\t");
 		shownode(n2, differ, path);
 		return (1);
 	}
 	if (!(differ & keys))
 		return(0);
 	printf("\t\t");
 	shownode(n1, differ, path);
 	printf("\t\t");
 	shownode(n2, differ, path);
 	return (1);
 }
 
 static int
 compare_nodes(NODE *n1, NODE *n2, char const *path)
 {
 	int differs;
 	
 	if (n1 != NULL && n1->type == F_LINK)
 		n1->flags &= ~F_MODE;
 	if (n2 != NULL && n2->type == F_LINK)
 		n2->flags &= ~F_MODE;
 	differs = 0;
 	if (n1 == NULL && n2 != NULL) {
 		differs = n2->flags;
 		mismatch(n1, n2, differs, path);
 		return (1);
 	}
 	if (n1 != NULL && n2 == NULL) {
 		differs = n1->flags;
 		mismatch(n1, n2, differs, path);
 		return (1);
 	}
 	if (n1->type != n2->type) {
 		differs = 0;
 		mismatch(n1, n2, differs, path);
 		return (1);
 	}
 	if (FF(n1, n2, F_CKSUM, cksum))
 		differs |= F_CKSUM;
 	if (FF(n1, n2, F_GID, st_gid))
 		differs |= F_GID;
 	if (FF(n1, n2, F_GNAME, st_gid))
 		differs |= F_GNAME;
 	if (FF(n1, n2, F_MODE, st_mode))
 		differs |= F_MODE;
 	if (FF(n1, n2, F_NLINK, st_nlink))
 		differs |= F_NLINK;
 	if (FF(n1, n2, F_SIZE, st_size))
 		differs |= F_SIZE;
 	if (FS(n1, n2, F_SLINK, slink))
 		differs |= F_SLINK;
 	if (FM(n1, n2, F_TIME, st_mtimespec))
 		differs |= F_TIME;
 	if (FF(n1, n2, F_UID, st_uid))
 		differs |= F_UID;
 	if (FF(n1, n2, F_UNAME, st_uid))
 		differs |= F_UNAME;
 	if (FS(n1, n2, F_MD5, md5digest))
 		differs |= F_MD5;
 	if (FS(n1, n2, F_SHA1, sha1digest))
 		differs |= F_SHA1;
 	if (FS(n1, n2, F_RMD160, rmd160digest))
 		differs |= F_RMD160;
 	if (FS(n1, n2, F_SHA256, sha256digest))
 		differs |= F_SHA256;
 	if (FS(n1, n2, F_SHA384, sha384digest))
 		differs |= F_SHA384;
 	if (FS(n1, n2, F_SHA512, sha512digest))
 		differs |= F_SHA512;
 	if (FF(n1, n2, F_FLAGS, st_flags))
 		differs |= F_FLAGS;
 	if (differs) {
 		mismatch(n1, n2, differs, path);
 		return (1);
 	}
 	return (0);	
 }
 static int
 walk_in_the_forest(NODE *t1, NODE *t2, char const *path)
 {
 	int r, i;
 	NODE *c1, *c2, *n1, *n2;
 	char *np;
 
 	r = 0;
 
 	if (t1 != NULL)
 		c1 = t1->child;
 	else
 		c1 = NULL;
 	if (t2 != NULL)
 		c2 = t2->child;
 	else
 		c2 = NULL;
 	while (c1 != NULL || c2 != NULL) {
 		n1 = n2 = NULL;
 		if (c1 != NULL)
 			n1 = c1->next;
 		if (c2 != NULL)
 			n2 = c2->next;
 		if (c1 != NULL && c2 != NULL) {
 			if (c1->type != F_DIR && c2->type == F_DIR) {
 				n2 = c2;
 				c2 = NULL;
 			} else if (c1->type == F_DIR && c2->type != F_DIR) {
 				n1 = c1;
 				c1 = NULL;
 			} else {
 				i = strcmp(c1->name, c2->name);
 				if (i > 0) {
 					n1 = c1;
 					c1 = NULL;
 				} else if (i < 0) {
 					n2 = c2;
 					c2 = NULL;
 				}
 			}
 		}
 		if (c1 == NULL && c2->type == F_DIR) {
 			asprintf(&np, "%s%s/", path, c2->name);
 			i = walk_in_the_forest(c1, c2, np);
 			free(np);
 			i += compare_nodes(c1, c2, path);
 		} else if (c2 == NULL && c1->type == F_DIR) {
 			asprintf(&np, "%s%s/", path, c1->name);
 			i = walk_in_the_forest(c1, c2, np);
 			free(np);
 			i += compare_nodes(c1, c2, path);
 		} else if (c1 == NULL || c2 == NULL) {
 			i = compare_nodes(c1, c2, path);
 		} else if (c1->type == F_DIR && c2->type == F_DIR) {
 			asprintf(&np, "%s%s/", path, c1->name);
 			i = walk_in_the_forest(c1, c2, np);
 			free(np);
 			i += compare_nodes(c1, c2, path);
 		} else {
 			i = compare_nodes(c1, c2, path);
 		}
 		r += i;
 		c1 = n1;
 		c2 = n2;
 	}
 	return (r);	
 }
 
 int
 mtree_specspec(FILE *fi, FILE *fj)
 {
 	int rval;
 	NODE *root1, *root2;
 
 	root1 = spec(fi);
 	root2 = spec(fj);
 	rval = walk_in_the_forest(root1, root2, "");
 	rval += compare_nodes(root1, root2, "");
 	if (rval > 0)
 		return (MISMATCHEXIT);
 	return (0);
 }
Index: head/sbin/fsck_ffs/suj.c
===================================================================
--- head/sbin/fsck_ffs/suj.c	(revision 311521)
+++ head/sbin/fsck_ffs/suj.c	(revision 311522)
@@ -1,2791 +1,2801 @@
 /*-
  * Copyright 2009, 2010 Jeffrey W. Roberson <jeff@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/disk.h>
 #include <sys/disklabel.h>
 #include <sys/mount.h>
 #include <sys/stat.h>
 
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/dinode.h>
 #include <ufs/ufs/dir.h>
 #include <ufs/ffs/fs.h>
 
 #include <assert.h>
 #include <err.h>
 #include <setjmp.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <libufs.h>
 #include <string.h>
 #include <strings.h>
 #include <sysexits.h>
 #include <time.h>
 
 #include "fsck.h"
 
 #define	DOTDOT_OFFSET	DIRECTSIZ(1)
 #define	SUJ_HASHSIZE	2048
 #define	SUJ_HASHMASK	(SUJ_HASHSIZE - 1)
 #define	SUJ_HASH(x)	((x * 2654435761) & SUJ_HASHMASK)
 
 struct suj_seg {
 	TAILQ_ENTRY(suj_seg) ss_next;
 	struct jsegrec	ss_rec;
 	uint8_t		*ss_blk;
 };
 
 struct suj_rec {
 	TAILQ_ENTRY(suj_rec) sr_next;
 	union jrec	*sr_rec;
 };
 TAILQ_HEAD(srechd, suj_rec);
 
 struct suj_ino {
 	LIST_ENTRY(suj_ino)	si_next;
 	struct srechd		si_recs;
 	struct srechd		si_newrecs;
 	struct srechd		si_movs;
 	struct jtrncrec		*si_trunc;
 	ino_t			si_ino;
 	char			si_skipparent;
 	char			si_hasrecs;
 	char			si_blkadj;
 	char			si_linkadj;
 	int			si_mode;
 	nlink_t			si_nlinkadj;
 	nlink_t			si_nlink;
 	nlink_t			si_dotlinks;
 };
 LIST_HEAD(inohd, suj_ino);
 
 struct suj_blk {
 	LIST_ENTRY(suj_blk)	sb_next;
 	struct srechd		sb_recs;
 	ufs2_daddr_t		sb_blk;
 };
 LIST_HEAD(blkhd, suj_blk);
 
 struct data_blk {
 	LIST_ENTRY(data_blk)	db_next;
 	uint8_t			*db_buf;
 	ufs2_daddr_t		db_blk;
 	int			db_size;
 	int			db_dirty;
 };
 
 struct ino_blk {
 	LIST_ENTRY(ino_blk)	ib_next;
 	uint8_t			*ib_buf;
 	int			ib_dirty;
 	ufs2_daddr_t		ib_blk;
 };
 LIST_HEAD(iblkhd, ino_blk);
 
 struct suj_cg {
 	LIST_ENTRY(suj_cg)	sc_next;
 	struct blkhd		sc_blkhash[SUJ_HASHSIZE];
 	struct inohd		sc_inohash[SUJ_HASHSIZE];
 	struct iblkhd		sc_iblkhash[SUJ_HASHSIZE];
 	struct ino_blk		*sc_lastiblk;
 	struct suj_ino		*sc_lastino;
 	struct suj_blk		*sc_lastblk;
 	uint8_t			*sc_cgbuf;
 	struct cg		*sc_cgp;
 	int			sc_dirty;
 	int			sc_cgx;
 };
 
 static LIST_HEAD(cghd, suj_cg) cghash[SUJ_HASHSIZE];
 static LIST_HEAD(dblkhd, data_blk) dbhash[SUJ_HASHSIZE];
 static struct suj_cg *lastcg;
 static struct data_blk *lastblk;
 
 static TAILQ_HEAD(seghd, suj_seg) allsegs;
 static uint64_t oldseq;
 static struct uufsd *disk = NULL;
 static struct fs *fs = NULL;
 static ino_t sujino;
 
 /*
  * Summary statistics.
  */
 static uint64_t freefrags;
 static uint64_t freeblocks;
 static uint64_t freeinos;
 static uint64_t freedir;
 static uint64_t jbytes;
 static uint64_t jrecs;
 
 static jmp_buf	jmpbuf;
 
 typedef void (*ino_visitor)(ino_t, ufs_lbn_t, ufs2_daddr_t, int);
 static void err_suj(const char *, ...) __dead2;
 static void ino_trunc(ino_t, off_t);
 static void ino_decr(ino_t);
 static void ino_adjust(struct suj_ino *);
 static void ino_build(struct suj_ino *);
 static int blk_isfree(ufs2_daddr_t);
 static void initsuj(void);
 
 static void *
 errmalloc(size_t n)
 {
 	void *a;
 
 	a = Malloc(n);
 	if (a == NULL)
 		err(EX_OSERR, "malloc(%zu)", n);
 	return (a);
 }
 
 /*
  * When hit a fatal error in journalling check, print out
  * the error and then offer to fallback to normal fsck.
  */
 static void
 err_suj(const char * restrict fmt, ...)
 {
 	va_list ap;
 
 	if (preen)
 		(void)fprintf(stdout, "%s: ", cdevname);
 
 	va_start(ap, fmt);
 	(void)vfprintf(stdout, fmt, ap);
 	va_end(ap);
 
 	longjmp(jmpbuf, -1);
 }
 
 /*
  * Open the given provider, load superblock.
  */
 static void
 opendisk(const char *devnam)
 {
 	if (disk != NULL)
 		return;
 	disk = Malloc(sizeof(*disk));
 	if (disk == NULL)
 		err(EX_OSERR, "malloc(%zu)", sizeof(*disk));
 	if (ufs_disk_fillout(disk, devnam) == -1) {
 		err(EX_OSERR, "ufs_disk_fillout(%s) failed: %s", devnam,
 		    disk->d_error);
 	}
 	fs = &disk->d_fs;
 	if (real_dev_bsize == 0 && ioctl(disk->d_fd, DIOCGSECTORSIZE,
 	    &real_dev_bsize) == -1)
 		real_dev_bsize = secsize;
 	if (debug)
 		printf("dev_bsize %u\n", real_dev_bsize);
 }
 
 /*
  * Mark file system as clean, write the super-block back, close the disk.
  */
 static void
 closedisk(const char *devnam)
 {
 	struct csum *cgsum;
 	uint32_t i;
 
 	/*
 	 * Recompute the fs summary info from correct cs summaries.
 	 */
 	bzero(&fs->fs_cstotal, sizeof(struct csum_total));
 	for (i = 0; i < fs->fs_ncg; i++) {
 		cgsum = &fs->fs_cs(fs, i);
 		fs->fs_cstotal.cs_nffree += cgsum->cs_nffree;
 		fs->fs_cstotal.cs_nbfree += cgsum->cs_nbfree;
 		fs->fs_cstotal.cs_nifree += cgsum->cs_nifree;
 		fs->fs_cstotal.cs_ndir += cgsum->cs_ndir;
 	}
 	fs->fs_pendinginodes = 0;
 	fs->fs_pendingblocks = 0;
 	fs->fs_clean = 1;
 	fs->fs_time = time(NULL);
 	fs->fs_mtime = time(NULL);
 	if (sbwrite(disk, 0) == -1)
 		err(EX_OSERR, "sbwrite(%s)", devnam);
 	if (ufs_disk_close(disk) == -1)
 		err(EX_OSERR, "ufs_disk_close(%s)", devnam);
 	free(disk);
 	disk = NULL;
 	fs = NULL;
 }
 
 /*
  * Lookup a cg by number in the hash so we can keep track of which cgs
  * need stats rebuilt.
  */
 static struct suj_cg *
 cg_lookup(int cgx)
 {
 	struct cghd *hd;
 	struct suj_cg *sc;
 
 	if (cgx < 0 || cgx >= fs->fs_ncg)
 		err_suj("Bad cg number %d\n", cgx);
 	if (lastcg && lastcg->sc_cgx == cgx)
 		return (lastcg);
 	hd = &cghash[SUJ_HASH(cgx)];
 	LIST_FOREACH(sc, hd, sc_next)
 		if (sc->sc_cgx == cgx) {
 			lastcg = sc;
 			return (sc);
 		}
 	sc = errmalloc(sizeof(*sc));
 	bzero(sc, sizeof(*sc));
 	sc->sc_cgbuf = errmalloc(fs->fs_bsize);
 	sc->sc_cgp = (struct cg *)sc->sc_cgbuf;
 	sc->sc_cgx = cgx;
 	LIST_INSERT_HEAD(hd, sc, sc_next);
 	if (bread(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf,
 	    fs->fs_bsize) == -1)
 		err_suj("Unable to read cylinder group %d\n", sc->sc_cgx);
 
 	return (sc);
 }
 
 /*
  * Lookup an inode number in the hash and allocate a suj_ino if it does
  * not exist.
  */
 static struct suj_ino *
 ino_lookup(ino_t ino, int creat)
 {
 	struct suj_ino *sino;
 	struct inohd *hd;
 	struct suj_cg *sc;
 
 	sc = cg_lookup(ino_to_cg(fs, ino));
 	if (sc->sc_lastino && sc->sc_lastino->si_ino == ino)
 		return (sc->sc_lastino);
 	hd = &sc->sc_inohash[SUJ_HASH(ino)];
 	LIST_FOREACH(sino, hd, si_next)
 		if (sino->si_ino == ino)
 			return (sino);
 	if (creat == 0)
 		return (NULL);
 	sino = errmalloc(sizeof(*sino));
 	bzero(sino, sizeof(*sino));
 	sino->si_ino = ino;
 	TAILQ_INIT(&sino->si_recs);
 	TAILQ_INIT(&sino->si_newrecs);
 	TAILQ_INIT(&sino->si_movs);
 	LIST_INSERT_HEAD(hd, sino, si_next);
 
 	return (sino);
 }
 
 /*
  * Lookup a block number in the hash and allocate a suj_blk if it does
  * not exist.
  */
 static struct suj_blk *
 blk_lookup(ufs2_daddr_t blk, int creat)
 {
 	struct suj_blk *sblk;
 	struct suj_cg *sc;
 	struct blkhd *hd;
 
 	sc = cg_lookup(dtog(fs, blk));
 	if (sc->sc_lastblk && sc->sc_lastblk->sb_blk == blk)
 		return (sc->sc_lastblk);
 	hd = &sc->sc_blkhash[SUJ_HASH(fragstoblks(fs, blk))];
 	LIST_FOREACH(sblk, hd, sb_next)
 		if (sblk->sb_blk == blk)
 			return (sblk);
 	if (creat == 0)
 		return (NULL);
 	sblk = errmalloc(sizeof(*sblk));
 	bzero(sblk, sizeof(*sblk));
 	sblk->sb_blk = blk;
 	TAILQ_INIT(&sblk->sb_recs);
 	LIST_INSERT_HEAD(hd, sblk, sb_next);
 
 	return (sblk);
 }
 
 static struct data_blk *
 dblk_lookup(ufs2_daddr_t blk)
 {
 	struct data_blk *dblk;
 	struct dblkhd *hd;
 
 	hd = &dbhash[SUJ_HASH(fragstoblks(fs, blk))];
 	if (lastblk && lastblk->db_blk == blk)
 		return (lastblk);
 	LIST_FOREACH(dblk, hd, db_next)
 		if (dblk->db_blk == blk)
 			return (dblk);
 	/*
 	 * The inode block wasn't located, allocate a new one.
 	 */
 	dblk = errmalloc(sizeof(*dblk));
 	bzero(dblk, sizeof(*dblk));
 	LIST_INSERT_HEAD(hd, dblk, db_next);
 	dblk->db_blk = blk;
 	return (dblk);
 }
 
 static uint8_t *
 dblk_read(ufs2_daddr_t blk, int size)
 {
 	struct data_blk *dblk;
 
 	dblk = dblk_lookup(blk);
 	/*
 	 * I doubt size mismatches can happen in practice but it is trivial
 	 * to handle.
 	 */
 	if (size != dblk->db_size) {
 		if (dblk->db_buf)
 			free(dblk->db_buf);
 		dblk->db_buf = errmalloc(size);
 		dblk->db_size = size;
 		if (bread(disk, fsbtodb(fs, blk), dblk->db_buf, size) == -1)
 			err_suj("Failed to read data block %jd\n", blk);
 	}
 	return (dblk->db_buf);
 }
 
 static void
 dblk_dirty(ufs2_daddr_t blk)
 {
 	struct data_blk *dblk;
 
 	dblk = dblk_lookup(blk);
 	dblk->db_dirty = 1;
 }
 
 static void
 dblk_write(void)
 {
 	struct data_blk *dblk;
 	int i;
 
 	for (i = 0; i < SUJ_HASHSIZE; i++) {
 		LIST_FOREACH(dblk, &dbhash[i], db_next) {
 			if (dblk->db_dirty == 0 || dblk->db_size == 0)
 				continue;
 			if (bwrite(disk, fsbtodb(fs, dblk->db_blk),
 			    dblk->db_buf, dblk->db_size) == -1)
 				err_suj("Unable to write block %jd\n",
 				    dblk->db_blk);
 		}
 	}
 }
 
 static union dinode *
 ino_read(ino_t ino)
 {
 	struct ino_blk *iblk;
 	struct iblkhd *hd;
 	struct suj_cg *sc;
 	ufs2_daddr_t blk;
 	int off;
 
 	blk = ino_to_fsba(fs, ino);
 	sc = cg_lookup(ino_to_cg(fs, ino));
 	iblk = sc->sc_lastiblk;
 	if (iblk && iblk->ib_blk == blk)
 		goto found;
 	hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))];
 	LIST_FOREACH(iblk, hd, ib_next)
 		if (iblk->ib_blk == blk)
 			goto found;
 	/*
 	 * The inode block wasn't located, allocate a new one.
 	 */
 	iblk = errmalloc(sizeof(*iblk));
 	bzero(iblk, sizeof(*iblk));
 	iblk->ib_buf = errmalloc(fs->fs_bsize);
 	iblk->ib_blk = blk;
 	LIST_INSERT_HEAD(hd, iblk, ib_next);
 	if (bread(disk, fsbtodb(fs, blk), iblk->ib_buf, fs->fs_bsize) == -1)
 		err_suj("Failed to read inode block %jd\n", blk);
 found:
 	sc->sc_lastiblk = iblk;
 	off = ino_to_fsbo(fs, ino);
 	if (fs->fs_magic == FS_UFS1_MAGIC)
 		return (union dinode *)&((struct ufs1_dinode *)iblk->ib_buf)[off];
 	else
 		return (union dinode *)&((struct ufs2_dinode *)iblk->ib_buf)[off];
 }
 
 static void
 ino_dirty(ino_t ino)
 {
 	struct ino_blk *iblk;
 	struct iblkhd *hd;
 	struct suj_cg *sc;
 	ufs2_daddr_t blk;
 
 	blk = ino_to_fsba(fs, ino);
 	sc = cg_lookup(ino_to_cg(fs, ino));
 	iblk = sc->sc_lastiblk;
 	if (iblk && iblk->ib_blk == blk) {
 		iblk->ib_dirty = 1;
 		return;
 	}
 	hd = &sc->sc_iblkhash[SUJ_HASH(fragstoblks(fs, blk))];
 	LIST_FOREACH(iblk, hd, ib_next) {
 		if (iblk->ib_blk == blk) {
 			iblk->ib_dirty = 1;
 			return;
 		}
 	}
 	ino_read(ino);
 	ino_dirty(ino);
 }
 
 static void
 iblk_write(struct ino_blk *iblk)
 {
 
 	if (iblk->ib_dirty == 0)
 		return;
 	if (bwrite(disk, fsbtodb(fs, iblk->ib_blk), iblk->ib_buf,
 	    fs->fs_bsize) == -1)
 		err_suj("Failed to write inode block %jd\n", iblk->ib_blk);
 }
 
 static int
 blk_overlaps(struct jblkrec *brec, ufs2_daddr_t start, int frags)
 {
 	ufs2_daddr_t bstart;
 	ufs2_daddr_t bend;
 	ufs2_daddr_t end;
 
 	end = start + frags;
 	bstart = brec->jb_blkno + brec->jb_oldfrags;
 	bend = bstart + brec->jb_frags;
 	if (start < bend && end > bstart)
 		return (1);
 	return (0);
 }
 
 static int
 blk_equals(struct jblkrec *brec, ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t start,
     int frags)
 {
 
 	if (brec->jb_ino != ino || brec->jb_lbn != lbn)
 		return (0);
 	if (brec->jb_blkno + brec->jb_oldfrags != start)
 		return (0);
 	if (brec->jb_frags < frags)
 		return (0);
 	return (1);
 }
 
 static void
 blk_setmask(struct jblkrec *brec, int *mask)
 {
 	int i;
 
 	for (i = brec->jb_oldfrags; i < brec->jb_oldfrags + brec->jb_frags; i++)
 		*mask |= 1 << i;
 }
 
 /*
  * Determine whether a given block has been reallocated to a new location.
  * Returns a mask of overlapping bits if any frags have been reused or
  * zero if the block has not been re-used and the contents can be trusted.
  *
  * This is used to ensure that an orphaned pointer due to truncate is safe
  * to be freed.  The mask value can be used to free partial blocks.
  */
 static int
 blk_freemask(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags)
 {
 	struct suj_blk *sblk;
 	struct suj_rec *srec;
 	struct jblkrec *brec;
 	int mask;
 	int off;
 
 	/*
 	 * To be certain we're not freeing a reallocated block we lookup
 	 * this block in the blk hash and see if there is an allocation
 	 * journal record that overlaps with any fragments in the block
 	 * we're concerned with.  If any fragments have ben reallocated
 	 * the block has already been freed and re-used for another purpose.
 	 */
 	mask = 0;
 	sblk = blk_lookup(blknum(fs, blk), 0);
 	if (sblk == NULL)
 		return (0);
 	off = blk - sblk->sb_blk;
 	TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) {
 		brec = (struct jblkrec *)srec->sr_rec;
 		/*
 		 * If the block overlaps but does not match
 		 * exactly this record refers to the current
 		 * location.
 		 */
 		if (blk_overlaps(brec, blk, frags) == 0)
 			continue;
 		if (blk_equals(brec, ino, lbn, blk, frags) == 1)
 			mask = 0;
 		else
 			blk_setmask(brec, &mask);
 	}
 	if (debug)
 		printf("blk_freemask: blk %jd sblk %jd off %d mask 0x%X\n",
 		    blk, sblk->sb_blk, off, mask);
 	return (mask >> off);
 }
 
 /*
  * Determine whether it is safe to follow an indirect.  It is not safe
  * if any part of the indirect has been reallocated or the last journal
  * entry was an allocation.  Just allocated indirects may not have valid
  * pointers yet and all of their children will have their own records.
  * It is also not safe to follow an indirect if the cg bitmap has been
  * cleared as a new allocation may write to the block prior to the journal
  * being written.
  *
  * Returns 1 if it's safe to follow the indirect and 0 otherwise.
  */
 static int
 blk_isindir(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn)
 {
 	struct suj_blk *sblk;
 	struct jblkrec *brec;
 
 	sblk = blk_lookup(blk, 0);
 	if (sblk == NULL)
 		return (1);
 	if (TAILQ_EMPTY(&sblk->sb_recs))
 		return (1);
 	brec = (struct jblkrec *)TAILQ_LAST(&sblk->sb_recs, srechd)->sr_rec;
 	if (blk_equals(brec, ino, lbn, blk, fs->fs_frag))
 		if (brec->jb_op == JOP_FREEBLK)
 			return (!blk_isfree(blk));
 	return (0);
 }
 
 /*
  * Clear an inode from the cg bitmap.  If the inode was already clear return
  * 0 so the caller knows it does not have to check the inode contents.
  */
 static int
 ino_free(ino_t ino, int mode)
 {
 	struct suj_cg *sc;
 	uint8_t *inosused;
 	struct cg *cgp;
 	int cg;
 
 	cg = ino_to_cg(fs, ino);
 	ino = ino % fs->fs_ipg;
 	sc = cg_lookup(cg);
 	cgp = sc->sc_cgp;
 	inosused = cg_inosused(cgp);
 	/*
 	 * The bitmap may never have made it to the disk so we have to
 	 * conditionally clear.  We can avoid writing the cg in this case.
 	 */
 	if (isclr(inosused, ino))
 		return (0);
 	freeinos++;
 	clrbit(inosused, ino);
 	if (ino < cgp->cg_irotor)
 		cgp->cg_irotor = ino;
 	cgp->cg_cs.cs_nifree++;
 	if ((mode & IFMT) == IFDIR) {
 		freedir++;
 		cgp->cg_cs.cs_ndir--;
 	}
 	sc->sc_dirty = 1;
 
 	return (1);
 }
 
 /*
  * Free 'frags' frags starting at filesystem block 'bno' skipping any frags
  * set in the mask.
  */
 static void
 blk_free(ufs2_daddr_t bno, int mask, int frags)
 {
 	ufs1_daddr_t fragno, cgbno;
 	struct suj_cg *sc;
 	struct cg *cgp;
 	int i, cg;
 	uint8_t *blksfree;
 
 	if (debug)
 		printf("Freeing %d frags at blk %jd mask 0x%x\n",
 		    frags, bno, mask);
 	cg = dtog(fs, bno);
 	sc = cg_lookup(cg);
 	cgp = sc->sc_cgp;
 	cgbno = dtogd(fs, bno);
 	blksfree = cg_blksfree(cgp);
 
 	/*
 	 * If it's not allocated we only wrote the journal entry
 	 * and never the bitmaps.  Here we unconditionally clear and
 	 * resolve the cg summary later.
 	 */
 	if (frags == fs->fs_frag && mask == 0) {
 		fragno = fragstoblks(fs, cgbno);
 		ffs_setblock(fs, blksfree, fragno);
 		freeblocks++;
 	} else {
 		/*
 		 * deallocate the fragment
 		 */
 		for (i = 0; i < frags; i++)
 			if ((mask & (1 << i)) == 0 && isclr(blksfree, cgbno +i)) {
 				freefrags++;
 				setbit(blksfree, cgbno + i);
 			}
 	}
 	sc->sc_dirty = 1;
 }
 
 /*
  * Returns 1 if the whole block starting at 'bno' is marked free and 0
  * otherwise.
  */
 static int
 blk_isfree(ufs2_daddr_t bno)
 {
 	struct suj_cg *sc;
 
 	sc = cg_lookup(dtog(fs, bno));
 	return ffs_isblock(fs, cg_blksfree(sc->sc_cgp), dtogd(fs, bno));
 }
 
 /*
  * Fetch an indirect block to find the block at a given lbn.  The lbn
  * may be negative to fetch a specific indirect block pointer or positive
  * to fetch a specific block.
  */
 static ufs2_daddr_t
 indir_blkatoff(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t cur, ufs_lbn_t lbn)
 {
 	ufs2_daddr_t *bap2;
 	ufs2_daddr_t *bap1;
 	ufs_lbn_t lbnadd;
 	ufs_lbn_t base;
 	int level;
 	int i;
 
 	if (blk == 0)
 		return (0);
 	level = lbn_level(cur);
 	if (level == -1)
 		err_suj("Invalid indir lbn %jd\n", lbn);
 	if (level == 0 && lbn < 0)
 		err_suj("Invalid lbn %jd\n", lbn);
 	bap2 = (void *)dblk_read(blk, fs->fs_bsize);
 	bap1 = (void *)bap2;
 	lbnadd = 1;
 	base = -(cur + level);
 	for (i = level; i > 0; i--)
 		lbnadd *= NINDIR(fs);
 	if (lbn > 0)
 		i = (lbn - base) / lbnadd;
 	else
 		i = (-lbn - base) / lbnadd;
 	if (i < 0 || i >= NINDIR(fs))
 		err_suj("Invalid indirect index %d produced by lbn %jd\n",
 		    i, lbn);
 	if (level == 0)
 		cur = base + (i * lbnadd);
 	else
 		cur = -(base + (i * lbnadd)) - (level - 1);
 	if (fs->fs_magic == FS_UFS1_MAGIC)
 		blk = bap1[i];
 	else
 		blk = bap2[i];
 	if (cur == lbn)
 		return (blk);
 	if (level == 0)
 		err_suj("Invalid lbn %jd at level 0\n", lbn);
 	return indir_blkatoff(blk, ino, cur, lbn);
 }
 
 /*
  * Finds the disk block address at the specified lbn within the inode
  * specified by ip.  This follows the whole tree and honors di_size and
  * di_extsize so it is a true test of reachability.  The lbn may be
  * negative if an extattr or indirect block is requested.
  */
 static ufs2_daddr_t
 ino_blkatoff(union dinode *ip, ino_t ino, ufs_lbn_t lbn, int *frags)
 {
 	ufs_lbn_t tmpval;
 	ufs_lbn_t cur;
 	ufs_lbn_t next;
 	int i;
 
 	/*
 	 * Handle extattr blocks first.
 	 */
 	if (lbn < 0 && lbn >= -NXADDR) {
 		lbn = -1 - lbn;
 		if (lbn > lblkno(fs, ip->dp2.di_extsize - 1))
 			return (0);
 		*frags = numfrags(fs, sblksize(fs, ip->dp2.di_extsize, lbn));
 		return (ip->dp2.di_extb[lbn]);
 	}
 	/*
 	 * Now direct and indirect.
 	 */
 	if (DIP(ip, di_mode) == IFLNK &&
 	    DIP(ip, di_size) < fs->fs_maxsymlinklen)
 		return (0);
 	if (lbn >= 0 && lbn < NDADDR) {
 		*frags = numfrags(fs, sblksize(fs, DIP(ip, di_size), lbn));
 		return (DIP(ip, di_db[lbn]));
 	}
 	*frags = fs->fs_frag;
 
 	for (i = 0, tmpval = NINDIR(fs), cur = NDADDR; i < NIADDR; i++,
 	    tmpval *= NINDIR(fs), cur = next) {
 		next = cur + tmpval;
 		if (lbn == -cur - i)
 			return (DIP(ip, di_ib[i]));
 		/*
 		 * Determine whether the lbn in question is within this tree.
 		 */
 		if (lbn < 0 && -lbn >= next)
 			continue;
 		if (lbn > 0 && lbn >= next)
 			continue;
 		return indir_blkatoff(DIP(ip, di_ib[i]), ino, -cur - i, lbn);
 	}
 	err_suj("lbn %jd not in ino\n", lbn);
 	/* NOTREACHED */
 }
 
 /*
  * Determine whether a block exists at a particular lbn in an inode.
  * Returns 1 if found, 0 if not.  lbn may be negative for indirects
  * or ext blocks.
  */
 static int
 blk_isat(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int *frags)
 {
 	union dinode *ip;
 	ufs2_daddr_t nblk;
 
 	ip = ino_read(ino);
 
 	if (DIP(ip, di_nlink) == 0 || DIP(ip, di_mode) == 0)
 		return (0);
 	nblk = ino_blkatoff(ip, ino, lbn, frags);
 
 	return (nblk == blk);
 }
 
 /*
  * Clear the directory entry at diroff that should point to child.  Minimal
  * checking is done and it is assumed that this path was verified with isat.
  */
 static void
 ino_clrat(ino_t parent, off_t diroff, ino_t child)
 {
 	union dinode *dip;
 	struct direct *dp;
 	ufs2_daddr_t blk;
 	uint8_t *block;
 	ufs_lbn_t lbn;
 	int blksize;
 	int frags;
 	int doff;
 
 	if (debug)
 		printf("Clearing inode %ju from parent %ju at offset %jd\n",
 		    (uintmax_t)child, (uintmax_t)parent, diroff);
 
 	lbn = lblkno(fs, diroff);
 	doff = blkoff(fs, diroff);
 	dip = ino_read(parent);
 	blk = ino_blkatoff(dip, parent, lbn, &frags);
 	blksize = sblksize(fs, DIP(dip, di_size), lbn);
 	block = dblk_read(blk, blksize);
 	dp = (struct direct *)&block[doff];
 	if (dp->d_ino != child)
 		errx(1, "Inode %ju does not exist in %ju at %jd",
 		    (uintmax_t)child, (uintmax_t)parent, diroff);
 	dp->d_ino = 0;
 	dblk_dirty(blk);
 	/*
 	 * The actual .. reference count will already have been removed
 	 * from the parent by the .. remref record.
 	 */
 }
 
 /*
  * Determines whether a pointer to an inode exists within a directory
  * at a specified offset.  Returns the mode of the found entry.
  */
 static int
 ino_isat(ino_t parent, off_t diroff, ino_t child, int *mode, int *isdot)
 {
 	union dinode *dip;
 	struct direct *dp;
 	ufs2_daddr_t blk;
 	uint8_t *block;
 	ufs_lbn_t lbn;
 	int blksize;
 	int frags;
 	int dpoff;
 	int doff;
 
 	*isdot = 0;
 	dip = ino_read(parent);
 	*mode = DIP(dip, di_mode);
 	if ((*mode & IFMT) != IFDIR) {
 		if (debug) {
 			/*
 			 * This can happen if the parent inode
 			 * was reallocated.
 			 */
 			if (*mode != 0)
 				printf("Directory %ju has bad mode %o\n",
 				    (uintmax_t)parent, *mode);
 			else
 				printf("Directory %ju has zero mode\n",
 				    (uintmax_t)parent);
 		}
 		return (0);
 	}
 	lbn = lblkno(fs, diroff);
 	doff = blkoff(fs, diroff);
 	blksize = sblksize(fs, DIP(dip, di_size), lbn);
 	if (diroff + DIRECTSIZ(1) > DIP(dip, di_size) || doff >= blksize) {
 		if (debug)
 			printf("ino %ju absent from %ju due to offset %jd"
 			    " exceeding size %jd\n",
 			    (uintmax_t)child, (uintmax_t)parent, diroff,
 			    DIP(dip, di_size));
 		return (0);
 	}
 	blk = ino_blkatoff(dip, parent, lbn, &frags);
 	if (blk <= 0) {
 		if (debug)
 			printf("Sparse directory %ju", (uintmax_t)parent);
 		return (0);
 	}
 	block = dblk_read(blk, blksize);
 	/*
 	 * Walk through the records from the start of the block to be
 	 * certain we hit a valid record and not some junk in the middle
 	 * of a file name.  Stop when we reach or pass the expected offset.
 	 */
 	dpoff = rounddown(doff, DIRBLKSIZ);
 	do {
 		dp = (struct direct *)&block[dpoff];
 		if (dpoff == doff)
 			break;
 		if (dp->d_reclen == 0)
 			break;
 		dpoff += dp->d_reclen;
 	} while (dpoff <= doff);
 	if (dpoff > fs->fs_bsize)
 		err_suj("Corrupt directory block in dir ino %ju\n",
 		    (uintmax_t)parent);
 	/* Not found. */
 	if (dpoff != doff) {
 		if (debug)
 			printf("ino %ju not found in %ju, lbn %jd, dpoff %d\n",
 			    (uintmax_t)child, (uintmax_t)parent, lbn, dpoff);
 		return (0);
 	}
 	/*
 	 * We found the item in question.  Record the mode and whether it's
 	 * a . or .. link for the caller.
 	 */
 	if (dp->d_ino == child) {
 		if (child == parent)
 			*isdot = 1;
 		else if (dp->d_namlen == 2 &&
 		    dp->d_name[0] == '.' && dp->d_name[1] == '.')
 			*isdot = 1;
 		*mode = DTTOIF(dp->d_type);
 		return (1);
 	}
 	if (debug)
 		printf("ino %ju doesn't match dirent ino %ju in parent %ju\n",
 		    (uintmax_t)child, (uintmax_t)dp->d_ino, (uintmax_t)parent);
 	return (0);
 }
 
 #define	VISIT_INDIR	0x0001
 #define	VISIT_EXT	0x0002
 #define	VISIT_ROOT	0x0004	/* Operation came via root & valid pointers. */
 
 /*
  * Read an indirect level which may or may not be linked into an inode.
  */
 static void
 indir_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, uint64_t *frags,
     ino_visitor visitor, int flags)
 {
 	ufs2_daddr_t *bap2;
 	ufs1_daddr_t *bap1;
 	ufs_lbn_t lbnadd;
 	ufs2_daddr_t nblk;
 	ufs_lbn_t nlbn;
 	int level;
 	int i;
 
 	/*
 	 * Don't visit indirect blocks with contents we can't trust.  This
 	 * should only happen when indir_visit() is called to complete a
 	 * truncate that never finished and not when a pointer is found via
 	 * an inode.
 	 */
 	if (blk == 0)
 		return;
 	level = lbn_level(lbn);
 	if (level == -1)
 		err_suj("Invalid level for lbn %jd\n", lbn);
 	if ((flags & VISIT_ROOT) == 0 && blk_isindir(blk, ino, lbn) == 0) {
 		if (debug)
 			printf("blk %jd ino %ju lbn %jd(%d) is not indir.\n",
 			    blk, (uintmax_t)ino, lbn, level);
 		goto out;
 	}
 	lbnadd = 1;
 	for (i = level; i > 0; i--)
 		lbnadd *= NINDIR(fs);
 	bap1 = (void *)dblk_read(blk, fs->fs_bsize);
 	bap2 = (void *)bap1;
 	for (i = 0; i < NINDIR(fs); i++) {
 		if (fs->fs_magic == FS_UFS1_MAGIC)
 			nblk = *bap1++;
 		else
 			nblk = *bap2++;
 		if (nblk == 0)
 			continue;
 		if (level == 0) {
 			nlbn = -lbn + i * lbnadd;
 			(*frags) += fs->fs_frag;
 			visitor(ino, nlbn, nblk, fs->fs_frag);
 		} else {
 			nlbn = (lbn + 1) - (i * lbnadd);
 			indir_visit(ino, nlbn, nblk, frags, visitor, flags);
 		}
 	}
 out:
 	if (flags & VISIT_INDIR) {
 		(*frags) += fs->fs_frag;
 		visitor(ino, lbn, blk, fs->fs_frag);
 	}
 }
 
 /*
  * Visit each block in an inode as specified by 'flags' and call a
  * callback function.  The callback may inspect or free blocks.  The
  * count of frags found according to the size in the file is returned.
  * This is not valid for sparse files but may be used to determine
  * the correct di_blocks for a file.
  */
 static uint64_t
 ino_visit(union dinode *ip, ino_t ino, ino_visitor visitor, int flags)
 {
 	ufs_lbn_t nextlbn;
 	ufs_lbn_t tmpval;
 	ufs_lbn_t lbn;
 	uint64_t size;
 	uint64_t fragcnt;
 	int mode;
 	int frags;
 	int i;
 
 	size = DIP(ip, di_size);
 	mode = DIP(ip, di_mode) & IFMT;
 	fragcnt = 0;
 	if ((flags & VISIT_EXT) &&
 	    fs->fs_magic == FS_UFS2_MAGIC && ip->dp2.di_extsize) {
 		for (i = 0; i < NXADDR; i++) {
 			if (ip->dp2.di_extb[i] == 0)
 				continue;
 			frags = sblksize(fs, ip->dp2.di_extsize, i);
 			frags = numfrags(fs, frags);
 			fragcnt += frags;
 			visitor(ino, -1 - i, ip->dp2.di_extb[i], frags);
 		}
 	}
 	/* Skip datablocks for short links and devices. */
 	if (mode == IFBLK || mode == IFCHR ||
 	    (mode == IFLNK && size < fs->fs_maxsymlinklen))
 		return (fragcnt);
 	for (i = 0; i < NDADDR; i++) {
 		if (DIP(ip, di_db[i]) == 0)
 			continue;
 		frags = sblksize(fs, size, i);
 		frags = numfrags(fs, frags);
 		fragcnt += frags;
 		visitor(ino, i, DIP(ip, di_db[i]), frags);
 	}
 	/*
 	 * We know the following indirects are real as we're following
 	 * real pointers to them.
 	 */
 	flags |= VISIT_ROOT;
 	for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++,
 	    lbn = nextlbn) {
 		nextlbn = lbn + tmpval;
 		tmpval *= NINDIR(fs);
 		if (DIP(ip, di_ib[i]) == 0)
 			continue;
 		indir_visit(ino, -lbn - i, DIP(ip, di_ib[i]), &fragcnt, visitor,
 		    flags);
 	}
 	return (fragcnt);
 }
 
 /*
  * Null visitor function used when we just want to count blocks and
  * record the lbn.
  */
 ufs_lbn_t visitlbn;
 static void
 null_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
 	if (lbn > 0)
 		visitlbn = lbn;
 }
 
 /*
  * Recalculate di_blocks when we discover that a block allocation or
  * free was not successfully completed.  The kernel does not roll this back
  * because it would be too expensive to compute which indirects were
  * reachable at the time the inode was written.
  */
 static void
 ino_adjblks(struct suj_ino *sino)
 {
 	union dinode *ip;
 	uint64_t blocks;
 	uint64_t frags;
 	off_t isize;
 	off_t size;
 	ino_t ino;
 
 	ino = sino->si_ino;
 	ip = ino_read(ino);
 	/* No need to adjust zero'd inodes. */
 	if (DIP(ip, di_mode) == 0)
 		return;
 	/*
 	 * Visit all blocks and count them as well as recording the last
 	 * valid lbn in the file.  If the file size doesn't agree with the
 	 * last lbn we need to truncate to fix it.  Otherwise just adjust
 	 * the blocks count.
 	 */
 	visitlbn = 0;
 	frags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT);
 	blocks = fsbtodb(fs, frags);
 	/*
 	 * We assume the size and direct block list is kept coherent by
 	 * softdep.  For files that have extended into indirects we truncate
 	 * to the size in the inode or the maximum size permitted by
 	 * populated indirects.
 	 */
 	if (visitlbn >= NDADDR) {
 		isize = DIP(ip, di_size);
 		size = lblktosize(fs, visitlbn + 1);
 		if (isize > size)
 			isize = size;
 		/* Always truncate to free any unpopulated indirects. */
 		ino_trunc(sino->si_ino, isize);
 		return;
 	}
 	if (blocks == DIP(ip, di_blocks))
 		return;
 	if (debug)
 		printf("ino %ju adjusting block count from %jd to %jd\n",
 		    (uintmax_t)ino, DIP(ip, di_blocks), blocks);
 	DIP_SET(ip, di_blocks, blocks);
 	ino_dirty(ino);
 }
 
 static void
 blk_free_visit(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
 
 	blk_free(blk, blk_freemask(blk, ino, lbn, frags), frags);
 }
 
 /*
  * Free a block or tree of blocks that was previously rooted in ino at
  * the given lbn.  If the lbn is an indirect all children are freed
  * recursively.
  */
 static void
 blk_free_lbn(ufs2_daddr_t blk, ino_t ino, ufs_lbn_t lbn, int frags, int follow)
 {
 	uint64_t resid;
 	int mask;
 
 	mask = blk_freemask(blk, ino, lbn, frags);
 	resid = 0;
 	if (lbn <= -NDADDR && follow && mask == 0)
 		indir_visit(ino, lbn, blk, &resid, blk_free_visit, VISIT_INDIR);
 	else
 		blk_free(blk, mask, frags);
 }
 
 static void
 ino_setskip(struct suj_ino *sino, ino_t parent)
 {
 	int isdot;
 	int mode;
 
 	if (ino_isat(sino->si_ino, DOTDOT_OFFSET, parent, &mode, &isdot))
 		sino->si_skipparent = 1;
 }
 
 static void
 ino_remref(ino_t parent, ino_t child, uint64_t diroff, int isdotdot)
 {
 	struct suj_ino *sino;
 	struct suj_rec *srec;
 	struct jrefrec *rrec;
 
 	/*
 	 * Lookup this inode to see if we have a record for it.
 	 */
 	sino = ino_lookup(child, 0);
 	/*
 	 * Tell any child directories we've already removed their
 	 * parent link cnt.  Don't try to adjust our link down again.
 	 */
 	if (sino != NULL && isdotdot == 0)
 		ino_setskip(sino, parent);
 	/*
 	 * No valid record for this inode.  Just drop the on-disk
 	 * link by one.
 	 */
 	if (sino == NULL || sino->si_hasrecs == 0) {
 		ino_decr(child);
 		return;
 	}
 	/*
 	 * Use ino_adjust() if ino_check() has already processed this
 	 * child.  If we lose the last non-dot reference to a
 	 * directory it will be discarded.
 	 */
 	if (sino->si_linkadj) {
 		sino->si_nlink--;
 		if (isdotdot)
 			sino->si_dotlinks--;
 		ino_adjust(sino);
 		return;
 	}
 	/*
 	 * If we haven't yet processed this inode we need to make
 	 * sure we will successfully discover the lost path.  If not
 	 * use nlinkadj to remember.
 	 */
 	TAILQ_FOREACH(srec, &sino->si_recs, sr_next) {
 		rrec = (struct jrefrec *)srec->sr_rec;
 		if (rrec->jr_parent == parent &&
 		    rrec->jr_diroff == diroff)
 			return;
 	}
 	sino->si_nlinkadj++;
 }
 
 /*
  * Free the children of a directory when the directory is discarded.
  */
 static void
 ino_free_children(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
 	struct suj_ino *sino;
 	struct direct *dp;
 	off_t diroff;
 	uint8_t *block;
 	int skipparent;
 	int isdotdot;
 	int dpoff;
 	int size;
 
 	sino = ino_lookup(ino, 0);
 	if (sino)
 		skipparent = sino->si_skipparent;
 	else
 		skipparent = 0;
 	size = lfragtosize(fs, frags);
 	block = dblk_read(blk, size);
 	dp = (struct direct *)&block[0];
 	for (dpoff = 0; dpoff < size && dp->d_reclen; dpoff += dp->d_reclen) {
 		dp = (struct direct *)&block[dpoff];
 		if (dp->d_ino == 0 || dp->d_ino == WINO)
 			continue;
 		if (dp->d_namlen == 1 && dp->d_name[0] == '.')
 			continue;
 		isdotdot = dp->d_namlen == 2 && dp->d_name[0] == '.' &&
 		    dp->d_name[1] == '.';
 		if (isdotdot && skipparent == 1)
 			continue;
 		if (debug)
 			printf("Directory %ju removing ino %ju name %s\n",
 			    (uintmax_t)ino, (uintmax_t)dp->d_ino, dp->d_name);
 		diroff = lblktosize(fs, lbn) + dpoff;
 		ino_remref(ino, dp->d_ino, diroff, isdotdot);
 	}
 }
 
 /*
  * Reclaim an inode, freeing all blocks and decrementing all children's
  * link counts.  Free the inode back to the cg.
  */
 static void
 ino_reclaim(union dinode *ip, ino_t ino, int mode)
 {
 	uint32_t gen;
 
 	if (ino == ROOTINO)
 		err_suj("Attempting to free ROOTINO\n");
 	if (debug)
 		printf("Truncating and freeing ino %ju, nlink %d, mode %o\n",
 		    (uintmax_t)ino, DIP(ip, di_nlink), DIP(ip, di_mode));
 
 	/* We are freeing an inode or directory. */
 	if ((DIP(ip, di_mode) & IFMT) == IFDIR)
 		ino_visit(ip, ino, ino_free_children, 0);
 	DIP_SET(ip, di_nlink, 0);
 	ino_visit(ip, ino, blk_free_visit, VISIT_EXT | VISIT_INDIR);
 	/* Here we have to clear the inode and release any blocks it holds. */
 	gen = DIP(ip, di_gen);
 	if (fs->fs_magic == FS_UFS1_MAGIC)
 		bzero(ip, sizeof(struct ufs1_dinode));
 	else
 		bzero(ip, sizeof(struct ufs2_dinode));
 	DIP_SET(ip, di_gen, gen);
 	ino_dirty(ino);
 	ino_free(ino, mode);
 	return;
 }
 
 /*
  * Adjust an inode's link count down by one when a directory goes away.
  */
 static void
 ino_decr(ino_t ino)
 {
 	union dinode *ip;
 	int reqlink;
 	int nlink;
 	int mode;
 
 	ip = ino_read(ino);
 	nlink = DIP(ip, di_nlink);
 	mode = DIP(ip, di_mode);
 	if (nlink < 1)
 		err_suj("Inode %d link count %d invalid\n", ino, nlink);
 	if (mode == 0)
 		err_suj("Inode %d has a link of %d with 0 mode\n", ino, nlink);
 	nlink--;
 	if ((mode & IFMT) == IFDIR)
 		reqlink = 2;
 	else
 		reqlink = 1;
 	if (nlink < reqlink) {
 		if (debug)
 			printf("ino %ju not enough links to live %d < %d\n",
 			    (uintmax_t)ino, nlink, reqlink);
 		ino_reclaim(ip, ino, mode);
 		return;
 	}
 	DIP_SET(ip, di_nlink, nlink);
 	ino_dirty(ino);
 }
 
 /*
  * Adjust the inode link count to 'nlink'.  If the count reaches zero
  * free it.
  */
 static void
 ino_adjust(struct suj_ino *sino)
 {
 	struct jrefrec *rrec;
 	struct suj_rec *srec;
 	struct suj_ino *stmp;
 	union dinode *ip;
 	nlink_t nlink;
 	int recmode;
 	int reqlink;
 	int isdot;
 	int mode;
 	ino_t ino;
 
 	nlink = sino->si_nlink;
 	ino = sino->si_ino;
 	mode = sino->si_mode & IFMT;
 	/*
 	 * If it's a directory with no dot links, it was truncated before
 	 * the name was cleared.  We need to clear the dirent that
 	 * points at it.
 	 */
 	if (mode == IFDIR && nlink == 1 && sino->si_dotlinks == 0) {
 		sino->si_nlink = nlink = 0;
 		TAILQ_FOREACH(srec, &sino->si_recs, sr_next) {
 			rrec = (struct jrefrec *)srec->sr_rec;
 			if (ino_isat(rrec->jr_parent, rrec->jr_diroff, ino,
 			    &recmode, &isdot) == 0)
 				continue;
 			ino_clrat(rrec->jr_parent, rrec->jr_diroff, ino);
 			break;
 		}
 		if (srec == NULL)
 			errx(1, "Directory %ju name not found", (uintmax_t)ino);
 	}
 	/*
 	 * If it's a directory with no real names pointing to it go ahead
 	 * and truncate it.  This will free any children.
 	 */
 	if (mode == IFDIR && nlink - sino->si_dotlinks == 0) {
 		sino->si_nlink = nlink = 0;
 		/*
 		 * Mark any .. links so they know not to free this inode
 		 * when they are removed.
 		 */
 		TAILQ_FOREACH(srec, &sino->si_recs, sr_next) {
 			rrec = (struct jrefrec *)srec->sr_rec;
 			if (rrec->jr_diroff == DOTDOT_OFFSET) {
 				stmp = ino_lookup(rrec->jr_parent, 0);
 				if (stmp)
 					ino_setskip(stmp, ino);
 			}
 		}
 	}
 	ip = ino_read(ino);
 	mode = DIP(ip, di_mode) & IFMT;
 	if (nlink > LINK_MAX)
-		err_suj("ino %ju nlink manipulation error, new %d, old %d\n",
-		    (uintmax_t)ino, nlink, DIP(ip, di_nlink));
+		err_suj("ino %ju nlink manipulation error, new %ju, old %d\n",
+		    (uintmax_t)ino, (uintmax_t)nlink, DIP(ip, di_nlink));
 	if (debug)
-		printf("Adjusting ino %ju, nlink %d, old link %d lastmode %o\n",
-		    (uintmax_t)ino, nlink, DIP(ip, di_nlink), sino->si_mode);
+	       printf("Adjusting ino %ju, nlink %ju, old link %d lastmode %o\n",
+		    (uintmax_t)ino, (uintmax_t)nlink, DIP(ip, di_nlink),
+		    sino->si_mode);
 	if (mode == 0) {
 		if (debug)
 			printf("ino %ju, zero inode freeing bitmap\n",
 			    (uintmax_t)ino);
 		ino_free(ino, sino->si_mode);
 		return;
 	}
 	/* XXX Should be an assert? */
 	if (mode != sino->si_mode && debug)
 		printf("ino %ju, mode %o != %o\n",
 		    (uintmax_t)ino, mode, sino->si_mode);
 	if ((mode & IFMT) == IFDIR)
 		reqlink = 2;
 	else
 		reqlink = 1;
 	/* If the inode doesn't have enough links to live, free it. */
 	if (nlink < reqlink) {
 		if (debug)
-			printf("ino %ju not enough links to live %d < %d\n",
-			    (uintmax_t)ino, nlink, reqlink);
+			printf("ino %ju not enough links to live %ju < %ju\n",
+			    (uintmax_t)ino, (uintmax_t)nlink,
+			    (uintmax_t)reqlink);
 		ino_reclaim(ip, ino, mode);
 		return;
 	}
 	/* If required write the updated link count. */
 	if (DIP(ip, di_nlink) == nlink) {
 		if (debug)
 			printf("ino %ju, link matches, skipping.\n",
 			    (uintmax_t)ino);
 		return;
 	}
 	DIP_SET(ip, di_nlink, nlink);
 	ino_dirty(ino);
 }
 
 /*
  * Truncate some or all blocks in an indirect, freeing any that are required
  * and zeroing the indirect.
  */
 static void
 indir_trunc(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, ufs_lbn_t lastlbn)
 {
 	ufs2_daddr_t *bap2;
 	ufs1_daddr_t *bap1;
 	ufs_lbn_t lbnadd;
 	ufs2_daddr_t nblk;
 	ufs_lbn_t next;
 	ufs_lbn_t nlbn;
 	int dirty;
 	int level;
 	int i;
 
 	if (blk == 0)
 		return;
 	dirty = 0;
 	level = lbn_level(lbn);
 	if (level == -1)
 		err_suj("Invalid level for lbn %jd\n", lbn);
 	lbnadd = 1;
 	for (i = level; i > 0; i--)
 		lbnadd *= NINDIR(fs);
 	bap1 = (void *)dblk_read(blk, fs->fs_bsize);
 	bap2 = (void *)bap1;
 	for (i = 0; i < NINDIR(fs); i++) {
 		if (fs->fs_magic == FS_UFS1_MAGIC)
 			nblk = *bap1++;
 		else
 			nblk = *bap2++;
 		if (nblk == 0)
 			continue;
 		if (level != 0) {
 			nlbn = (lbn + 1) - (i * lbnadd);
 			/*
 			 * Calculate the lbn of the next indirect to
 			 * determine if any of this indirect must be
 			 * reclaimed.
 			 */
 			next = -(lbn + level) + ((i+1) * lbnadd);
 			if (next <= lastlbn)
 				continue;
 			indir_trunc(ino, nlbn, nblk, lastlbn);
 			/* If all of this indirect was reclaimed, free it. */
 			nlbn = next - lbnadd;
 			if (nlbn < lastlbn)
 				continue;
 		} else {
 			nlbn = -lbn + i * lbnadd;
 			if (nlbn < lastlbn)
 				continue;
 		}
 		dirty = 1;
 		blk_free(nblk, 0, fs->fs_frag);
 		if (fs->fs_magic == FS_UFS1_MAGIC)
 			*(bap1 - 1) = 0;
 		else
 			*(bap2 - 1) = 0;
 	}
 	if (dirty)
 		dblk_dirty(blk);
 }
 
 /*
  * Truncate an inode to the minimum of the given size or the last populated
  * block after any over size have been discarded.  The kernel would allocate
  * the last block in the file but fsck does not and neither do we.  This
  * code never extends files, only shrinks them.
  */
 static void
 ino_trunc(ino_t ino, off_t size)
 {
 	union dinode *ip;
 	ufs2_daddr_t bn;
 	uint64_t totalfrags;
 	ufs_lbn_t nextlbn;
 	ufs_lbn_t lastlbn;
 	ufs_lbn_t tmpval;
 	ufs_lbn_t lbn;
 	ufs_lbn_t i;
 	int frags;
 	off_t cursize;
 	off_t off;
 	int mode;
 
 	ip = ino_read(ino);
 	mode = DIP(ip, di_mode) & IFMT;
 	cursize = DIP(ip, di_size);
 	if (debug)
 		printf("Truncating ino %ju, mode %o to size %jd from size %jd\n",
 		    (uintmax_t)ino, mode, size, cursize);
 
 	/* Skip datablocks for short links and devices. */
 	if (mode == 0 || mode == IFBLK || mode == IFCHR ||
 	    (mode == IFLNK && cursize < fs->fs_maxsymlinklen))
 		return;
 	/* Don't extend. */
 	if (size > cursize)
 		size = cursize;
 	lastlbn = lblkno(fs, blkroundup(fs, size));
 	for (i = lastlbn; i < NDADDR; i++) {
 		if (DIP(ip, di_db[i]) == 0)
 			continue;
 		frags = sblksize(fs, cursize, i);
 		frags = numfrags(fs, frags);
 		blk_free(DIP(ip, di_db[i]), 0, frags);
 		DIP_SET(ip, di_db[i], 0);
 	}
 	/*
 	 * Follow indirect blocks, freeing anything required.
 	 */
 	for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR; i++,
 	    lbn = nextlbn) {
 		nextlbn = lbn + tmpval;
 		tmpval *= NINDIR(fs);
 		/* If we're not freeing any in this indirect range skip it. */
 		if (lastlbn >= nextlbn)
 			continue;
 		if (DIP(ip, di_ib[i]) == 0)
 			continue;
 		indir_trunc(ino, -lbn - i, DIP(ip, di_ib[i]), lastlbn);
 		/* If we freed everything in this indirect free the indir. */
 		if (lastlbn > lbn)
 			continue;
 		blk_free(DIP(ip, di_ib[i]), 0, frags);
 		DIP_SET(ip, di_ib[i], 0);
 	}
 	ino_dirty(ino);
 	/*
 	 * Now that we've freed any whole blocks that exceed the desired
 	 * truncation size, figure out how many blocks remain and what the
 	 * last populated lbn is.  We will set the size to this last lbn
 	 * rather than worrying about allocating the final lbn as the kernel
 	 * would've done.  This is consistent with normal fsck behavior.
 	 */
 	visitlbn = 0;
 	totalfrags = ino_visit(ip, ino, null_visit, VISIT_INDIR | VISIT_EXT);
 	if (size > lblktosize(fs, visitlbn + 1))
 		size = lblktosize(fs, visitlbn + 1);
 	/*
 	 * If we're truncating direct blocks we have to adjust frags
 	 * accordingly.
 	 */
 	if (visitlbn < NDADDR && totalfrags) {
 		long oldspace, newspace;
 
 		bn = DIP(ip, di_db[visitlbn]);
 		if (bn == 0)
 			err_suj("Bad blk at ino %ju lbn %jd\n",
 			    (uintmax_t)ino, visitlbn);
 		oldspace = sblksize(fs, cursize, visitlbn);
 		newspace = sblksize(fs, size, visitlbn);
 		if (oldspace != newspace) {
 			bn += numfrags(fs, newspace);
 			frags = numfrags(fs, oldspace - newspace);
 			blk_free(bn, 0, frags);
 			totalfrags -= frags;
 		}
 	}
 	DIP_SET(ip, di_blocks, fsbtodb(fs, totalfrags));
 	DIP_SET(ip, di_size, size);
 	/*
 	 * If we've truncated into the middle of a block or frag we have
 	 * to zero it here.  Otherwise the file could extend into
 	 * uninitialized space later.
 	 */
 	off = blkoff(fs, size);
 	if (off && DIP(ip, di_mode) != IFDIR) {
 		uint8_t *buf;
 		long clrsize;
 
 		bn = ino_blkatoff(ip, ino, visitlbn, &frags);
 		if (bn == 0)
 			err_suj("Block missing from ino %ju at lbn %jd\n",
 			    (uintmax_t)ino, visitlbn);
 		clrsize = frags * fs->fs_fsize;
 		buf = dblk_read(bn, clrsize);
 		clrsize -= off;
 		buf += off;
 		bzero(buf, clrsize);
 		dblk_dirty(bn);
 	}
 	return;
 }
 
 /*
  * Process records available for one inode and determine whether the
  * link count is correct or needs adjusting.
  */
 static void
 ino_check(struct suj_ino *sino)
 {
 	struct suj_rec *srec;
 	struct jrefrec *rrec;
 	nlink_t dotlinks;
 	int newlinks;
 	int removes;
 	int nlink;
 	ino_t ino;
 	int isdot;
 	int isat;
 	int mode;
 
 	if (sino->si_hasrecs == 0)
 		return;
 	ino = sino->si_ino;
 	rrec = (struct jrefrec *)TAILQ_FIRST(&sino->si_recs)->sr_rec;
 	nlink = rrec->jr_nlink;
 	newlinks = 0;
 	dotlinks = 0;
 	removes = sino->si_nlinkadj;
 	TAILQ_FOREACH(srec, &sino->si_recs, sr_next) {
 		rrec = (struct jrefrec *)srec->sr_rec;
 		isat = ino_isat(rrec->jr_parent, rrec->jr_diroff,
 		    rrec->jr_ino, &mode, &isdot);
 		if (isat && (mode & IFMT) != (rrec->jr_mode & IFMT))
 			err_suj("Inode mode/directory type mismatch %o != %o\n",
 			    mode, rrec->jr_mode);
 		if (debug)
-			printf("jrefrec: op %d ino %ju, nlink %d, parent %d, "
+			printf("jrefrec: op %d ino %ju, nlink %ju, parent %ju, "
 			    "diroff %jd, mode %o, isat %d, isdot %d\n",
 			    rrec->jr_op, (uintmax_t)rrec->jr_ino,
-			    rrec->jr_nlink, rrec->jr_parent, rrec->jr_diroff,
+			    (uintmax_t)rrec->jr_nlink,
+			    (uintmax_t)rrec->jr_parent,
+			    (uintmax_t)rrec->jr_diroff,
 			    rrec->jr_mode, isat, isdot);
 		mode = rrec->jr_mode & IFMT;
 		if (rrec->jr_op == JOP_REMREF)
 			removes++;
 		newlinks += isat;
 		if (isdot)
 			dotlinks += isat;
 	}
 	/*
 	 * The number of links that remain are the starting link count
 	 * subtracted by the total number of removes with the total
 	 * links discovered back in.  An incomplete remove thus
 	 * makes no change to the link count but an add increases
 	 * by one.
 	 */
 	if (debug)
-		printf("ino %ju nlink %d newlinks %d removes %d dotlinks %d\n",
-		    (uintmax_t)ino, nlink, newlinks, removes, dotlinks);
+		printf(
+		    "ino %ju nlink %ju newlinks %ju removes %ju dotlinks %ju\n",
+		    (uintmax_t)ino, (uintmax_t)nlink, (uintmax_t)newlinks,
+		    (uintmax_t)removes, (uintmax_t)dotlinks);
 	nlink += newlinks;
 	nlink -= removes;
 	sino->si_linkadj = 1;
 	sino->si_nlink = nlink;
 	sino->si_dotlinks = dotlinks;
 	sino->si_mode = mode;
 	ino_adjust(sino);
 }
 
 /*
  * Process records available for one block and determine whether it is
  * still allocated and whether the owning inode needs to be updated or
  * a free completed.
  */
 static void
 blk_check(struct suj_blk *sblk)
 {
 	struct suj_rec *srec;
 	struct jblkrec *brec;
 	struct suj_ino *sino;
 	ufs2_daddr_t blk;
 	int mask;
 	int frags;
 	int isat;
 
 	/*
 	 * Each suj_blk actually contains records for any fragments in that
 	 * block.  As a result we must evaluate each record individually.
 	 */
 	sino = NULL;
 	TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) {
 		brec = (struct jblkrec *)srec->sr_rec;
 		frags = brec->jb_frags;
 		blk = brec->jb_blkno + brec->jb_oldfrags;
 		isat = blk_isat(brec->jb_ino, brec->jb_lbn, blk, &frags);
 		if (sino == NULL || sino->si_ino != brec->jb_ino) {
 			sino = ino_lookup(brec->jb_ino, 1);
 			sino->si_blkadj = 1;
 		}
 		if (debug)
 			printf("op %d blk %jd ino %ju lbn %jd frags %d isat %d (%d)\n",
 			    brec->jb_op, blk, (uintmax_t)brec->jb_ino,
 			    brec->jb_lbn, brec->jb_frags, isat, frags);
 		/*
 		 * If we found the block at this address we still have to
 		 * determine if we need to free the tail end that was
 		 * added by adding contiguous fragments from the same block.
 		 */
 		if (isat == 1) {
 			if (frags == brec->jb_frags)
 				continue;
 			mask = blk_freemask(blk, brec->jb_ino, brec->jb_lbn,
 			    brec->jb_frags);
 			mask >>= frags;
 			blk += frags;
 			frags = brec->jb_frags - frags;
 			blk_free(blk, mask, frags);
 			continue;
 		}
 		/*
 	 	 * The block wasn't found, attempt to free it.  It won't be
 		 * freed if it was actually reallocated.  If this was an
 		 * allocation we don't want to follow indirects as they
 		 * may not be written yet.  Any children of the indirect will
 		 * have their own records.  If it's a free we need to
 		 * recursively free children.
 		 */
 		blk_free_lbn(blk, brec->jb_ino, brec->jb_lbn, brec->jb_frags,
 		    brec->jb_op == JOP_FREEBLK);
 	}
 }
 
 /*
  * Walk the list of inode records for this cg and resolve moved and duplicate
  * inode references now that we have a complete picture.
  */
 static void
 cg_build(struct suj_cg *sc)
 {
 	struct suj_ino *sino;
 	int i;
 
 	for (i = 0; i < SUJ_HASHSIZE; i++)
 		LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
 			ino_build(sino);
 }
 
 /*
  * Handle inodes requiring truncation.  This must be done prior to
  * looking up any inodes in directories.
  */
 static void
 cg_trunc(struct suj_cg *sc)
 {
 	struct suj_ino *sino;
 	int i;
 
 	for (i = 0; i < SUJ_HASHSIZE; i++) {
 		LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) {
 			if (sino->si_trunc) {
 				ino_trunc(sino->si_ino,
 				    sino->si_trunc->jt_size);
 				sino->si_blkadj = 0;
 				sino->si_trunc = NULL;
 			}
 			if (sino->si_blkadj)
 				ino_adjblks(sino);
 		}
 	}
 }
 
 static void
 cg_adj_blk(struct suj_cg *sc)
 {
 	struct suj_ino *sino;
 	int i;
 
 	for (i = 0; i < SUJ_HASHSIZE; i++) {
 		LIST_FOREACH(sino, &sc->sc_inohash[i], si_next) {
 			if (sino->si_blkadj)
 				ino_adjblks(sino);
 		}
 	}
 }
 
 /*
  * Free any partially allocated blocks and then resolve inode block
  * counts.
  */
 static void
 cg_check_blk(struct suj_cg *sc)
 {
 	struct suj_blk *sblk;
 	int i;
 
 
 	for (i = 0; i < SUJ_HASHSIZE; i++)
 		LIST_FOREACH(sblk, &sc->sc_blkhash[i], sb_next)
 			blk_check(sblk);
 }
 
 /*
  * Walk the list of inode records for this cg, recovering any
  * changes which were not complete at the time of crash.
  */
 static void
 cg_check_ino(struct suj_cg *sc)
 {
 	struct suj_ino *sino;
 	int i;
 
 	for (i = 0; i < SUJ_HASHSIZE; i++)
 		LIST_FOREACH(sino, &sc->sc_inohash[i], si_next)
 			ino_check(sino);
 }
 
 /*
  * Write a potentially dirty cg.  Recalculate the summary information and
  * update the superblock summary.
  */
 static void
 cg_write(struct suj_cg *sc)
 {
 	ufs1_daddr_t fragno, cgbno, maxbno;
 	u_int8_t *blksfree;
 	struct cg *cgp;
 	int blk;
 	int i;
 
 	if (sc->sc_dirty == 0)
 		return;
 	/*
 	 * Fix the frag and cluster summary.
 	 */
 	cgp = sc->sc_cgp;
 	cgp->cg_cs.cs_nbfree = 0;
 	cgp->cg_cs.cs_nffree = 0;
 	bzero(&cgp->cg_frsum, sizeof(cgp->cg_frsum));
 	maxbno = fragstoblks(fs, fs->fs_fpg);
 	if (fs->fs_contigsumsize > 0) {
 		for (i = 1; i <= fs->fs_contigsumsize; i++)
 			cg_clustersum(cgp)[i] = 0;
 		bzero(cg_clustersfree(cgp), howmany(maxbno, CHAR_BIT));
 	}
 	blksfree = cg_blksfree(cgp);
 	for (cgbno = 0; cgbno < maxbno; cgbno++) {
 		if (ffs_isfreeblock(fs, blksfree, cgbno))
 			continue;
 		if (ffs_isblock(fs, blksfree, cgbno)) {
 			ffs_clusteracct(fs, cgp, cgbno, 1);
 			cgp->cg_cs.cs_nbfree++;
 			continue;
 		}
 		fragno = blkstofrags(fs, cgbno);
 		blk = blkmap(fs, blksfree, fragno);
 		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
 		for (i = 0; i < fs->fs_frag; i++)
 			if (isset(blksfree, fragno + i))
 				cgp->cg_cs.cs_nffree++;
 	}
 	/*
 	 * Update the superblock cg summary from our now correct values
 	 * before writing the block.
 	 */
 	fs->fs_cs(fs, sc->sc_cgx) = cgp->cg_cs;
 	if (bwrite(disk, fsbtodb(fs, cgtod(fs, sc->sc_cgx)), sc->sc_cgbuf,
 	    fs->fs_bsize) == -1)
 		err_suj("Unable to write cylinder group %d\n", sc->sc_cgx);
 }
 
 /*
  * Write out any modified inodes.
  */
 static void
 cg_write_inos(struct suj_cg *sc)
 {
 	struct ino_blk *iblk;
 	int i;
 
 	for (i = 0; i < SUJ_HASHSIZE; i++)
 		LIST_FOREACH(iblk, &sc->sc_iblkhash[i], ib_next)
 			if (iblk->ib_dirty)
 				iblk_write(iblk);
 }
 
 static void
 cg_apply(void (*apply)(struct suj_cg *))
 {
 	struct suj_cg *scg;
 	int i;
 
 	for (i = 0; i < SUJ_HASHSIZE; i++)
 		LIST_FOREACH(scg, &cghash[i], sc_next)
 			apply(scg);
 }
 
 /*
  * Process the unlinked but referenced file list.  Freeing all inodes.
  */
 static void
 ino_unlinked(void)
 {
 	union dinode *ip;
 	uint16_t mode;
 	ino_t inon;
 	ino_t ino;
 
 	ino = fs->fs_sujfree;
 	fs->fs_sujfree = 0;
 	while (ino != 0) {
 		ip = ino_read(ino);
 		mode = DIP(ip, di_mode) & IFMT;
 		inon = DIP(ip, di_freelink);
 		DIP_SET(ip, di_freelink, 0);
 		/*
 		 * XXX Should this be an errx?
 		 */
 		if (DIP(ip, di_nlink) == 0) {
 			if (debug)
 				printf("Freeing unlinked ino %ju mode %o\n",
 				    (uintmax_t)ino, mode);
 			ino_reclaim(ip, ino, mode);
 		} else if (debug)
 			printf("Skipping ino %ju mode %o with link %d\n",
 			    (uintmax_t)ino, mode, DIP(ip, di_nlink));
 		ino = inon;
 	}
 }
 
 /*
  * Append a new record to the list of records requiring processing.
  */
 static void
 ino_append(union jrec *rec)
 {
 	struct jrefrec *refrec;
 	struct jmvrec *mvrec;
 	struct suj_ino *sino;
 	struct suj_rec *srec;
 
 	mvrec = &rec->rec_jmvrec;
 	refrec = &rec->rec_jrefrec;
 	if (debug && mvrec->jm_op == JOP_MVREF)
-		printf("ino move: ino %d, parent %d, diroff %jd, oldoff %jd\n",
-		    mvrec->jm_ino, mvrec->jm_parent, mvrec->jm_newoff,
-		    mvrec->jm_oldoff);
+		printf("ino move: ino %ju, parent %ju, "
+		    "diroff %jd, oldoff %jd\n",
+		    (uintmax_t)mvrec->jm_ino, (uintmax_t)mvrec->jm_parent,
+		    (uintmax_t)mvrec->jm_newoff, (uintmax_t)mvrec->jm_oldoff);
 	else if (debug &&
 	    (refrec->jr_op == JOP_ADDREF || refrec->jr_op == JOP_REMREF))
-		printf("ino ref: op %d, ino %d, nlink %d, "
-		    "parent %d, diroff %jd\n",
-		    refrec->jr_op, refrec->jr_ino, refrec->jr_nlink,
-		    refrec->jr_parent, refrec->jr_diroff);
+		printf("ino ref: op %d, ino %ju, nlink %ju, "
+		    "parent %ju, diroff %jd\n",
+		    refrec->jr_op, (uintmax_t)refrec->jr_ino,
+		    (uintmax_t)refrec->jr_nlink,
+		    (uintmax_t)refrec->jr_parent, (uintmax_t)refrec->jr_diroff);
 	sino = ino_lookup(((struct jrefrec *)rec)->jr_ino, 1);
 	sino->si_hasrecs = 1;
 	srec = errmalloc(sizeof(*srec));
 	srec->sr_rec = rec;
 	TAILQ_INSERT_TAIL(&sino->si_newrecs, srec, sr_next);
 }
 
 /*
  * Add a reference adjustment to the sino list and eliminate dups.  The
  * primary loop in ino_build_ref() checks for dups but new ones may be
  * created as a result of offset adjustments.
  */
 static void
 ino_add_ref(struct suj_ino *sino, struct suj_rec *srec)
 {
 	struct jrefrec *refrec;
 	struct suj_rec *srn;
 	struct jrefrec *rrn;
 
 	refrec = (struct jrefrec *)srec->sr_rec;
 	/*
 	 * We walk backwards so that the oldest link count is preserved.  If
 	 * an add record conflicts with a remove keep the remove.  Redundant
 	 * removes are eliminated in ino_build_ref.  Otherwise we keep the
 	 * oldest record at a given location.
 	 */
 	for (srn = TAILQ_LAST(&sino->si_recs, srechd); srn;
 	    srn = TAILQ_PREV(srn, srechd, sr_next)) {
 		rrn = (struct jrefrec *)srn->sr_rec;
 		if (rrn->jr_parent != refrec->jr_parent ||
 		    rrn->jr_diroff != refrec->jr_diroff)
 			continue;
 		if (rrn->jr_op == JOP_REMREF || refrec->jr_op == JOP_ADDREF) {
 			rrn->jr_mode = refrec->jr_mode;
 			return;
 		}
 		/*
 		 * Adding a remove.
 		 *
 		 * Replace the record in place with the old nlink in case
 		 * we replace the head of the list.  Abandon srec as a dup.
 		 */
 		refrec->jr_nlink = rrn->jr_nlink;
 		srn->sr_rec = srec->sr_rec;
 		return;
 	}
 	TAILQ_INSERT_TAIL(&sino->si_recs, srec, sr_next);
 }
 
 /*
  * Create a duplicate of a reference at a previous location.
  */
 static void
 ino_dup_ref(struct suj_ino *sino, struct jrefrec *refrec, off_t diroff)
 {
 	struct jrefrec *rrn;
 	struct suj_rec *srn;
 
 	rrn = errmalloc(sizeof(*refrec));
 	*rrn = *refrec;
 	rrn->jr_op = JOP_ADDREF;
 	rrn->jr_diroff = diroff;
 	srn = errmalloc(sizeof(*srn));
 	srn->sr_rec = (union jrec *)rrn;
 	ino_add_ref(sino, srn);
 }
 
 /*
  * Add a reference to the list at all known locations.  We follow the offset
  * changes for a single instance and create duplicate add refs at each so
  * that we can tolerate any version of the directory block.  Eliminate
  * removes which collide with adds that are seen in the journal.  They should
  * not adjust the link count down.
  */
 static void
 ino_build_ref(struct suj_ino *sino, struct suj_rec *srec)
 {
 	struct jrefrec *refrec;
 	struct jmvrec *mvrec;
 	struct suj_rec *srp;
 	struct suj_rec *srn;
 	struct jrefrec *rrn;
 	off_t diroff;
 
 	refrec = (struct jrefrec *)srec->sr_rec;
 	/*
 	 * Search for a mvrec that matches this offset.  Whether it's an add
 	 * or a remove we can delete the mvref after creating a dup record in
 	 * the old location.
 	 */
 	if (!TAILQ_EMPTY(&sino->si_movs)) {
 		diroff = refrec->jr_diroff;
 		for (srn = TAILQ_LAST(&sino->si_movs, srechd); srn; srn = srp) {
 			srp = TAILQ_PREV(srn, srechd, sr_next);
 			mvrec = (struct jmvrec *)srn->sr_rec;
 			if (mvrec->jm_parent != refrec->jr_parent ||
 			    mvrec->jm_newoff != diroff)
 				continue;
 			diroff = mvrec->jm_oldoff;
 			TAILQ_REMOVE(&sino->si_movs, srn, sr_next);
 			free(srn);
 			ino_dup_ref(sino, refrec, diroff);
 		}
 	}
 	/*
 	 * If a remove wasn't eliminated by an earlier add just append it to
 	 * the list.
 	 */
 	if (refrec->jr_op == JOP_REMREF) {
 		ino_add_ref(sino, srec);
 		return;
 	}
 	/*
 	 * Walk the list of records waiting to be added to the list.  We
 	 * must check for moves that apply to our current offset and remove
 	 * them from the list.  Remove any duplicates to eliminate removes
 	 * with corresponding adds.
 	 */
 	TAILQ_FOREACH_SAFE(srn, &sino->si_newrecs, sr_next, srp) {
 		switch (srn->sr_rec->rec_jrefrec.jr_op) {
 		case JOP_ADDREF:
 			/*
 			 * This should actually be an error we should
 			 * have a remove for every add journaled.
 			 */
 			rrn = (struct jrefrec *)srn->sr_rec;
 			if (rrn->jr_parent != refrec->jr_parent ||
 			    rrn->jr_diroff != refrec->jr_diroff)
 				break;
 			TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next);
 			break;
 		case JOP_REMREF:
 			/*
 			 * Once we remove the current iteration of the
 			 * record at this address we're done.
 			 */
 			rrn = (struct jrefrec *)srn->sr_rec;
 			if (rrn->jr_parent != refrec->jr_parent ||
 			    rrn->jr_diroff != refrec->jr_diroff)
 				break;
 			TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next);
 			ino_add_ref(sino, srec);
 			return;
 		case JOP_MVREF:
 			/*
 			 * Update our diroff based on any moves that match
 			 * and remove the move.
 			 */
 			mvrec = (struct jmvrec *)srn->sr_rec;
 			if (mvrec->jm_parent != refrec->jr_parent ||
 			    mvrec->jm_oldoff != refrec->jr_diroff)
 				break;
 			ino_dup_ref(sino, refrec, mvrec->jm_oldoff);
 			refrec->jr_diroff = mvrec->jm_newoff;
 			TAILQ_REMOVE(&sino->si_newrecs, srn, sr_next);
 			break;
 		default:
 			err_suj("ino_build_ref: Unknown op %d\n",
 			    srn->sr_rec->rec_jrefrec.jr_op);
 		}
 	}
 	ino_add_ref(sino, srec);
 }
 
 /*
  * Walk the list of new records and add them in-order resolving any
  * dups and adjusted offsets.
  */
 static void
 ino_build(struct suj_ino *sino)
 {
 	struct suj_rec *srec;
 
 	while ((srec = TAILQ_FIRST(&sino->si_newrecs)) != NULL) {
 		TAILQ_REMOVE(&sino->si_newrecs, srec, sr_next);
 		switch (srec->sr_rec->rec_jrefrec.jr_op) {
 		case JOP_ADDREF:
 		case JOP_REMREF:
 			ino_build_ref(sino, srec);
 			break;
 		case JOP_MVREF:
 			/*
 			 * Add this mvrec to the queue of pending mvs.
 			 */
 			TAILQ_INSERT_TAIL(&sino->si_movs, srec, sr_next);
 			break;
 		default:
 			err_suj("ino_build: Unknown op %d\n",
 			    srec->sr_rec->rec_jrefrec.jr_op);
 		}
 	}
 	if (TAILQ_EMPTY(&sino->si_recs))
 		sino->si_hasrecs = 0;
 }
 
 /*
  * Modify journal records so they refer to the base block number
  * and a start and end frag range.  This is to facilitate the discovery
  * of overlapping fragment allocations.
  */
 static void
 blk_build(struct jblkrec *blkrec)
 {
 	struct suj_rec *srec;
 	struct suj_blk *sblk;
 	struct jblkrec *blkrn;
 	ufs2_daddr_t blk;
 	int frag;
 
 	if (debug)
 		printf("blk_build: op %d blkno %jd frags %d oldfrags %d "
-		    "ino %d lbn %jd\n",
-		    blkrec->jb_op, blkrec->jb_blkno, blkrec->jb_frags,
-		    blkrec->jb_oldfrags, blkrec->jb_ino, blkrec->jb_lbn);
+		    "ino %ju lbn %jd\n",
+		    blkrec->jb_op, (uintmax_t)blkrec->jb_blkno,
+		    blkrec->jb_frags, blkrec->jb_oldfrags,
+		    (uintmax_t)blkrec->jb_ino, (uintmax_t)blkrec->jb_lbn);
 
 	blk = blknum(fs, blkrec->jb_blkno);
 	frag = fragnum(fs, blkrec->jb_blkno);
 	sblk = blk_lookup(blk, 1);
 	/*
 	 * Rewrite the record using oldfrags to indicate the offset into
 	 * the block.  Leave jb_frags as the actual allocated count.
 	 */
 	blkrec->jb_blkno -= frag;
 	blkrec->jb_oldfrags = frag;
 	if (blkrec->jb_oldfrags + blkrec->jb_frags > fs->fs_frag)
 		err_suj("Invalid fragment count %d oldfrags %d\n",
 		    blkrec->jb_frags, frag);
 	/*
 	 * Detect dups.  If we detect a dup we always discard the oldest
 	 * record as it is superseded by the new record.  This speeds up
 	 * later stages but also eliminates free records which are used
 	 * to indicate that the contents of indirects can be trusted.
 	 */
 	TAILQ_FOREACH(srec, &sblk->sb_recs, sr_next) {
 		blkrn = (struct jblkrec *)srec->sr_rec;
 		if (blkrn->jb_ino != blkrec->jb_ino ||
 		    blkrn->jb_lbn != blkrec->jb_lbn ||
 		    blkrn->jb_blkno != blkrec->jb_blkno ||
 		    blkrn->jb_frags != blkrec->jb_frags ||
 		    blkrn->jb_oldfrags != blkrec->jb_oldfrags)
 			continue;
 		if (debug)
 			printf("Removed dup.\n");
 		/* Discard the free which is a dup with an alloc. */
 		if (blkrec->jb_op == JOP_FREEBLK)
 			return;
 		TAILQ_REMOVE(&sblk->sb_recs, srec, sr_next);
 		free(srec);
 		break;
 	}
 	srec = errmalloc(sizeof(*srec));
 	srec->sr_rec = (union jrec *)blkrec;
 	TAILQ_INSERT_TAIL(&sblk->sb_recs, srec, sr_next);
 }
 
 static void
 ino_build_trunc(struct jtrncrec *rec)
 {
 	struct suj_ino *sino;
 
 	if (debug)
-		printf("ino_build_trunc: op %d ino %d, size %jd\n",
-		    rec->jt_op, rec->jt_ino, rec->jt_size);
+		printf("ino_build_trunc: op %d ino %ju, size %jd\n",
+		    rec->jt_op, (uintmax_t)rec->jt_ino,
+		    (uintmax_t)rec->jt_size);
 	sino = ino_lookup(rec->jt_ino, 1);
 	if (rec->jt_op == JOP_SYNC) {
 		sino->si_trunc = NULL;
 		return;
 	}
 	if (sino->si_trunc == NULL || sino->si_trunc->jt_size > rec->jt_size)
 		sino->si_trunc = rec;
 }
 
 /*
  * Build up tables of the operations we need to recover.
  */
 static void
 suj_build(void)
 {
 	struct suj_seg *seg;
 	union jrec *rec;
 	int off;
 	int i;
 
 	TAILQ_FOREACH(seg, &allsegs, ss_next) {
 		if (debug)
 			printf("seg %jd has %d records, oldseq %jd.\n",
 			    seg->ss_rec.jsr_seq, seg->ss_rec.jsr_cnt,
 			    seg->ss_rec.jsr_oldest);
 		off = 0;
 		rec = (union jrec *)seg->ss_blk;
 		for (i = 0; i < seg->ss_rec.jsr_cnt; off += JREC_SIZE, rec++) {
 			/* skip the segrec. */
 			if ((off % real_dev_bsize) == 0)
 				continue;
 			switch (rec->rec_jrefrec.jr_op) {
 			case JOP_ADDREF:
 			case JOP_REMREF:
 			case JOP_MVREF:
 				ino_append(rec);
 				break;
 			case JOP_NEWBLK:
 			case JOP_FREEBLK:
 				blk_build((struct jblkrec *)rec);
 				break;
 			case JOP_TRUNC:
 			case JOP_SYNC:
 				ino_build_trunc((struct jtrncrec *)rec);
 				break;
 			default:
 				err_suj("Unknown journal operation %d (%d)\n",
 				    rec->rec_jrefrec.jr_op, off);
 			}
 			i++;
 		}
 	}
 }
 
 /*
  * Prune the journal segments to those we care about based on the
  * oldest sequence in the newest segment.  Order the segment list
  * based on sequence number.
  */
 static void
 suj_prune(void)
 {
 	struct suj_seg *seg;
 	struct suj_seg *segn;
 	uint64_t newseq;
 	int discard;
 
 	if (debug)
 		printf("Pruning up to %jd\n", oldseq);
 	/* First free the expired segments. */
 	TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) {
 		if (seg->ss_rec.jsr_seq >= oldseq)
 			continue;
 		TAILQ_REMOVE(&allsegs, seg, ss_next);
 		free(seg->ss_blk);
 		free(seg);
 	}
 	/* Next ensure that segments are ordered properly. */
 	seg = TAILQ_FIRST(&allsegs);
 	if (seg == NULL) {
 		if (debug)
 			printf("Empty journal\n");
 		return;
 	}
 	newseq = seg->ss_rec.jsr_seq;
 	for (;;) {
 		seg = TAILQ_LAST(&allsegs, seghd);
 		if (seg->ss_rec.jsr_seq >= newseq)
 			break;
 		TAILQ_REMOVE(&allsegs, seg, ss_next);
 		TAILQ_INSERT_HEAD(&allsegs, seg, ss_next);
 		newseq = seg->ss_rec.jsr_seq;
 
 	}
 	if (newseq != oldseq) {
 		TAILQ_FOREACH(seg, &allsegs, ss_next) {
 			printf("%jd, ", seg->ss_rec.jsr_seq);
 		}
 		printf("\n");
 		err_suj("Journal file sequence mismatch %jd != %jd\n",
 		    newseq, oldseq);
 	}
 	/*
 	 * The kernel may asynchronously write segments which can create
 	 * gaps in the sequence space.  Throw away any segments after the
 	 * gap as the kernel guarantees only those that are contiguously
 	 * reachable are marked as completed.
 	 */
 	discard = 0;
 	TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) {
 		if (!discard && newseq++ == seg->ss_rec.jsr_seq) {
 			jrecs += seg->ss_rec.jsr_cnt;
 			jbytes += seg->ss_rec.jsr_blocks * real_dev_bsize;
 			continue;
 		}
 		discard = 1;
 		if (debug)
 			printf("Journal order mismatch %jd != %jd pruning\n",
 			    newseq-1, seg->ss_rec.jsr_seq);
 		TAILQ_REMOVE(&allsegs, seg, ss_next);
 		free(seg->ss_blk);
 		free(seg);
 	}
 	if (debug)
 		printf("Processing journal segments from %jd to %jd\n",
 		    oldseq, newseq-1);
 }
 
 /*
  * Verify the journal inode before attempting to read records.
  */
 static int
 suj_verifyino(union dinode *ip)
 {
 
 	if (DIP(ip, di_nlink) != 1) {
 		printf("Invalid link count %d for journal inode %ju\n",
 		    DIP(ip, di_nlink), (uintmax_t)sujino);
 		return (-1);
 	}
 
 	if ((DIP(ip, di_flags) & (SF_IMMUTABLE | SF_NOUNLINK)) !=
 	    (SF_IMMUTABLE | SF_NOUNLINK)) {
 		printf("Invalid flags 0x%X for journal inode %ju\n",
 		    DIP(ip, di_flags), (uintmax_t)sujino);
 		return (-1);
 	}
 
 	if (DIP(ip, di_mode) != (IFREG | IREAD)) {
 		printf("Invalid mode %o for journal inode %ju\n",
 		    DIP(ip, di_mode), (uintmax_t)sujino);
 		return (-1);
 	}
 
 	if (DIP(ip, di_size) < SUJ_MIN) {
 		printf("Invalid size %jd for journal inode %ju\n",
 		    DIP(ip, di_size), (uintmax_t)sujino);
 		return (-1);
 	}
 
 	if (DIP(ip, di_modrev) != fs->fs_mtime) {
 		printf("Journal timestamp does not match fs mount time\n");
 		return (-1);
 	}
 
 	return (0);
 }
 
 struct jblocks {
 	struct jextent *jb_extent;	/* Extent array. */
 	int		jb_avail;	/* Available extents. */
 	int		jb_used;	/* Last used extent. */
 	int		jb_head;	/* Allocator head. */
 	int		jb_off;		/* Allocator extent offset. */
 };
 struct jextent {
 	ufs2_daddr_t	je_daddr;	/* Disk block address. */
 	int		je_blocks;	/* Disk block count. */
 };
 
 static struct jblocks *suj_jblocks;
 
 static struct jblocks *
 jblocks_create(void)
 {
 	struct jblocks *jblocks;
 	int size;
 
 	jblocks = errmalloc(sizeof(*jblocks));
 	jblocks->jb_avail = 10;
 	jblocks->jb_used = 0;
 	jblocks->jb_head = 0;
 	jblocks->jb_off = 0;
 	size = sizeof(struct jextent) * jblocks->jb_avail;
 	jblocks->jb_extent = errmalloc(size);
 	bzero(jblocks->jb_extent, size);
 
 	return (jblocks);
 }
 
 /*
  * Return the next available disk block and the amount of contiguous
  * free space it contains.
  */
 static ufs2_daddr_t
 jblocks_next(struct jblocks *jblocks, int bytes, int *actual)
 {
 	struct jextent *jext;
 	ufs2_daddr_t daddr;
 	int freecnt;
 	int blocks;
 
 	blocks = bytes / disk->d_bsize;
 	jext = &jblocks->jb_extent[jblocks->jb_head];
 	freecnt = jext->je_blocks - jblocks->jb_off;
 	if (freecnt == 0) {
 		jblocks->jb_off = 0;
 		if (++jblocks->jb_head > jblocks->jb_used)
 			return (0);
 		jext = &jblocks->jb_extent[jblocks->jb_head];
 		freecnt = jext->je_blocks;
 	}
 	if (freecnt > blocks)
 		freecnt = blocks;
 	*actual = freecnt * disk->d_bsize;
 	daddr = jext->je_daddr + jblocks->jb_off;
 
 	return (daddr);
 }
 
 /*
  * Advance the allocation head by a specified number of bytes, consuming
  * one journal segment.
  */
 static void
 jblocks_advance(struct jblocks *jblocks, int bytes)
 {
 
 	jblocks->jb_off += bytes / disk->d_bsize;
 }
 
 static void
 jblocks_destroy(struct jblocks *jblocks)
 {
 
 	free(jblocks->jb_extent);
 	free(jblocks);
 }
 
 static void
 jblocks_add(struct jblocks *jblocks, ufs2_daddr_t daddr, int blocks)
 {
 	struct jextent *jext;
 	int size;
 
 	jext = &jblocks->jb_extent[jblocks->jb_used];
 	/* Adding the first block. */
 	if (jext->je_daddr == 0) {
 		jext->je_daddr = daddr;
 		jext->je_blocks = blocks;
 		return;
 	}
 	/* Extending the last extent. */
 	if (jext->je_daddr + jext->je_blocks == daddr) {
 		jext->je_blocks += blocks;
 		return;
 	}
 	/* Adding a new extent. */
 	if (++jblocks->jb_used == jblocks->jb_avail) {
 		jblocks->jb_avail *= 2;
 		size = sizeof(struct jextent) * jblocks->jb_avail;
 		jext = errmalloc(size);
 		bzero(jext, size);
 		bcopy(jblocks->jb_extent, jext,
 		    sizeof(struct jextent) * jblocks->jb_used);
 		free(jblocks->jb_extent);
 		jblocks->jb_extent = jext;
 	}
 	jext = &jblocks->jb_extent[jblocks->jb_used];
 	jext->je_daddr = daddr;
 	jext->je_blocks = blocks;
 
 	return;
 }
 
 /*
  * Add a file block from the journal to the extent map.  We can't read
  * each file block individually because the kernel treats it as a circular
  * buffer and segments may span mutliple contiguous blocks.
  */
 static void
 suj_add_block(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
 
 	jblocks_add(suj_jblocks, fsbtodb(fs, blk), fsbtodb(fs, frags));
 }
 
 static void
 suj_read(void)
 {
 	uint8_t block[1 * 1024 * 1024];
 	struct suj_seg *seg;
 	struct jsegrec *recn;
 	struct jsegrec *rec;
 	ufs2_daddr_t blk;
 	int readsize;
 	int blocks;
 	int recsize;
 	int size;
 	int i;
 
 	/*
 	 * Read records until we exhaust the journal space.  If we find
 	 * an invalid record we start searching for a valid segment header
 	 * at the next block.  This is because we don't have a head/tail
 	 * pointer and must recover the information indirectly.  At the gap
 	 * between the head and tail we won't necessarily have a valid
 	 * segment.
 	 */
 restart:
 	for (;;) {
 		size = sizeof(block);
 		blk = jblocks_next(suj_jblocks, size, &readsize);
 		if (blk == 0)
 			return;
 		size = readsize;
 		/*
 		 * Read 1MB at a time and scan for records within this block.
 		 */
 		if (bread(disk, blk, &block, size) == -1) {
 			err_suj("Error reading journal block %jd\n",
 			    (intmax_t)blk);
 		}
 		for (rec = (void *)block; size; size -= recsize,
 		    rec = (struct jsegrec *)((uintptr_t)rec + recsize)) {
 			recsize = real_dev_bsize;
 			if (rec->jsr_time != fs->fs_mtime) {
 				if (debug)
 					printf("Rec time %jd != fs mtime %jd\n",
 					    rec->jsr_time, fs->fs_mtime);
 				jblocks_advance(suj_jblocks, recsize);
 				continue;
 			}
 			if (rec->jsr_cnt == 0) {
 				if (debug)
 					printf("Found illegal count %d\n",
 					    rec->jsr_cnt);
 				jblocks_advance(suj_jblocks, recsize);
 				continue;
 			}
 			blocks = rec->jsr_blocks;
 			recsize = blocks * real_dev_bsize;
 			if (recsize > size) {
 				/*
 				 * We may just have run out of buffer, restart
 				 * the loop to re-read from this spot.
 				 */
 				if (size < fs->fs_bsize &&
 				    size != readsize &&
 				    recsize <= fs->fs_bsize)
 					goto restart;
 				if (debug)
 					printf("Found invalid segsize %d > %d\n",
 					    recsize, size);
 				recsize = real_dev_bsize;
 				jblocks_advance(suj_jblocks, recsize);
 				continue;
 			}
 			/*
 			 * Verify that all blocks in the segment are present.
 			 */
 			for (i = 1; i < blocks; i++) {
 				recn = (void *)((uintptr_t)rec) + i *
 				    real_dev_bsize;
 				if (recn->jsr_seq == rec->jsr_seq &&
 				    recn->jsr_time == rec->jsr_time)
 					continue;
 				if (debug)
 					printf("Incomplete record %jd (%d)\n",
 					    rec->jsr_seq, i);
 				recsize = i * real_dev_bsize;
 				jblocks_advance(suj_jblocks, recsize);
 				goto restart;
 			}
 			seg = errmalloc(sizeof(*seg));
 			seg->ss_blk = errmalloc(recsize);
 			seg->ss_rec = *rec;
 			bcopy((void *)rec, seg->ss_blk, recsize);
 			if (rec->jsr_oldest > oldseq)
 				oldseq = rec->jsr_oldest;
 			TAILQ_INSERT_TAIL(&allsegs, seg, ss_next);
 			jblocks_advance(suj_jblocks, recsize);
 		}
 	}
 }
 
 /*
  * Search a directory block for the SUJ_FILE.
  */
 static void
 suj_find(ino_t ino, ufs_lbn_t lbn, ufs2_daddr_t blk, int frags)
 {
 	char block[MAXBSIZE];
 	struct direct *dp;
 	int bytes;
 	int off;
 
 	if (sujino)
 		return;
 	bytes = lfragtosize(fs, frags);
 	if (bread(disk, fsbtodb(fs, blk), block, bytes) <= 0)
 		err_suj("Failed to read ROOTINO directory block %jd\n", blk);
 	for (off = 0; off < bytes; off += dp->d_reclen) {
 		dp = (struct direct *)&block[off];
 		if (dp->d_reclen == 0)
 			break;
 		if (dp->d_ino == 0)
 			continue;
 		if (dp->d_namlen != strlen(SUJ_FILE))
 			continue;
 		if (bcmp(dp->d_name, SUJ_FILE, dp->d_namlen) != 0)
 			continue;
 		sujino = dp->d_ino;
 		return;
 	}
 }
 
 /*
  * Orchestrate the verification of a filesystem via the softupdates journal.
  */
 int
 suj_check(const char *filesys)
 {
 	union dinode *jip;
 	union dinode *ip;
 	uint64_t blocks;
 	int retval;
 	struct suj_seg *seg;
 	struct suj_seg *segn;
 
 	initsuj();
 	opendisk(filesys);
 
 	/*
 	 * Set an exit point when SUJ check failed
 	 */
 	retval = setjmp(jmpbuf);
 	if (retval != 0) {
 		pwarn("UNEXPECTED SU+J INCONSISTENCY\n");
 		TAILQ_FOREACH_SAFE(seg, &allsegs, ss_next, segn) {
 			TAILQ_REMOVE(&allsegs, seg, ss_next);
 				free(seg->ss_blk);
 				free(seg);
 		}
 		if (reply("FALLBACK TO FULL FSCK") == 0) {
 			ckfini(0);
 			exit(EEXIT);
 		} else
 			return (-1);
 	}
 
 	/*
 	 * Find the journal inode.
 	 */
 	ip = ino_read(ROOTINO);
 	sujino = 0;
 	ino_visit(ip, ROOTINO, suj_find, 0);
 	if (sujino == 0) {
 		printf("Journal inode removed.  Use tunefs to re-create.\n");
 		sblock.fs_flags &= ~FS_SUJ;
 		sblock.fs_sujfree = 0;
 		return (-1);
 	}
 	/*
 	 * Fetch the journal inode and verify it.
 	 */
 	jip = ino_read(sujino);
 	printf("** SU+J Recovering %s\n", filesys);
 	if (suj_verifyino(jip) != 0)
 		return (-1);
 	/*
 	 * Build a list of journal blocks in jblocks before parsing the
 	 * available journal blocks in with suj_read().
 	 */
 	printf("** Reading %jd byte journal from inode %ju.\n",
 	    DIP(jip, di_size), (uintmax_t)sujino);
 	suj_jblocks = jblocks_create();
 	blocks = ino_visit(jip, sujino, suj_add_block, 0);
 	if (blocks != numfrags(fs, DIP(jip, di_size))) {
 		printf("Sparse journal inode %ju.\n", (uintmax_t)sujino);
 		return (-1);
 	}
 	suj_read();
 	jblocks_destroy(suj_jblocks);
 	suj_jblocks = NULL;
 	if (preen || reply("RECOVER")) {
 		printf("** Building recovery table.\n");
 		suj_prune();
 		suj_build();
 		cg_apply(cg_build);
 		printf("** Resolving unreferenced inode list.\n");
 		ino_unlinked();
 		printf("** Processing journal entries.\n");
 		cg_apply(cg_trunc);
 		cg_apply(cg_check_blk);
 		cg_apply(cg_adj_blk);
 		cg_apply(cg_check_ino);
 	}
 	if (preen == 0 && (jrecs > 0 || jbytes > 0) && reply("WRITE CHANGES") == 0)
 		return (0);
 	/*
 	 * To remain idempotent with partial truncations the free bitmaps
 	 * must be written followed by indirect blocks and lastly inode
 	 * blocks.  This preserves access to the modified pointers until
 	 * they are freed.
 	 */
 	cg_apply(cg_write);
 	dblk_write();
 	cg_apply(cg_write_inos);
 	/* Write back superblock. */
 	closedisk(filesys);
 	if (jrecs > 0 || jbytes > 0) {
 		printf("** %jd journal records in %jd bytes for %.2f%% utilization\n",
 		    jrecs, jbytes, ((float)jrecs / (float)(jbytes / JREC_SIZE)) * 100);
 		printf("** Freed %jd inodes (%jd dirs) %jd blocks, and %jd frags.\n",
 		    freeinos, freedir, freeblocks, freefrags);
 	}
 
 	return (0);
 }
 
 static void
 initsuj(void)
 {
 	int i;
 
 	for (i = 0; i < SUJ_HASHSIZE; i++) {
 		LIST_INIT(&cghash[i]);
 		LIST_INIT(&dbhash[i]);
 	}
 	lastcg = NULL;
 	lastblk = NULL;
 	TAILQ_INIT(&allsegs);
 	oldseq = 0;
 	disk = NULL;
 	fs = NULL;
 	sujino = 0;
 	freefrags = 0;
 	freeblocks = 0;
 	freeinos = 0;
 	freedir = 0;
 	jbytes = 0;
 	jrecs = 0;
 	suj_jblocks = NULL;
 }
Index: head/sys/fs/nfsclient/nfs_clvnops.c
===================================================================
--- head/sys/fs/nfsclient/nfs_clvnops.c	(revision 311521)
+++ head/sys/fs/nfsclient/nfs_clvnops.c	(revision 311522)
@@ -1,3498 +1,3498 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Rick Macklem at The University of Guelph.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	from nfs_vnops.c	8.16 (Berkeley) 5/27/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 /*
  * vnode op calls for Sun NFS version 2, 3 and 4
  */
 
 #include "opt_inet.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/resourcevar.h>
 #include <sys/proc.h>
 #include <sys/mount.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/jail.h>
 #include <sys/malloc.h>
 #include <sys/mbuf.h>
 #include <sys/namei.h>
 #include <sys/socket.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/lockf.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/signalvar.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 
 #include <fs/nfs/nfsport.h>
 #include <fs/nfsclient/nfsnode.h>
 #include <fs/nfsclient/nfsmount.h>
 #include <fs/nfsclient/nfs.h>
 #include <fs/nfsclient/nfs_kdtrace.h>
 
 #include <net/if.h>
 #include <netinet/in.h>
 #include <netinet/in_var.h>
 
 #include <nfs/nfs_lock.h>
 
 #ifdef KDTRACE_HOOKS
 #include <sys/dtrace_bsd.h>
 
 dtrace_nfsclient_accesscache_flush_probe_func_t
 		dtrace_nfscl_accesscache_flush_done_probe;
 uint32_t	nfscl_accesscache_flush_done_id;
 
 dtrace_nfsclient_accesscache_get_probe_func_t
 		dtrace_nfscl_accesscache_get_hit_probe,
 		dtrace_nfscl_accesscache_get_miss_probe;
 uint32_t	nfscl_accesscache_get_hit_id;
 uint32_t	nfscl_accesscache_get_miss_id;
 
 dtrace_nfsclient_accesscache_load_probe_func_t
 		dtrace_nfscl_accesscache_load_done_probe;
 uint32_t	nfscl_accesscache_load_done_id;
 #endif /* !KDTRACE_HOOKS */
 
 /* Defs */
 #define	TRUE	1
 #define	FALSE	0
 
 extern struct nfsstatsv1 nfsstatsv1;
 extern int nfsrv_useacl;
 extern int nfscl_debuglevel;
 MALLOC_DECLARE(M_NEWNFSREQ);
 
 static vop_read_t	nfsfifo_read;
 static vop_write_t	nfsfifo_write;
 static vop_close_t	nfsfifo_close;
 static int	nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *,
 		    struct thread *);
 static vop_lookup_t	nfs_lookup;
 static vop_create_t	nfs_create;
 static vop_mknod_t	nfs_mknod;
 static vop_open_t	nfs_open;
 static vop_pathconf_t	nfs_pathconf;
 static vop_close_t	nfs_close;
 static vop_access_t	nfs_access;
 static vop_getattr_t	nfs_getattr;
 static vop_setattr_t	nfs_setattr;
 static vop_read_t	nfs_read;
 static vop_fsync_t	nfs_fsync;
 static vop_remove_t	nfs_remove;
 static vop_link_t	nfs_link;
 static vop_rename_t	nfs_rename;
 static vop_mkdir_t	nfs_mkdir;
 static vop_rmdir_t	nfs_rmdir;
 static vop_symlink_t	nfs_symlink;
 static vop_readdir_t	nfs_readdir;
 static vop_strategy_t	nfs_strategy;
 static	int	nfs_lookitup(struct vnode *, char *, int,
 		    struct ucred *, struct thread *, struct nfsnode **);
 static	int	nfs_sillyrename(struct vnode *, struct vnode *,
 		    struct componentname *);
 static vop_access_t	nfsspec_access;
 static vop_readlink_t	nfs_readlink;
 static vop_print_t	nfs_print;
 static vop_advlock_t	nfs_advlock;
 static vop_advlockasync_t nfs_advlockasync;
 static vop_getacl_t nfs_getacl;
 static vop_setacl_t nfs_setacl;
 
 /*
  * Global vfs data structures for nfs
  */
 struct vop_vector newnfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 	.vop_access =		nfs_access,
 	.vop_advlock =		nfs_advlock,
 	.vop_advlockasync =	nfs_advlockasync,
 	.vop_close =		nfs_close,
 	.vop_create =		nfs_create,
 	.vop_fsync =		nfs_fsync,
 	.vop_getattr =		nfs_getattr,
 	.vop_getpages =		ncl_getpages,
 	.vop_putpages =		ncl_putpages,
 	.vop_inactive =		ncl_inactive,
 	.vop_link =		nfs_link,
 	.vop_lookup =		nfs_lookup,
 	.vop_mkdir =		nfs_mkdir,
 	.vop_mknod =		nfs_mknod,
 	.vop_open =		nfs_open,
 	.vop_pathconf =		nfs_pathconf,
 	.vop_print =		nfs_print,
 	.vop_read =		nfs_read,
 	.vop_readdir =		nfs_readdir,
 	.vop_readlink =		nfs_readlink,
 	.vop_reclaim =		ncl_reclaim,
 	.vop_remove =		nfs_remove,
 	.vop_rename =		nfs_rename,
 	.vop_rmdir =		nfs_rmdir,
 	.vop_setattr =		nfs_setattr,
 	.vop_strategy =		nfs_strategy,
 	.vop_symlink =		nfs_symlink,
 	.vop_write =		ncl_write,
 	.vop_getacl =		nfs_getacl,
 	.vop_setacl =		nfs_setacl,
 };
 
 struct vop_vector newnfs_fifoops = {
 	.vop_default =		&fifo_specops,
 	.vop_access =		nfsspec_access,
 	.vop_close =		nfsfifo_close,
 	.vop_fsync =		nfs_fsync,
 	.vop_getattr =		nfs_getattr,
 	.vop_inactive =		ncl_inactive,
 	.vop_print =		nfs_print,
 	.vop_read =		nfsfifo_read,
 	.vop_reclaim =		ncl_reclaim,
 	.vop_setattr =		nfs_setattr,
 	.vop_write =		nfsfifo_write,
 };
 
 static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp,
     struct componentname *cnp, struct vattr *vap);
 static int nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
     int namelen, struct ucred *cred, struct thread *td);
 static int nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp,
     char *fnameptr, int fnamelen, struct vnode *tdvp, struct vnode *tvp,
     char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td);
 static int nfs_renameit(struct vnode *sdvp, struct vnode *svp,
     struct componentname *scnp, struct sillyrename *sp);
 
 /*
  * Global variables
  */
 #define	DIRHDSIZ	(sizeof (struct dirent) - (MAXNAMLEN + 1))
 
 SYSCTL_DECL(_vfs_nfs);
 
 static int	nfsaccess_cache_timeout = NFS_MAXATTRTIMO;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW,
 	   &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout");
 
 static int	nfs_prime_access_cache = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, prime_access_cache, CTLFLAG_RW,
 	   &nfs_prime_access_cache, 0,
 	   "Prime NFS ACCESS cache when fetching attributes");
 
 static int	newnfs_commit_on_close = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, commit_on_close, CTLFLAG_RW,
     &newnfs_commit_on_close, 0, "write+commit on close, else only write");
 
 static int	nfs_clean_pages_on_close = 1;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW,
 	   &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close");
 
 int newnfs_directio_enable = 0;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW,
 	   &newnfs_directio_enable, 0, "Enable NFS directio");
 
 int nfs_keep_dirty_on_error;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_keep_dirty_on_error, CTLFLAG_RW,
     &nfs_keep_dirty_on_error, 0, "Retry pageout if error returned");
 
 /*
  * This sysctl allows other processes to mmap a file that has been opened
  * O_DIRECT by a process.  In general, having processes mmap the file while
  * Direct IO is in progress can lead to Data Inconsistencies.  But, we allow
  * this by default to prevent DoS attacks - to prevent a malicious user from
  * opening up files O_DIRECT preventing other users from mmap'ing these
  * files.  "Protected" environments where stricter consistency guarantees are
  * required can disable this knob.  The process that opened the file O_DIRECT
  * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not
  * meaningful.
  */
 int newnfs_directio_allow_mmap = 1;
 SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW,
 	   &newnfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens");
 
 #define	NFSACCESS_ALL (NFSACCESS_READ | NFSACCESS_MODIFY		\
 			 | NFSACCESS_EXTEND | NFSACCESS_EXECUTE	\
 			 | NFSACCESS_DELETE | NFSACCESS_LOOKUP)
 
 /*
  * SMP Locking Note :
  * The list of locks after the description of the lock is the ordering
  * of other locks acquired with the lock held.
  * np->n_mtx : Protects the fields in the nfsnode.
        VM Object Lock
        VI_MTX (acquired indirectly)
  * nmp->nm_mtx : Protects the fields in the nfsmount.
        rep->r_mtx
  * ncl_iod_mutex : Global lock, protects shared nfsiod state.
  * nfs_reqq_mtx : Global lock, protects the nfs_reqq list.
        nmp->nm_mtx
        rep->r_mtx
  * rep->r_mtx : Protects the fields in an nfsreq.
  */
 
 static int
 nfs34_access_otw(struct vnode *vp, int wmode, struct thread *td,
     struct ucred *cred, u_int32_t *retmode)
 {
 	int error = 0, attrflag, i, lrupos;
 	u_int32_t rmode;
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsvattr nfsva;
 
 	error = nfsrpc_accessrpc(vp, wmode, cred, td, &nfsva, &attrflag,
 	    &rmode, NULL);
 	if (attrflag)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 	if (!error) {
 		lrupos = 0;
 		mtx_lock(&np->n_mtx);
 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
 			if (np->n_accesscache[i].uid == cred->cr_uid) {
 				np->n_accesscache[i].mode = rmode;
 				np->n_accesscache[i].stamp = time_second;
 				break;
 			}
 			if (i > 0 && np->n_accesscache[i].stamp <
 			    np->n_accesscache[lrupos].stamp)
 				lrupos = i;
 		}
 		if (i == NFS_ACCESSCACHESIZE) {
 			np->n_accesscache[lrupos].uid = cred->cr_uid;
 			np->n_accesscache[lrupos].mode = rmode;
 			np->n_accesscache[lrupos].stamp = time_second;
 		}
 		mtx_unlock(&np->n_mtx);
 		if (retmode != NULL)
 			*retmode = rmode;
 		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, rmode, 0);
 	} else if (NFS_ISV4(vp)) {
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	}
 #ifdef KDTRACE_HOOKS
 	if (error != 0)
 		KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, cred->cr_uid, 0,
 		    error);
 #endif
 	return (error);
 }
 
 /*
  * nfs access vnode op.
  * For nfs version 2, just return ok. File accesses may fail later.
  * For nfs version 3, use the access rpc to check accessibility. If file modes
  * are changed on the server, accesses might still fail later.
  */
 static int
 nfs_access(struct vop_access_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	int error = 0, i, gotahit;
 	u_int32_t mode, wmode, rmode;
 	int v34 = NFS_ISV34(vp);
 	struct nfsnode *np = VTONFS(vp);
 
 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
 	 */
 	if ((ap->a_accmode & (VWRITE | VAPPEND | VWRITE_NAMED_ATTRS |
 	    VDELETE_CHILD | VWRITE_ATTRIBUTES | VDELETE | VWRITE_ACL |
 	    VWRITE_OWNER)) != 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) != 0) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
 		default:
 			break;
 		}
 	}
 	/*
 	 * For nfs v3 or v4, check to see if we have done this recently, and if
 	 * so return our cached result instead of making an ACCESS call.
 	 * If not, do an access rpc, otherwise you are stuck emulating
 	 * ufs_access() locally using the vattr. This may not be correct,
 	 * since the server may apply other access criteria such as
 	 * client uid-->server uid mapping that we do not know about.
 	 */
 	if (v34) {
 		if (ap->a_accmode & VREAD)
 			mode = NFSACCESS_READ;
 		else
 			mode = 0;
 		if (vp->v_type != VDIR) {
 			if (ap->a_accmode & VWRITE)
 				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
 			if (ap->a_accmode & VAPPEND)
 				mode |= NFSACCESS_EXTEND;
 			if (ap->a_accmode & VEXEC)
 				mode |= NFSACCESS_EXECUTE;
 			if (ap->a_accmode & VDELETE)
 				mode |= NFSACCESS_DELETE;
 		} else {
 			if (ap->a_accmode & VWRITE)
 				mode |= (NFSACCESS_MODIFY | NFSACCESS_EXTEND);
 			if (ap->a_accmode & VAPPEND)
 				mode |= NFSACCESS_EXTEND;
 			if (ap->a_accmode & VEXEC)
 				mode |= NFSACCESS_LOOKUP;
 			if (ap->a_accmode & VDELETE)
 				mode |= NFSACCESS_DELETE;
 			if (ap->a_accmode & VDELETE_CHILD)
 				mode |= NFSACCESS_MODIFY;
 		}
 		/* XXX safety belt, only make blanket request if caching */
 		if (nfsaccess_cache_timeout > 0) {
 			wmode = NFSACCESS_READ | NFSACCESS_MODIFY |
 				NFSACCESS_EXTEND | NFSACCESS_EXECUTE |
 				NFSACCESS_DELETE | NFSACCESS_LOOKUP;
 		} else {
 			wmode = mode;
 		}
 
 		/*
 		 * Does our cached result allow us to give a definite yes to
 		 * this request?
 		 */
 		gotahit = 0;
 		mtx_lock(&np->n_mtx);
 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++) {
 			if (ap->a_cred->cr_uid == np->n_accesscache[i].uid) {
 			    if (time_second < (np->n_accesscache[i].stamp
 				+ nfsaccess_cache_timeout) &&
 				(np->n_accesscache[i].mode & mode) == mode) {
 				NFSINCRGLOBAL(nfsstatsv1.accesscache_hits);
 				gotahit = 1;
 			    }
 			    break;
 			}
 		}
 		mtx_unlock(&np->n_mtx);
 #ifdef KDTRACE_HOOKS
 		if (gotahit != 0)
 			KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp,
 			    ap->a_cred->cr_uid, mode);
 		else
 			KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp,
 			    ap->a_cred->cr_uid, mode);
 #endif
 		if (gotahit == 0) {
 			/*
 			 * Either a no, or a don't know.  Go to the wire.
 			 */
 			NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
 		        error = nfs34_access_otw(vp, wmode, ap->a_td,
 			    ap->a_cred, &rmode);
 			if (!error &&
 			    (rmode & mode) != mode)
 				error = EACCES;
 		}
 		return (error);
 	} else {
 		if ((error = nfsspec_access(ap)) != 0) {
 			return (error);
 		}
 		/*
 		 * Attempt to prevent a mapped root from accessing a file
 		 * which it shouldn't.  We try to read a byte from the file
 		 * if the user is root and the file is not zero length.
 		 * After calling nfsspec_access, we should have the correct
 		 * file size cached.
 		 */
 		mtx_lock(&np->n_mtx);
 		if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD)
 		    && VTONFS(vp)->n_size > 0) {
 			struct iovec aiov;
 			struct uio auio;
 			char buf[1];
 
 			mtx_unlock(&np->n_mtx);
 			aiov.iov_base = buf;
 			aiov.iov_len = 1;
 			auio.uio_iov = &aiov;
 			auio.uio_iovcnt = 1;
 			auio.uio_offset = 0;
 			auio.uio_resid = 1;
 			auio.uio_segflg = UIO_SYSSPACE;
 			auio.uio_rw = UIO_READ;
 			auio.uio_td = ap->a_td;
 
 			if (vp->v_type == VREG)
 				error = ncl_readrpc(vp, &auio, ap->a_cred);
 			else if (vp->v_type == VDIR) {
 				char* bp;
 				bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK);
 				aiov.iov_base = bp;
 				aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ;
 				error = ncl_readdirrpc(vp, &auio, ap->a_cred,
 				    ap->a_td);
 				free(bp, M_TEMP);
 			} else if (vp->v_type == VLNK)
 				error = ncl_readlinkrpc(vp, &auio, ap->a_cred);
 			else
 				error = EACCES;
 		} else
 			mtx_unlock(&np->n_mtx);
 		return (error);
 	}
 }
 
 
 /*
  * nfs open vnode op
  * Check to see if the type is ok
  * and that deletion is not in progress.
  * For paged in text files, you will need to flush the page cache
  * if consistency is lost.
  */
 /* ARGSUSED */
 static int
 nfs_open(struct vop_open_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct vattr vattr;
 	int error;
 	int fmode = ap->a_mode;
 	struct ucred *cred;
 
 	if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
 		return (EOPNOTSUPP);
 
 	/*
 	 * For NFSv4, we need to do the Open Op before cache validation,
 	 * so that we conform to RFC3530 Sec. 9.3.1.
 	 */
 	if (NFS_ISV4(vp)) {
 		error = nfsrpc_open(vp, fmode, ap->a_cred, ap->a_td);
 		if (error) {
 			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
 			    (gid_t)0);
 			return (error);
 		}
 	}
 
 	/*
 	 * Now, if this Open will be doing reading, re-validate/flush the
 	 * cache, so that Close/Open coherency is maintained.
 	 */
 	mtx_lock(&np->n_mtx);
 	if (np->n_flag & NMODIFIED) {
 		mtx_unlock(&np->n_mtx);
 		error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 		if (error == EINTR || error == EIO) {
 			if (NFS_ISV4(vp))
 				(void) nfsrpc_close(vp, 0, ap->a_td);
 			return (error);
 		}
 		mtx_lock(&np->n_mtx);
 		np->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 		if (vp->v_type == VDIR)
 			np->n_direofoffset = 0;
 		mtx_unlock(&np->n_mtx);
 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 		if (error) {
 			if (NFS_ISV4(vp))
 				(void) nfsrpc_close(vp, 0, ap->a_td);
 			return (error);
 		}
 		mtx_lock(&np->n_mtx);
 		np->n_mtime = vattr.va_mtime;
 		if (NFS_ISV4(vp))
 			np->n_change = vattr.va_filerev;
 	} else {
 		mtx_unlock(&np->n_mtx);
 		error = VOP_GETATTR(vp, &vattr, ap->a_cred);
 		if (error) {
 			if (NFS_ISV4(vp))
 				(void) nfsrpc_close(vp, 0, ap->a_td);
 			return (error);
 		}
 		mtx_lock(&np->n_mtx);
 		if ((NFS_ISV4(vp) && np->n_change != vattr.va_filerev) ||
 		    NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
 			if (vp->v_type == VDIR)
 				np->n_direofoffset = 0;
 			mtx_unlock(&np->n_mtx);
 			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 			if (error == EINTR || error == EIO) {
 				if (NFS_ISV4(vp))
 					(void) nfsrpc_close(vp, 0, ap->a_td);
 				return (error);
 			}
 			mtx_lock(&np->n_mtx);
 			np->n_mtime = vattr.va_mtime;
 			if (NFS_ISV4(vp))
 				np->n_change = vattr.va_filerev;
 		}
 	}
 
 	/*
 	 * If the object has >= 1 O_DIRECT active opens, we disable caching.
 	 */
 	if (newnfs_directio_enable && (fmode & O_DIRECT) &&
 	    (vp->v_type == VREG)) {
 		if (np->n_directio_opens == 0) {
 			mtx_unlock(&np->n_mtx);
 			error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 			if (error) {
 				if (NFS_ISV4(vp))
 					(void) nfsrpc_close(vp, 0, ap->a_td);
 				return (error);
 			}
 			mtx_lock(&np->n_mtx);
 			np->n_flag |= NNONCACHE;
 		}
 		np->n_directio_opens++;
 	}
 
 	/* If opened for writing via NFSv4.1 or later, mark that for pNFS. */
 	if (NFSHASPNFS(VFSTONFS(vp->v_mount)) && (fmode & FWRITE) != 0)
 		np->n_flag |= NWRITEOPENED;
 
 	/*
 	 * If this is an open for writing, capture a reference to the
 	 * credentials, so they can be used by ncl_putpages(). Using
 	 * these write credentials is preferable to the credentials of
 	 * whatever thread happens to be doing the VOP_PUTPAGES() since
 	 * the write RPCs are less likely to fail with EACCES.
 	 */
 	if ((fmode & FWRITE) != 0) {
 		cred = np->n_writecred;
 		np->n_writecred = crhold(ap->a_cred);
 	} else
 		cred = NULL;
 	mtx_unlock(&np->n_mtx);
 
 	if (cred != NULL)
 		crfree(cred);
 	vnode_create_vobject(vp, vattr.va_size, ap->a_td);
 	return (0);
 }
 
 /*
  * nfs close vnode op
  * What an NFS client should do upon close after writing is a debatable issue.
  * Most NFS clients push delayed writes to the server upon close, basically for
  * two reasons:
  * 1 - So that any write errors may be reported back to the client process
  *     doing the close system call. By far the two most likely errors are
  *     NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure.
  * 2 - To put a worst case upper bound on cache inconsistency between
  *     multiple clients for the file.
  * There is also a consistency problem for Version 2 of the protocol w.r.t.
  * not being able to tell if other clients are writing a file concurrently,
  * since there is no way of knowing if the changed modify time in the reply
  * is only due to the write for this client.
  * (NFS Version 3 provides weak cache consistency data in the reply that
  *  should be sufficient to detect and handle this case.)
  *
  * The current code does the following:
  * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers
  * for NFS Version 3 - flush dirty buffers to the server but don't invalidate
  *                     or commit them (this satisfies 1 and 2 except for the
  *                     case where the server crashes after this close but
  *                     before the commit RPC, which is felt to be "good
  *                     enough". Changing the last argument to ncl_flush() to
  *                     a 1 would force a commit operation, if it is felt a
  *                     commit is necessary now.
  * for NFS Version 4 - flush the dirty buffers and commit them, if
  *		       nfscl_mustflush() says this is necessary.
  *                     It is necessary if there is no write delegation held,
  *                     in order to satisfy open/close coherency.
  *                     If the file isn't cached on local stable storage,
  *                     it may be necessary in order to detect "out of space"
  *                     errors from the server, if the write delegation
  *                     issued by the server doesn't allow the file to grow.
  */
 /* ARGSUSED */
 static int
 nfs_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct nfsvattr nfsva;
 	struct ucred *cred;
 	int error = 0, ret, localcred = 0;
 	int fmode = ap->a_fflag;
 
 	if ((vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF))
 		return (0);
 	/*
 	 * During shutdown, a_cred isn't valid, so just use root.
 	 */
 	if (ap->a_cred == NOCRED) {
 		cred = newnfs_getcred();
 		localcred = 1;
 	} else {
 		cred = ap->a_cred;
 	}
 	if (vp->v_type == VREG) {
 	    /*
 	     * Examine and clean dirty pages, regardless of NMODIFIED.
 	     * This closes a major hole in close-to-open consistency.
 	     * We want to push out all dirty pages (and buffers) on
 	     * close, regardless of whether they were dirtied by
 	     * mmap'ed writes or via write().
 	     */
 	    if (nfs_clean_pages_on_close && vp->v_object) {
 		VM_OBJECT_WLOCK(vp->v_object);
 		vm_object_page_clean(vp->v_object, 0, 0, 0);
 		VM_OBJECT_WUNLOCK(vp->v_object);
 	    }
 	    mtx_lock(&np->n_mtx);
 	    if (np->n_flag & NMODIFIED) {
 		mtx_unlock(&np->n_mtx);
 		if (NFS_ISV3(vp)) {
 		    /*
 		     * Under NFSv3 we have dirty buffers to dispose of.  We
 		     * must flush them to the NFS server.  We have the option
 		     * of waiting all the way through the commit rpc or just
 		     * waiting for the initial write.  The default is to only
 		     * wait through the initial write so the data is in the
 		     * server's cache, which is roughly similar to the state
 		     * a standard disk subsystem leaves the file in on close().
 		     *
 		     * We cannot clear the NMODIFIED bit in np->n_flag due to
 		     * potential races with other processes, and certainly
 		     * cannot clear it if we don't commit.
 		     * These races occur when there is no longer the old
 		     * traditional vnode locking implemented for Vnode Ops.
 		     */
 		    int cm = newnfs_commit_on_close ? 1 : 0;
 		    error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td, cm, 0);
 		    /* np->n_flag &= ~NMODIFIED; */
 		} else if (NFS_ISV4(vp)) { 
 			if (nfscl_mustflush(vp) != 0) {
 				int cm = newnfs_commit_on_close ? 1 : 0;
 				error = ncl_flush(vp, MNT_WAIT, cred, ap->a_td,
 				    cm, 0);
 				/*
 				 * as above w.r.t races when clearing
 				 * NMODIFIED.
 				 * np->n_flag &= ~NMODIFIED;
 				 */
 			}
 		} else
 		    error = ncl_vinvalbuf(vp, V_SAVE, ap->a_td, 1);
 		mtx_lock(&np->n_mtx);
 	    }
  	    /* 
  	     * Invalidate the attribute cache in all cases.
  	     * An open is going to fetch fresh attrs any way, other procs
  	     * on this node that have file open will be forced to do an 
  	     * otw attr fetch, but this is safe.
 	     * --> A user found that their RPC count dropped by 20% when
 	     *     this was commented out and I can't see any requirement
 	     *     for it, so I've disabled it when negative lookups are
 	     *     enabled. (What does this have to do with negative lookup
 	     *     caching? Well nothing, except it was reported by the
 	     *     same user that needed negative lookup caching and I wanted
 	     *     there to be a way to disable it to see if it
 	     *     is the cause of some caching/coherency issue that might
 	     *     crop up.)
  	     */
 	    if (VFSTONFS(vp->v_mount)->nm_negnametimeo == 0) {
 		    np->n_attrstamp = 0;
 		    KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 	    }
 	    if (np->n_flag & NWRITEERR) {
 		np->n_flag &= ~NWRITEERR;
 		error = np->n_error;
 	    }
 	    mtx_unlock(&np->n_mtx);
 	}
 
 	if (NFS_ISV4(vp)) {
 		/*
 		 * Get attributes so "change" is up to date.
 		 */
 		if (error == 0 && nfscl_mustflush(vp) != 0 &&
 		    vp->v_type == VREG &&
 		    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOCTO) == 0) {
 			ret = nfsrpc_getattr(vp, cred, ap->a_td, &nfsva,
 			    NULL);
 			if (!ret) {
 				np->n_change = nfsva.na_filerev;
 				(void) nfscl_loadattrcache(&vp, &nfsva, NULL,
 				    NULL, 0, 0);
 			}
 		}
 
 		/*
 		 * and do the close.
 		 */
 		ret = nfsrpc_close(vp, 0, ap->a_td);
 		if (!error && ret)
 			error = ret;
 		if (error)
 			error = nfscl_maperr(ap->a_td, error, (uid_t)0,
 			    (gid_t)0);
 	}
 	if (newnfs_directio_enable)
 		KASSERT((np->n_directio_asyncwr == 0),
 			("nfs_close: dirty unflushed (%d) directio buffers\n",
 			 np->n_directio_asyncwr));
 	if (newnfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) {
 		mtx_lock(&np->n_mtx);
 		KASSERT((np->n_directio_opens > 0), 
 			("nfs_close: unexpectedly value (0) of n_directio_opens\n"));
 		np->n_directio_opens--;
 		if (np->n_directio_opens == 0)
 			np->n_flag &= ~NNONCACHE;
 		mtx_unlock(&np->n_mtx);
 	}
 	if (localcred)
 		NFSFREECRED(cred);
 	return (error);
 }
 
 /*
  * nfs getattr call from vfs.
  */
 static int
 nfs_getattr(struct vop_getattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = curthread;	/* XXX */
 	struct nfsnode *np = VTONFS(vp);
 	int error = 0;
 	struct nfsvattr nfsva;
 	struct vattr *vap = ap->a_vap;
 	struct vattr vattr;
 
 	/*
 	 * Update local times for special files.
 	 */
 	mtx_lock(&np->n_mtx);
 	if (np->n_flag & (NACC | NUPD))
 		np->n_flag |= NCHG;
 	mtx_unlock(&np->n_mtx);
 	/*
 	 * First look in the cache.
 	 */
 	if (ncl_getattrcache(vp, &vattr) == 0) {
 		vap->va_type = vattr.va_type;
 		vap->va_mode = vattr.va_mode;
 		vap->va_nlink = vattr.va_nlink;
 		vap->va_uid = vattr.va_uid;
 		vap->va_gid = vattr.va_gid;
 		vap->va_fsid = vattr.va_fsid;
 		vap->va_fileid = vattr.va_fileid;
 		vap->va_size = vattr.va_size;
 		vap->va_blocksize = vattr.va_blocksize;
 		vap->va_atime = vattr.va_atime;
 		vap->va_mtime = vattr.va_mtime;
 		vap->va_ctime = vattr.va_ctime;
 		vap->va_gen = vattr.va_gen;
 		vap->va_flags = vattr.va_flags;
 		vap->va_rdev = vattr.va_rdev;
 		vap->va_bytes = vattr.va_bytes;
 		vap->va_filerev = vattr.va_filerev;
 		/*
 		 * Get the local modify time for the case of a write
 		 * delegation.
 		 */
 		nfscl_deleggetmodtime(vp, &vap->va_mtime);
 		return (0);
 	}
 
 	if (NFS_ISV34(vp) && nfs_prime_access_cache &&
 	    nfsaccess_cache_timeout > 0) {
 		NFSINCRGLOBAL(nfsstatsv1.accesscache_misses);
 		nfs34_access_otw(vp, NFSACCESS_ALL, td, ap->a_cred, NULL);
 		if (ncl_getattrcache(vp, ap->a_vap) == 0) {
 			nfscl_deleggetmodtime(vp, &ap->a_vap->va_mtime);
 			return (0);
 		}
 	}
 	error = nfsrpc_getattr(vp, ap->a_cred, td, &nfsva, NULL);
 	if (!error)
 		error = nfscl_loadattrcache(&vp, &nfsva, vap, NULL, 0, 0);
 	if (!error) {
 		/*
 		 * Get the local modify time for the case of a write
 		 * delegation.
 		 */
 		nfscl_deleggetmodtime(vp, &vap->va_mtime);
 	} else if (NFS_ISV4(vp)) {
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	}
 	return (error);
 }
 
 /*
  * nfs setattr call.
  */
 static int
 nfs_setattr(struct vop_setattr_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct thread *td = curthread;	/* XXX */
 	struct vattr *vap = ap->a_vap;
 	int error = 0;
 	u_quad_t tsize;
 
 #ifndef nolint
 	tsize = (u_quad_t)0;
 #endif
 
 	/*
 	 * Setting of flags and marking of atimes are not supported.
 	 */
 	if (vap->va_flags != VNOVAL)
 		return (EOPNOTSUPP);
 
 	/*
 	 * Disallow write attempts if the filesystem is mounted read-only.
 	 */
   	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
 	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
 	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
 		return (EROFS);
 	if (vap->va_size != VNOVAL) {
  		switch (vp->v_type) {
  		case VDIR:
  			return (EISDIR);
  		case VCHR:
  		case VBLK:
  		case VSOCK:
  		case VFIFO:
 			if (vap->va_mtime.tv_sec == VNOVAL &&
 			    vap->va_atime.tv_sec == VNOVAL &&
 			    vap->va_mode == (mode_t)VNOVAL &&
 			    vap->va_uid == (uid_t)VNOVAL &&
 			    vap->va_gid == (gid_t)VNOVAL)
 				return (0);		
  			vap->va_size = VNOVAL;
  			break;
  		default:
 			/*
 			 * Disallow write attempts if the filesystem is
 			 * mounted read-only.
 			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			/*
 			 *  We run vnode_pager_setsize() early (why?),
 			 * we must set np->n_size now to avoid vinvalbuf
 			 * V_SAVE races that might setsize a lower
 			 * value.
 			 */
 			mtx_lock(&np->n_mtx);
 			tsize = np->n_size;
 			mtx_unlock(&np->n_mtx);
 			error = ncl_meta_setsize(vp, ap->a_cred, td,
 			    vap->va_size);
 			mtx_lock(&np->n_mtx);
  			if (np->n_flag & NMODIFIED) {
 			    tsize = np->n_size;
 			    mtx_unlock(&np->n_mtx);
  			    if (vap->va_size == 0)
  				error = ncl_vinvalbuf(vp, 0, td, 1);
  			    else
  				error = ncl_vinvalbuf(vp, V_SAVE, td, 1);
  			    if (error) {
 				vnode_pager_setsize(vp, tsize);
 				return (error);
 			    }
 			    /*
 			     * Call nfscl_delegmodtime() to set the modify time
 			     * locally, as required.
 			     */
 			    nfscl_delegmodtime(vp);
  			} else
 			    mtx_unlock(&np->n_mtx);
 			/*
 			 * np->n_size has already been set to vap->va_size
 			 * in ncl_meta_setsize(). We must set it again since
 			 * nfs_loadattrcache() could be called through
 			 * ncl_meta_setsize() and could modify np->n_size.
 			 */
 			mtx_lock(&np->n_mtx);
  			np->n_vattr.na_size = np->n_size = vap->va_size;
 			mtx_unlock(&np->n_mtx);
   		}
   	} else {
 		mtx_lock(&np->n_mtx);
 		if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && 
 		    (np->n_flag & NMODIFIED) && vp->v_type == VREG) {
 			mtx_unlock(&np->n_mtx);
 			if ((error = ncl_vinvalbuf(vp, V_SAVE, td, 1)) != 0 &&
 			    (error == EINTR || error == EIO))
 				return (error);
 		} else
 			mtx_unlock(&np->n_mtx);
 	}
 	error = nfs_setattrrpc(vp, vap, ap->a_cred, td);
 	if (error && vap->va_size != VNOVAL) {
 		mtx_lock(&np->n_mtx);
 		np->n_size = np->n_vattr.na_size = tsize;
 		vnode_pager_setsize(vp, tsize);
 		mtx_unlock(&np->n_mtx);
 	}
 	return (error);
 }
 
 /*
  * Do an nfs setattr rpc.
  */
 static int
 nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred,
     struct thread *td)
 {
 	struct nfsnode *np = VTONFS(vp);
 	int error, ret, attrflag, i;
 	struct nfsvattr nfsva;
 
 	if (NFS_ISV34(vp)) {
 		mtx_lock(&np->n_mtx);
 		for (i = 0; i < NFS_ACCESSCACHESIZE; i++)
 			np->n_accesscache[i].stamp = 0;
 		np->n_flag |= NDELEGMOD;
 		mtx_unlock(&np->n_mtx);
 		KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp);
 	}
 	error = nfsrpc_setattr(vp, vap, NULL, cred, td, &nfsva, &attrflag,
 	    NULL);
 	if (attrflag) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 		if (ret && !error)
 			error = ret;
 	}
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(td, error, vap->va_uid, vap->va_gid);
 	return (error);
 }
 
 /*
  * nfs lookup call, one step at a time...
  * First look in cache
  * If not found, unlock the directory nfsnode and do the rpc
  */
 static int
 nfs_lookup(struct vop_lookup_args *ap)
 {
 	struct componentname *cnp = ap->a_cnp;
 	struct vnode *dvp = ap->a_dvp;
 	struct vnode **vpp = ap->a_vpp;
 	struct mount *mp = dvp->v_mount;
 	int flags = cnp->cn_flags;
 	struct vnode *newvp;
 	struct nfsmount *nmp;
 	struct nfsnode *np, *newnp;
 	int error = 0, attrflag, dattrflag, ltype, ncticks;
 	struct thread *td = cnp->cn_thread;
 	struct nfsfh *nfhp;
 	struct nfsvattr dnfsva, nfsva;
 	struct vattr vattr;
 	struct timespec nctime;
 	
 	*vpp = NULLVP;
 	if ((flags & ISLASTCN) && (mp->mnt_flag & MNT_RDONLY) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		return (EROFS);
 	if (dvp->v_type != VDIR)
 		return (ENOTDIR);
 	nmp = VFSTONFS(mp);
 	np = VTONFS(dvp);
 
 	/* For NFSv4, wait until any remove is done. */
 	mtx_lock(&np->n_mtx);
 	while (NFSHASNFSV4(nmp) && (np->n_flag & NREMOVEINPROG)) {
 		np->n_flag |= NREMOVEWANT;
 		(void) msleep((caddr_t)np, &np->n_mtx, PZERO, "nfslkup", 0);
 	}
 	mtx_unlock(&np->n_mtx);
 
 	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
 		return (error);
 	error = cache_lookup(dvp, vpp, cnp, &nctime, &ncticks);
 	if (error > 0 && error != ENOENT)
 		return (error);
 	if (error == -1) {
 		/*
 		 * Lookups of "." are special and always return the
 		 * current directory.  cache_lookup() already handles
 		 * associated locking bookkeeping, etc.
 		 */
 		if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
 			/* XXX: Is this really correct? */
 			if (cnp->cn_nameiop != LOOKUP &&
 			    (flags & ISLASTCN))
 				cnp->cn_flags |= SAVENAME;
 			return (0);
 		}
 
 		/*
 		 * We only accept a positive hit in the cache if the
 		 * change time of the file matches our cached copy.
 		 * Otherwise, we discard the cache entry and fallback
 		 * to doing a lookup RPC.  We also only trust cache
 		 * entries for less than nm_nametimeo seconds.
 		 *
 		 * To better handle stale file handles and attributes,
 		 * clear the attribute cache of this node if it is a
 		 * leaf component, part of an open() call, and not
 		 * locally modified before fetching the attributes.
 		 * This should allow stale file handles to be detected
 		 * here where we can fall back to a LOOKUP RPC to
 		 * recover rather than having nfs_open() detect the
 		 * stale file handle and failing open(2) with ESTALE.
 		 */
 		newvp = *vpp;
 		newnp = VTONFS(newvp);
 		if (!(nmp->nm_flag & NFSMNT_NOCTO) &&
 		    (flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
 		    !(newnp->n_flag & NMODIFIED)) {
 			mtx_lock(&newnp->n_mtx);
 			newnp->n_attrstamp = 0;
 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
 			mtx_unlock(&newnp->n_mtx);
 		}
 		if (nfscl_nodeleg(newvp, 0) == 0 ||
 		    ((u_int)(ticks - ncticks) < (nmp->nm_nametimeo * hz) &&
 		    VOP_GETATTR(newvp, &vattr, cnp->cn_cred) == 0 &&
 		    timespeccmp(&vattr.va_ctime, &nctime, ==))) {
 			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
 			if (cnp->cn_nameiop != LOOKUP &&
 			    (flags & ISLASTCN))
 				cnp->cn_flags |= SAVENAME;
 			return (0);
 		}
 		cache_purge(newvp);
 		if (dvp != newvp)
 			vput(newvp);
 		else 
 			vrele(newvp);
 		*vpp = NULLVP;
 	} else if (error == ENOENT) {
 		if (dvp->v_iflag & VI_DOOMED)
 			return (ENOENT);
 		/*
 		 * We only accept a negative hit in the cache if the
 		 * modification time of the parent directory matches
 		 * the cached copy in the name cache entry.
 		 * Otherwise, we discard all of the negative cache
 		 * entries for this directory.  We also only trust
 		 * negative cache entries for up to nm_negnametimeo
 		 * seconds.
 		 */
 		if ((u_int)(ticks - ncticks) < (nmp->nm_negnametimeo * hz) &&
 		    VOP_GETATTR(dvp, &vattr, cnp->cn_cred) == 0 &&
 		    timespeccmp(&vattr.va_mtime, &nctime, ==)) {
 			NFSINCRGLOBAL(nfsstatsv1.lookupcache_hits);
 			return (ENOENT);
 		}
 		cache_purge_negative(dvp);
 	}
 
 	error = 0;
 	newvp = NULLVP;
 	NFSINCRGLOBAL(nfsstatsv1.lookupcache_misses);
 	error = nfsrpc_lookup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    cnp->cn_cred, td, &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 	    NULL);
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	if (error) {
 		if (newvp != NULLVP) {
 			vput(newvp);
 			*vpp = NULLVP;
 		}
 
 		if (error != ENOENT) {
 			if (NFS_ISV4(dvp))
 				error = nfscl_maperr(td, error, (uid_t)0,
 				    (gid_t)0);
 			return (error);
 		}
 
 		/* The requested file was not found. */
 		if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
 		    (flags & ISLASTCN)) {
 			/*
 			 * XXX: UFS does a full VOP_ACCESS(dvp,
 			 * VWRITE) here instead of just checking
 			 * MNT_RDONLY.
 			 */
 			if (mp->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			cnp->cn_flags |= SAVENAME;
 			return (EJUSTRETURN);
 		}
 
 		if ((cnp->cn_flags & MAKEENTRY) != 0 && dattrflag) {
 			/*
 			 * Cache the modification time of the parent
 			 * directory from the post-op attributes in
 			 * the name cache entry.  The negative cache
 			 * entry will be ignored once the directory
 			 * has changed.  Don't bother adding the entry
 			 * if the directory has already changed.
 			 */
 			mtx_lock(&np->n_mtx);
 			if (timespeccmp(&np->n_vattr.na_mtime,
 			    &dnfsva.na_mtime, ==)) {
 				mtx_unlock(&np->n_mtx);
 				cache_enter_time(dvp, NULL, cnp,
 				    &dnfsva.na_mtime, NULL);
 			} else
 				mtx_unlock(&np->n_mtx);
 		}
 		return (ENOENT);
 	}
 
 	/*
 	 * Handle RENAME case...
 	 */
 	if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) {
 		if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
 			FREE((caddr_t)nfhp, M_NFSFH);
 			return (EISDIR);
 		}
 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 		    LK_EXCLUSIVE);
 		if (error)
 			return (error);
 		newvp = NFSTOV(np);
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 		*vpp = newvp;
 		cnp->cn_flags |= SAVENAME;
 		return (0);
 	}
 
 	if (flags & ISDOTDOT) {
 		ltype = NFSVOPISLOCKED(dvp);
 		error = vfs_busy(mp, MBF_NOWAIT);
 		if (error != 0) {
 			vfs_ref(mp);
 			NFSVOPUNLOCK(dvp, 0);
 			error = vfs_busy(mp, 0);
 			NFSVOPLOCK(dvp, ltype | LK_RETRY);
 			vfs_rel(mp);
 			if (error == 0 && (dvp->v_iflag & VI_DOOMED)) {
 				vfs_unbusy(mp);
 				error = ENOENT;
 			}
 			if (error != 0)
 				return (error);
 		}
 		NFSVOPUNLOCK(dvp, 0);
 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 		    cnp->cn_lkflags);
 		if (error == 0)
 			newvp = NFSTOV(np);
 		vfs_unbusy(mp);
 		if (newvp != dvp)
 			NFSVOPLOCK(dvp, ltype | LK_RETRY);
 		if (dvp->v_iflag & VI_DOOMED) {
 			if (error == 0) {
 				if (newvp == dvp)
 					vrele(newvp);
 				else
 					vput(newvp);
 			}
 			error = ENOENT;
 		}
 		if (error != 0)
 			return (error);
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 	} else if (NFS_CMPFH(np, nfhp->nfh_fh, nfhp->nfh_len)) {
 		FREE((caddr_t)nfhp, M_NFSFH);
 		VREF(dvp);
 		newvp = dvp;
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 	} else {
 		error = nfscl_nget(mp, dvp, nfhp, cnp, td, &np, NULL,
 		    cnp->cn_lkflags);
 		if (error)
 			return (error);
 		newvp = NFSTOV(np);
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 		else if ((flags & (ISLASTCN | ISOPEN)) == (ISLASTCN | ISOPEN) &&
 		    !(np->n_flag & NMODIFIED)) {			
 			/*
 			 * Flush the attribute cache when opening a
 			 * leaf node to ensure that fresh attributes
 			 * are fetched in nfs_open() since we did not
 			 * fetch attributes from the LOOKUP reply.
 			 */
 			mtx_lock(&np->n_mtx);
 			np->n_attrstamp = 0;
 			KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(newvp);
 			mtx_unlock(&np->n_mtx);
 		}
 	}
 	if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
 		cnp->cn_flags |= SAVENAME;
 	if ((cnp->cn_flags & MAKEENTRY) &&
 	    (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN)) &&
 	    attrflag != 0 && (newvp->v_type != VDIR || dattrflag != 0))
 		cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
 		    newvp->v_type != VDIR ? NULL : &dnfsva.na_ctime);
 	*vpp = newvp;
 	return (0);
 }
 
 /*
  * nfs read call.
  * Just call ncl_bioread() to do the work.
  */
 static int
 nfs_read(struct vop_read_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	switch (vp->v_type) {
 	case VREG:
 		return (ncl_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
 	case VDIR:
 		return (EISDIR);
 	default:
 		return (EOPNOTSUPP);
 	}
 }
 
 /*
  * nfs readlink call
  */
 static int
 nfs_readlink(struct vop_readlink_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 
 	if (vp->v_type != VLNK)
 		return (EINVAL);
 	return (ncl_bioread(vp, ap->a_uio, 0, ap->a_cred));
 }
 
 /*
  * Do a readlink rpc.
  * Called by ncl_doio() from below the buffer cache.
  */
 int
 ncl_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 {
 	int error, ret, attrflag;
 	struct nfsvattr nfsva;
 
 	error = nfsrpc_readlink(vp, uiop, cred, uiop->uio_td, &nfsva,
 	    &attrflag, NULL);
 	if (attrflag) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 		if (ret && !error)
 			error = ret;
 	}
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs read rpc call
  * Ditto above
  */
 int
 ncl_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 {
 	int error, ret, attrflag;
 	struct nfsvattr nfsva;
 	struct nfsmount *nmp;
 
 	nmp = VFSTONFS(vnode_mount(vp));
 	error = EIO;
 	attrflag = 0;
 	if (NFSHASPNFS(nmp))
 		error = nfscl_doiods(vp, uiop, NULL, NULL,
 		    NFSV4OPEN_ACCESSREAD, cred, uiop->uio_td);
 	NFSCL_DEBUG(4, "readrpc: aft doiods=%d\n", error);
 	if (error != 0)
 		error = nfsrpc_read(vp, uiop, cred, uiop->uio_td, &nfsva,
 		    &attrflag, NULL);
 	if (attrflag) {
 		ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 		if (ret && !error)
 			error = ret;
 	}
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs write call
  */
 int
 ncl_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
     int *iomode, int *must_commit, int called_from_strategy)
 {
 	struct nfsvattr nfsva;
 	int error, attrflag, ret;
 	struct nfsmount *nmp;
 
 	nmp = VFSTONFS(vnode_mount(vp));
 	error = EIO;
 	attrflag = 0;
 	if (NFSHASPNFS(nmp))
 		error = nfscl_doiods(vp, uiop, iomode, must_commit,
 		    NFSV4OPEN_ACCESSWRITE, cred, uiop->uio_td);
 	NFSCL_DEBUG(4, "writerpc: aft doiods=%d\n", error);
 	if (error != 0)
 		error = nfsrpc_write(vp, uiop, iomode, must_commit, cred,
 		    uiop->uio_td, &nfsva, &attrflag, NULL,
 		    called_from_strategy);
 	if (attrflag) {
 		if (VTONFS(vp)->n_flag & ND_NFSV4)
 			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 1,
 			    1);
 		else
 			ret = nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
 			    1);
 		if (ret && !error)
 			error = ret;
 	}
 	if (DOINGASYNC(vp))
 		*iomode = NFSWRITE_FILESYNC;
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(uiop->uio_td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs mknod rpc
  * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the
  * mode set to specify the file type and the size field for rdev.
  */
 static int
 nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
     struct vattr *vap)
 {
 	struct nfsvattr nfsva, dnfsva;
 	struct vnode *newvp = NULL;
 	struct nfsnode *np = NULL, *dnp;
 	struct nfsfh *nfhp;
 	struct vattr vattr;
 	int error = 0, attrflag, dattrflag;
 	u_int32_t rdev;
 
 	if (vap->va_type == VCHR || vap->va_type == VBLK)
 		rdev = vap->va_rdev;
 	else if (vap->va_type == VFIFO || vap->va_type == VSOCK)
 		rdev = 0xffffffff;
 	else
 		return (EOPNOTSUPP);
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 		return (error);
 	error = nfsrpc_mknod(dvp, cnp->cn_nameptr, cnp->cn_namelen, vap,
 	    rdev, vap->va_type, cnp->cn_cred, cnp->cn_thread, &dnfsva,
 	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
 	if (!error) {
 		if (!nfhp)
 			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
 			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 			    NULL);
 		if (nfhp)
 			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
 			    cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
 	}
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	if (!error) {
 		newvp = NFSTOV(np);
 		if (attrflag != 0) {
 			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 			if (error != 0)
 				vput(newvp);
 		}
 	}
 	if (!error) {
 		*vpp = newvp;
 	} else if (NFS_ISV4(dvp)) {
 		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
 		    vap->va_gid);
 	}
 	dnp = VTONFS(dvp);
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (!dattrflag) {
 		dnp->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	mtx_unlock(&dnp->n_mtx);
 	return (error);
 }
 
 /*
  * nfs mknod vop
  * just call nfs_mknodrpc() to do the work.
  */
 /* ARGSUSED */
 static int
 nfs_mknod(struct vop_mknod_args *ap)
 {
 	return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap));
 }
 
 static struct mtx nfs_cverf_mtx;
 MTX_SYSINIT(nfs_cverf_mtx, &nfs_cverf_mtx, "NFS create verifier mutex",
     MTX_DEF);
 
 static nfsquad_t
 nfs_get_cverf(void)
 {
 	static nfsquad_t cverf;
 	nfsquad_t ret;
 	static int cverf_initialized = 0;
 
 	mtx_lock(&nfs_cverf_mtx);
 	if (cverf_initialized == 0) {
 		cverf.lval[0] = arc4random();
 		cverf.lval[1] = arc4random();
 		cverf_initialized = 1;
 	} else
 		cverf.qval++;
 	ret = cverf;
 	mtx_unlock(&nfs_cverf_mtx);
 
 	return (ret);
 }
 
 /*
  * nfs file create call
  */
 static int
 nfs_create(struct vop_create_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np = NULL, *dnp;
 	struct vnode *newvp = NULL;
 	struct nfsmount *nmp;
 	struct nfsvattr dnfsva, nfsva;
 	struct nfsfh *nfhp;
 	nfsquad_t cverf;
 	int error = 0, attrflag, dattrflag, fmode = 0;
 	struct vattr vattr;
 
 	/*
 	 * Oops, not for me..
 	 */
 	if (vap->va_type == VSOCK)
 		return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap));
 
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)))
 		return (error);
 	if (vap->va_vaflags & VA_EXCLUSIVE)
 		fmode |= O_EXCL;
 	dnp = VTONFS(dvp);
 	nmp = VFSTONFS(vnode_mount(dvp));
 again:
 	/* For NFSv4, wait until any remove is done. */
 	mtx_lock(&dnp->n_mtx);
 	while (NFSHASNFSV4(nmp) && (dnp->n_flag & NREMOVEINPROG)) {
 		dnp->n_flag |= NREMOVEWANT;
 		(void) msleep((caddr_t)dnp, &dnp->n_mtx, PZERO, "nfscrt", 0);
 	}
 	mtx_unlock(&dnp->n_mtx);
 
 	cverf = nfs_get_cverf();
 	error = nfsrpc_create(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    vap, cverf, fmode, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva,
 	    &nfhp, &attrflag, &dattrflag, NULL);
 	if (!error) {
 		if (nfhp == NULL)
 			(void) nfsrpc_lookup(dvp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread,
 			    &dnfsva, &nfsva, &nfhp, &attrflag, &dattrflag,
 			    NULL);
 		if (nfhp != NULL)
 			error = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp,
 			    cnp->cn_thread, &np, NULL, LK_EXCLUSIVE);
 	}
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	if (!error) {
 		newvp = NFSTOV(np);
 		if (attrflag == 0)
 			error = nfsrpc_getattr(newvp, cnp->cn_cred,
 			    cnp->cn_thread, &nfsva, NULL);
 		if (error == 0)
 			error = nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 	}
 	if (error) {
 		if (newvp != NULL) {
 			vput(newvp);
 			newvp = NULL;
 		}
 		if (NFS_ISV34(dvp) && (fmode & O_EXCL) &&
 		    error == NFSERR_NOTSUPP) {
 			fmode &= ~O_EXCL;
 			goto again;
 		}
 	} else if (NFS_ISV34(dvp) && (fmode & O_EXCL)) {
 		if (nfscl_checksattr(vap, &nfsva)) {
 			error = nfsrpc_setattr(newvp, vap, NULL, cnp->cn_cred,
 			    cnp->cn_thread, &nfsva, &attrflag, NULL);
 			if (error && (vap->va_uid != (uid_t)VNOVAL ||
 			    vap->va_gid != (gid_t)VNOVAL)) {
 				/* try again without setting uid/gid */
 				vap->va_uid = (uid_t)VNOVAL;
 				vap->va_gid = (uid_t)VNOVAL;
 				error = nfsrpc_setattr(newvp, vap, NULL, 
 				    cnp->cn_cred, cnp->cn_thread, &nfsva,
 				    &attrflag, NULL);
 			}
 			if (attrflag)
 				(void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
 				    NULL, 0, 1);
 			if (error != 0)
 				vput(newvp);
 		}
 	}
 	if (!error) {
 		if ((cnp->cn_flags & MAKEENTRY) && attrflag)
 			cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
 			    NULL);
 		*ap->a_vpp = newvp;
 	} else if (NFS_ISV4(dvp)) {
 		error = nfscl_maperr(cnp->cn_thread, error, vap->va_uid,
 		    vap->va_gid);
 	}
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (!dattrflag) {
 		dnp->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	mtx_unlock(&dnp->n_mtx);
 	return (error);
 }
 
 /*
  * nfs file remove call
  * To try and make nfs semantics closer to ufs semantics, a file that has
  * other processes using the vnode is renamed instead of removed and then
  * removed later on the last close.
  * - If v_usecount > 1
  *	  If a rename is not already in the works
  *	     call nfs_sillyrename() to set it up
  *     else
  *	  do the remove rpc
  */
 static int
 nfs_remove(struct vop_remove_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np = VTONFS(vp);
 	int error = 0;
 	struct vattr vattr;
 
 	KASSERT((cnp->cn_flags & HASBUF) != 0, ("nfs_remove: no name"));
 	KASSERT(vrefcnt(vp) > 0, ("nfs_remove: bad v_usecount"));
 	if (vp->v_type == VDIR)
 		error = EPERM;
 	else if (vrefcnt(vp) == 1 || (np->n_sillyrename &&
 	    VOP_GETATTR(vp, &vattr, cnp->cn_cred) == 0 &&
 	    vattr.va_nlink > 1)) {
 		/*
 		 * Purge the name cache so that the chance of a lookup for
 		 * the name succeeding while the remove is in progress is
 		 * minimized. Without node locking it can still happen, such
 		 * that an I/O op returns ESTALE, but since you get this if
 		 * another host removes the file..
 		 */
 		cache_purge(vp);
 		/*
 		 * throw away biocache buffers, mainly to avoid
 		 * unnecessary delayed writes later.
 		 */
 		error = ncl_vinvalbuf(vp, 0, cnp->cn_thread, 1);
 		/* Do the rpc */
 		if (error != EINTR && error != EIO)
 			error = nfs_removerpc(dvp, vp, cnp->cn_nameptr,
 			    cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread);
 		/*
 		 * Kludge City: If the first reply to the remove rpc is lost..
 		 *   the reply to the retransmitted request will be ENOENT
 		 *   since the file was in fact removed
 		 *   Therefore, we cheat and return success.
 		 */
 		if (error == ENOENT)
 			error = 0;
 	} else if (!np->n_sillyrename)
 		error = nfs_sillyrename(dvp, vp, cnp);
 	mtx_lock(&np->n_mtx);
 	np->n_attrstamp = 0;
 	mtx_unlock(&np->n_mtx);
 	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 	return (error);
 }
 
 /*
  * nfs file remove rpc called from nfs_inactive
  */
 int
 ncl_removeit(struct sillyrename *sp, struct vnode *vp)
 {
 	/*
 	 * Make sure that the directory vnode is still valid.
 	 * XXX we should lock sp->s_dvp here.
 	 */
 	if (sp->s_dvp->v_type == VBAD)
 		return (0);
 	return (nfs_removerpc(sp->s_dvp, vp, sp->s_name, sp->s_namlen,
 	    sp->s_cred, NULL));
 }
 
 /*
  * Nfs remove rpc, called from nfs_remove() and ncl_removeit().
  */
 static int
 nfs_removerpc(struct vnode *dvp, struct vnode *vp, char *name,
     int namelen, struct ucred *cred, struct thread *td)
 {
 	struct nfsvattr dnfsva;
 	struct nfsnode *dnp = VTONFS(dvp);
 	int error = 0, dattrflag;
 
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NREMOVEINPROG;
 	mtx_unlock(&dnp->n_mtx);
 	error = nfsrpc_remove(dvp, name, namelen, vp, cred, td, &dnfsva,
 	    &dattrflag, NULL);
 	mtx_lock(&dnp->n_mtx);
 	if ((dnp->n_flag & NREMOVEWANT)) {
 		dnp->n_flag &= ~(NREMOVEWANT | NREMOVEINPROG);
 		mtx_unlock(&dnp->n_mtx);
 		wakeup((caddr_t)dnp);
 	} else {
 		dnp->n_flag &= ~NREMOVEINPROG;
 		mtx_unlock(&dnp->n_mtx);
 	}
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (!dattrflag) {
 		dnp->n_attrstamp = 0;
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	mtx_unlock(&dnp->n_mtx);
 	if (error && NFS_ISV4(dvp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs file rename call
  */
 static int
 nfs_rename(struct vop_rename_args *ap)
 {
 	struct vnode *fvp = ap->a_fvp;
 	struct vnode *tvp = ap->a_tvp;
 	struct vnode *fdvp = ap->a_fdvp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *tcnp = ap->a_tcnp;
 	struct componentname *fcnp = ap->a_fcnp;
 	struct nfsnode *fnp = VTONFS(ap->a_fvp);
 	struct nfsnode *tdnp = VTONFS(ap->a_tdvp);
 	struct nfsv4node *newv4 = NULL;
 	int error;
 
 	KASSERT((tcnp->cn_flags & HASBUF) != 0 &&
 	    (fcnp->cn_flags & HASBUF) != 0, ("nfs_rename: no name"));
 	/* Check for cross-device rename */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
 		goto out;
 	}
 
 	if (fvp == tvp) {
 		printf("nfs_rename: fvp == tvp (can't happen)\n");
 		error = 0;
 		goto out;
 	}
 	if ((error = NFSVOPLOCK(fvp, LK_EXCLUSIVE)) != 0)
 		goto out;
 
 	/*
 	 * We have to flush B_DELWRI data prior to renaming
 	 * the file.  If we don't, the delayed-write buffers
 	 * can be flushed out later after the file has gone stale
 	 * under NFSV3.  NFSV2 does not have this problem because
 	 * ( as far as I can tell ) it flushes dirty buffers more
 	 * often.
 	 * 
 	 * Skip the rename operation if the fsync fails, this can happen
 	 * due to the server's volume being full, when we pushed out data
 	 * that was written back to our cache earlier. Not checking for
 	 * this condition can result in potential (silent) data loss.
 	 */
 	error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread);
 	NFSVOPUNLOCK(fvp, 0);
 	if (!error && tvp)
 		error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread);
 	if (error)
 		goto out;
 
 	/*
 	 * If the tvp exists and is in use, sillyrename it before doing the
 	 * rename of the new file over it.
 	 * XXX Can't sillyrename a directory.
 	 */
 	if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename &&
 		tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) {
 		vput(tvp);
 		tvp = NULL;
 	}
 
 	error = nfs_renamerpc(fdvp, fvp, fcnp->cn_nameptr, fcnp->cn_namelen,
 	    tdvp, tvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred,
 	    tcnp->cn_thread);
 
 	if (error == 0 && NFS_ISV4(tdvp)) {
 		/*
 		 * For NFSv4, check to see if it is the same name and
 		 * replace the name, if it is different.
 		 */
 		MALLOC(newv4, struct nfsv4node *,
 		    sizeof (struct nfsv4node) +
 		    tdnp->n_fhp->nfh_len + tcnp->cn_namelen - 1,
 		    M_NFSV4NODE, M_WAITOK);
 		mtx_lock(&tdnp->n_mtx);
 		mtx_lock(&fnp->n_mtx);
 		if (fnp->n_v4 != NULL && fvp->v_type == VREG &&
 		    (fnp->n_v4->n4_namelen != tcnp->cn_namelen ||
 		      NFSBCMP(tcnp->cn_nameptr, NFS4NODENAME(fnp->n_v4),
 		      tcnp->cn_namelen) ||
 		      tdnp->n_fhp->nfh_len != fnp->n_v4->n4_fhlen ||
 		      NFSBCMP(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
 			tdnp->n_fhp->nfh_len))) {
 #ifdef notdef
 { char nnn[100]; int nnnl;
 nnnl = (tcnp->cn_namelen < 100) ? tcnp->cn_namelen : 99;
 bcopy(tcnp->cn_nameptr, nnn, nnnl);
 nnn[nnnl] = '\0';
 printf("ren replace=%s\n",nnn);
 }
 #endif
 			FREE((caddr_t)fnp->n_v4, M_NFSV4NODE);
 			fnp->n_v4 = newv4;
 			newv4 = NULL;
 			fnp->n_v4->n4_fhlen = tdnp->n_fhp->nfh_len;
 			fnp->n_v4->n4_namelen = tcnp->cn_namelen;
 			NFSBCOPY(tdnp->n_fhp->nfh_fh, fnp->n_v4->n4_data,
 			    tdnp->n_fhp->nfh_len);
 			NFSBCOPY(tcnp->cn_nameptr,
 			    NFS4NODENAME(fnp->n_v4), tcnp->cn_namelen);
 		}
 		mtx_unlock(&tdnp->n_mtx);
 		mtx_unlock(&fnp->n_mtx);
 		if (newv4 != NULL)
 			FREE((caddr_t)newv4, M_NFSV4NODE);
 	}
 
 	if (fvp->v_type == VDIR) {
 		if (tvp != NULL && tvp->v_type == VDIR)
 			cache_purge(tdvp);
 		cache_purge(fdvp);
 	}
 
 out:
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp)
 		vput(tvp);
 	vrele(fdvp);
 	vrele(fvp);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
 	 */
 	if (error == ENOENT)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs file rename rpc called from nfs_remove() above
  */
 static int
 nfs_renameit(struct vnode *sdvp, struct vnode *svp, struct componentname *scnp,
     struct sillyrename *sp)
 {
 
 	return (nfs_renamerpc(sdvp, svp, scnp->cn_nameptr, scnp->cn_namelen,
 	    sdvp, NULL, sp->s_name, sp->s_namlen, scnp->cn_cred,
 	    scnp->cn_thread));
 }
 
 /*
  * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit().
  */
 static int
 nfs_renamerpc(struct vnode *fdvp, struct vnode *fvp, char *fnameptr,
     int fnamelen, struct vnode *tdvp, struct vnode *tvp, char *tnameptr,
     int tnamelen, struct ucred *cred, struct thread *td)
 {
 	struct nfsvattr fnfsva, tnfsva;
 	struct nfsnode *fdnp = VTONFS(fdvp);
 	struct nfsnode *tdnp = VTONFS(tdvp);
 	int error = 0, fattrflag, tattrflag;
 
 	error = nfsrpc_rename(fdvp, fvp, fnameptr, fnamelen, tdvp, tvp,
 	    tnameptr, tnamelen, cred, td, &fnfsva, &tnfsva, &fattrflag,
 	    &tattrflag, NULL, NULL);
 	mtx_lock(&fdnp->n_mtx);
 	fdnp->n_flag |= NMODIFIED;
 	if (fattrflag != 0) {
 		mtx_unlock(&fdnp->n_mtx);
 		(void) nfscl_loadattrcache(&fdvp, &fnfsva, NULL, NULL, 0, 1);
 	} else {
 		fdnp->n_attrstamp = 0;
 		mtx_unlock(&fdnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(fdvp);
 	}
 	mtx_lock(&tdnp->n_mtx);
 	tdnp->n_flag |= NMODIFIED;
 	if (tattrflag != 0) {
 		mtx_unlock(&tdnp->n_mtx);
 		(void) nfscl_loadattrcache(&tdvp, &tnfsva, NULL, NULL, 0, 1);
 	} else {
 		tdnp->n_attrstamp = 0;
 		mtx_unlock(&tdnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 	}
 	if (error && NFS_ISV4(fdvp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs hard link create call
  */
 static int
 nfs_link(struct vop_link_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np, *tdnp;
 	struct nfsvattr nfsva, dnfsva;
 	int error = 0, attrflag, dattrflag;
 
 	/*
 	 * Push all writes to the server, so that the attribute cache
 	 * doesn't get "out of sync" with the server.
 	 * XXX There should be a better way!
 	 */
 	VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread);
 
 	error = nfsrpc_link(tdvp, vp, cnp->cn_nameptr, cnp->cn_namelen,
 	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &attrflag,
 	    &dattrflag, NULL);
 	tdnp = VTONFS(tdvp);
 	mtx_lock(&tdnp->n_mtx);
 	tdnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		mtx_unlock(&tdnp->n_mtx);
 		(void) nfscl_loadattrcache(&tdvp, &dnfsva, NULL, NULL, 0, 1);
 	} else {
 		tdnp->n_attrstamp = 0;
 		mtx_unlock(&tdnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(tdvp);
 	}
 	if (attrflag)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 	else {
 		np = VTONFS(vp);
 		mtx_lock(&np->n_mtx);
 		np->n_attrstamp = 0;
 		mtx_unlock(&np->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 	}
 	/*
 	 * If negative lookup caching is enabled, I might as well
 	 * add an entry for this node. Not necessary for correctness,
 	 * but if negative caching is enabled, then the system
 	 * must care about lookup caching hit rate, so...
 	 */
 	if (VFSTONFS(vp->v_mount)->nm_negnametimeo != 0 &&
 	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
 		cache_enter_time(tdvp, vp, cnp, &nfsva.na_ctime, NULL);
 	}
 	if (error && NFS_ISV4(vp))
 		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
 		    (gid_t)0);
 	return (error);
 }
 
 /*
  * nfs symbolic link create call
  */
 static int
 nfs_symlink(struct vop_symlink_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsvattr nfsva, dnfsva;
 	struct nfsfh *nfhp;
 	struct nfsnode *np = NULL, *dnp;
 	struct vnode *newvp = NULL;
 	int error = 0, attrflag, dattrflag, ret;
 
 	vap->va_type = VLNK;
 	error = nfsrpc_symlink(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    ap->a_target, vap, cnp->cn_cred, cnp->cn_thread, &dnfsva,
 	    &nfsva, &nfhp, &attrflag, &dattrflag, NULL);
 	if (nfhp) {
 		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
 		    &np, NULL, LK_EXCLUSIVE);
 		if (!ret)
 			newvp = NFSTOV(np);
 		else if (!error)
 			error = ret;
 	}
 	if (newvp != NULL) {
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 	} else if (!error) {
 		/*
 		 * If we do not have an error and we could not extract the
 		 * newvp from the response due to the request being NFSv2, we
 		 * have to do a lookup in order to obtain a newvp to return.
 		 */
 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 		    cnp->cn_cred, cnp->cn_thread, &np);
 		if (!error)
 			newvp = NFSTOV(np);
 	}
 	if (error) {
 		if (newvp)
 			vput(newvp);
 		if (NFS_ISV4(dvp))
 			error = nfscl_maperr(cnp->cn_thread, error,
 			    vap->va_uid, vap->va_gid);
 	} else {
 		*ap->a_vpp = newvp;
 	}
 
 	dnp = VTONFS(dvp);
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		mtx_unlock(&dnp->n_mtx);
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	} else {
 		dnp->n_attrstamp = 0;
 		mtx_unlock(&dnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	/*
 	 * If negative lookup caching is enabled, I might as well
 	 * add an entry for this node. Not necessary for correctness,
 	 * but if negative caching is enabled, then the system
 	 * must care about lookup caching hit rate, so...
 	 */
 	if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
 	    (cnp->cn_flags & MAKEENTRY) && attrflag != 0 && error == 0) {
 		cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime, NULL);
 	}
 	return (error);
 }
 
 /*
  * nfs make dir call
  */
 static int
 nfs_mkdir(struct vop_mkdir_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *np = NULL, *dnp;
 	struct vnode *newvp = NULL;
 	struct vattr vattr;
 	struct nfsfh *nfhp;
 	struct nfsvattr nfsva, dnfsva;
 	int error = 0, attrflag, dattrflag, ret;
 
 	if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0)
 		return (error);
 	vap->va_type = VDIR;
 	error = nfsrpc_mkdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    vap, cnp->cn_cred, cnp->cn_thread, &dnfsva, &nfsva, &nfhp,
 	    &attrflag, &dattrflag, NULL);
 	dnp = VTONFS(dvp);
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		mtx_unlock(&dnp->n_mtx);
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	} else {
 		dnp->n_attrstamp = 0;
 		mtx_unlock(&dnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 	if (nfhp) {
 		ret = nfscl_nget(dvp->v_mount, dvp, nfhp, cnp, cnp->cn_thread,
 		    &np, NULL, LK_EXCLUSIVE);
 		if (!ret) {
 			newvp = NFSTOV(np);
 			if (attrflag)
 			   (void) nfscl_loadattrcache(&newvp, &nfsva, NULL,
 				NULL, 0, 1);
 		} else if (!error)
 			error = ret;
 	}
 	if (!error && newvp == NULL) {
 		error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 		    cnp->cn_cred, cnp->cn_thread, &np);
 		if (!error) {
 			newvp = NFSTOV(np);
 			if (newvp->v_type != VDIR)
 				error = EEXIST;
 		}
 	}
 	if (error) {
 		if (newvp)
 			vput(newvp);
 		if (NFS_ISV4(dvp))
 			error = nfscl_maperr(cnp->cn_thread, error,
 			    vap->va_uid, vap->va_gid);
 	} else {
 		/*
 		 * If negative lookup caching is enabled, I might as well
 		 * add an entry for this node. Not necessary for correctness,
 		 * but if negative caching is enabled, then the system
 		 * must care about lookup caching hit rate, so...
 		 */
 		if (VFSTONFS(dvp->v_mount)->nm_negnametimeo != 0 &&
 		    (cnp->cn_flags & MAKEENTRY) &&
 		    attrflag != 0 && dattrflag != 0)
 			cache_enter_time(dvp, newvp, cnp, &nfsva.na_ctime,
 			    &dnfsva.na_ctime);
 		*ap->a_vpp = newvp;
 	}
 	return (error);
 }
 
 /*
  * nfs remove directory call
  */
 static int
 nfs_rmdir(struct vop_rmdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct nfsnode *dnp;
 	struct nfsvattr dnfsva;
 	int error, dattrflag;
 
 	if (dvp == vp)
 		return (EINVAL);
 	error = nfsrpc_rmdir(dvp, cnp->cn_nameptr, cnp->cn_namelen,
 	    cnp->cn_cred, cnp->cn_thread, &dnfsva, &dattrflag, NULL);
 	dnp = VTONFS(dvp);
 	mtx_lock(&dnp->n_mtx);
 	dnp->n_flag |= NMODIFIED;
 	if (dattrflag != 0) {
 		mtx_unlock(&dnp->n_mtx);
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	} else {
 		dnp->n_attrstamp = 0;
 		mtx_unlock(&dnp->n_mtx);
 		KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(dvp);
 	}
 
 	cache_purge(dvp);
 	cache_purge(vp);
 	if (error && NFS_ISV4(dvp))
 		error = nfscl_maperr(cnp->cn_thread, error, (uid_t)0,
 		    (gid_t)0);
 	/*
 	 * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
 	 */
 	if (error == ENOENT)
 		error = 0;
 	return (error);
 }
 
 /*
  * nfs readdir call
  */
 static int
 nfs_readdir(struct vop_readdir_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct uio *uio = ap->a_uio;
 	ssize_t tresid, left;
 	int error = 0;
 	struct vattr vattr;
 	
 	if (ap->a_eofflag != NULL)
 		*ap->a_eofflag = 0;
 	if (vp->v_type != VDIR) 
 		return(EPERM);
 
 	/*
 	 * First, check for hit on the EOF offset cache
 	 */
 	if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset &&
 	    (np->n_flag & NMODIFIED) == 0) {
 		if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) {
 			mtx_lock(&np->n_mtx);
 			if ((NFS_ISV4(vp) && np->n_change == vattr.va_filerev) ||
 			    !NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) {
 				mtx_unlock(&np->n_mtx);
 				NFSINCRGLOBAL(nfsstatsv1.direofcache_hits);
 				if (ap->a_eofflag != NULL)
 					*ap->a_eofflag = 1;
 				return (0);
 			} else
 				mtx_unlock(&np->n_mtx);
 		}
 	}
 
 	/*
 	 * NFS always guarantees that directory entries don't straddle
 	 * DIRBLKSIZ boundaries.  As such, we need to limit the size
 	 * to an exact multiple of DIRBLKSIZ, to avoid copying a partial
 	 * directory entry.
 	 */
 	left = uio->uio_resid % DIRBLKSIZ;
 	if (left == uio->uio_resid)
 		return (EINVAL);
 	uio->uio_resid -= left;
 
 	/*
 	 * Call ncl_bioread() to do the real work.
 	 */
 	tresid = uio->uio_resid;
 	error = ncl_bioread(vp, uio, 0, ap->a_cred);
 
 	if (!error && uio->uio_resid == tresid) {
 		NFSINCRGLOBAL(nfsstatsv1.direofcache_misses);
 		if (ap->a_eofflag != NULL)
 			*ap->a_eofflag = 1;
 	}
 	
 	/* Add the partial DIRBLKSIZ (left) back in. */
 	uio->uio_resid += left;
 	return (error);
 }
 
 /*
  * Readdir rpc call.
  * Called from below the buffer cache by ncl_doio().
  */
 int
 ncl_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
     struct thread *td)
 {
 	struct nfsvattr nfsva;
 	nfsuint64 *cookiep, cookie;
 	struct nfsnode *dnp = VTONFS(vp);
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error = 0, eof, attrflag;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 	    ("nfs readdirrpc bad uio"));
 
 	/*
 	 * If there is no cookie, assume directory was stale.
 	 */
 	ncl_dircookie_lock(dnp);
 	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
 	if (cookiep) {
 		cookie = *cookiep;
 		ncl_dircookie_unlock(dnp);
 	} else {
 		ncl_dircookie_unlock(dnp);		
 		return (NFSERR_BAD_COOKIE);
 	}
 
 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
 		(void)ncl_fsinfo(nmp, vp, cred, td);
 
 	error = nfsrpc_readdir(vp, uiop, &cookie, cred, td, &nfsva,
 	    &attrflag, &eof, NULL);
 	if (attrflag)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 
 	if (!error) {
 		/*
 		 * We are now either at the end of the directory or have filled
 		 * the block.
 		 */
 		if (eof)
 			dnp->n_direofoffset = uiop->uio_offset;
 		else {
 			if (uiop->uio_resid > 0)
 				printf("EEK! readdirrpc resid > 0\n");
 			ncl_dircookie_lock(dnp);
 			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 			*cookiep = cookie;
 			ncl_dircookie_unlock(dnp);
 		}
 	} else if (NFS_ISV4(vp)) {
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	}
 	return (error);
 }
 
 /*
  * NFS V3 readdir plus RPC. Used in place of ncl_readdirrpc().
  */
 int
 ncl_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred,
     struct thread *td)
 {
 	struct nfsvattr nfsva;
 	nfsuint64 *cookiep, cookie;
 	struct nfsnode *dnp = VTONFS(vp);
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error = 0, attrflag, eof;
 
 	KASSERT(uiop->uio_iovcnt == 1 &&
 	    (uiop->uio_offset & (DIRBLKSIZ - 1)) == 0 &&
 	    (uiop->uio_resid & (DIRBLKSIZ - 1)) == 0,
 	    ("nfs readdirplusrpc bad uio"));
 
 	/*
 	 * If there is no cookie, assume directory was stale.
 	 */
 	ncl_dircookie_lock(dnp);
 	cookiep = ncl_getcookie(dnp, uiop->uio_offset, 0);
 	if (cookiep) {
 		cookie = *cookiep;
 		ncl_dircookie_unlock(dnp);
 	} else {
 		ncl_dircookie_unlock(dnp);
 		return (NFSERR_BAD_COOKIE);
 	}
 
 	if (NFSHASNFSV3(nmp) && !NFSHASGOTFSINFO(nmp))
 		(void)ncl_fsinfo(nmp, vp, cred, td);
 	error = nfsrpc_readdirplus(vp, uiop, &cookie, cred, td, &nfsva,
 	    &attrflag, &eof, NULL);
 	if (attrflag)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0, 1);
 
 	if (!error) {
 		/*
 		 * We are now either at end of the directory or have filled the
 		 * the block.
 		 */
 		if (eof)
 			dnp->n_direofoffset = uiop->uio_offset;
 		else {
 			if (uiop->uio_resid > 0)
 				printf("EEK! readdirplusrpc resid > 0\n");
 			ncl_dircookie_lock(dnp);
 			cookiep = ncl_getcookie(dnp, uiop->uio_offset, 1);
 			*cookiep = cookie;
 			ncl_dircookie_unlock(dnp);
 		}
 	} else if (NFS_ISV4(vp)) {
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	}
 	return (error);
 }
 
 /*
  * Silly rename. To make the NFS filesystem that is stateless look a little
  * more like the "ufs" a remove of an active vnode is translated to a rename
  * to a funny looking filename that is removed by nfs_inactive on the
  * nfsnode. There is the potential for another process on a different client
  * to create the same funny name between the nfs_lookitup() fails and the
  * nfs_rename() completes, but...
  */
 static int
 nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp)
 {
 	struct sillyrename *sp;
 	struct nfsnode *np;
 	int error;
 	short pid;
 	unsigned int lticks;
 
 	cache_purge(dvp);
 	np = VTONFS(vp);
 	KASSERT(vp->v_type != VDIR, ("nfs: sillyrename dir"));
 	MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
 	    M_NEWNFSREQ, M_WAITOK);
 	sp->s_cred = crhold(cnp->cn_cred);
 	sp->s_dvp = dvp;
 	VREF(dvp);
 
 	/* 
 	 * Fudge together a funny name.
 	 * Changing the format of the funny name to accommodate more 
 	 * sillynames per directory.
 	 * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is 
 	 * CPU ticks since boot.
 	 */
 	pid = cnp->cn_thread->td_proc->p_pid;
 	lticks = (unsigned int)ticks;
 	for ( ; ; ) {
 		sp->s_namlen = sprintf(sp->s_name, 
 				       ".nfs.%08x.%04x4.4", lticks, 
 				       pid);
 		if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 				 cnp->cn_thread, NULL))
 			break;
 		lticks++;
 	}
 	error = nfs_renameit(dvp, vp, cnp, sp);
 	if (error)
 		goto bad;
 	error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred,
 		cnp->cn_thread, &np);
 	np->n_sillyrename = sp;
 	return (0);
 bad:
 	vrele(sp->s_dvp);
 	crfree(sp->s_cred);
 	free((caddr_t)sp, M_NEWNFSREQ);
 	return (error);
 }
 
 /*
  * Look up a file name and optionally either update the file handle or
  * allocate an nfsnode, depending on the value of npp.
  * npp == NULL	--> just do the lookup
  * *npp == NULL --> allocate a new nfsnode and make sure attributes are
  *			handled too
  * *npp != NULL --> update the file handle in the vnode
  */
 static int
 nfs_lookitup(struct vnode *dvp, char *name, int len, struct ucred *cred,
     struct thread *td, struct nfsnode **npp)
 {
 	struct vnode *newvp = NULL, *vp;
 	struct nfsnode *np, *dnp = VTONFS(dvp);
 	struct nfsfh *nfhp, *onfhp;
 	struct nfsvattr nfsva, dnfsva;
 	struct componentname cn;
 	int error = 0, attrflag, dattrflag;
 	u_int hash;
 
 	error = nfsrpc_lookup(dvp, name, len, cred, td, &dnfsva, &nfsva,
 	    &nfhp, &attrflag, &dattrflag, NULL);
 	if (dattrflag)
 		(void) nfscl_loadattrcache(&dvp, &dnfsva, NULL, NULL, 0, 1);
 	if (npp && !error) {
 		if (*npp != NULL) {
 		    np = *npp;
 		    vp = NFSTOV(np);
 		    /*
 		     * For NFSv4, check to see if it is the same name and
 		     * replace the name, if it is different.
 		     */
 		    if (np->n_v4 != NULL && nfsva.na_type == VREG &&
 			(np->n_v4->n4_namelen != len ||
 			 NFSBCMP(name, NFS4NODENAME(np->n_v4), len) ||
 			 dnp->n_fhp->nfh_len != np->n_v4->n4_fhlen ||
 			 NFSBCMP(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
 			 dnp->n_fhp->nfh_len))) {
 #ifdef notdef
 { char nnn[100]; int nnnl;
 nnnl = (len < 100) ? len : 99;
 bcopy(name, nnn, nnnl);
 nnn[nnnl] = '\0';
 printf("replace=%s\n",nnn);
 }
 #endif
 			    FREE((caddr_t)np->n_v4, M_NFSV4NODE);
 			    MALLOC(np->n_v4, struct nfsv4node *,
 				sizeof (struct nfsv4node) +
 				dnp->n_fhp->nfh_len + len - 1,
 				M_NFSV4NODE, M_WAITOK);
 			    np->n_v4->n4_fhlen = dnp->n_fhp->nfh_len;
 			    np->n_v4->n4_namelen = len;
 			    NFSBCOPY(dnp->n_fhp->nfh_fh, np->n_v4->n4_data,
 				dnp->n_fhp->nfh_len);
 			    NFSBCOPY(name, NFS4NODENAME(np->n_v4), len);
 		    }
 		    hash = fnv_32_buf(nfhp->nfh_fh, nfhp->nfh_len,
 			FNV1_32_INIT);
 		    onfhp = np->n_fhp;
 		    /*
 		     * Rehash node for new file handle.
 		     */
 		    vfs_hash_rehash(vp, hash);
 		    np->n_fhp = nfhp;
 		    if (onfhp != NULL)
 			FREE((caddr_t)onfhp, M_NFSFH);
 		    newvp = NFSTOV(np);
 		} else if (NFS_CMPFH(dnp, nfhp->nfh_fh, nfhp->nfh_len)) {
 		    FREE((caddr_t)nfhp, M_NFSFH);
 		    VREF(dvp);
 		    newvp = dvp;
 		} else {
 		    cn.cn_nameptr = name;
 		    cn.cn_namelen = len;
 		    error = nfscl_nget(dvp->v_mount, dvp, nfhp, &cn, td,
 			&np, NULL, LK_EXCLUSIVE);
 		    if (error)
 			return (error);
 		    newvp = NFSTOV(np);
 		}
 		if (!attrflag && *npp == NULL) {
 			if (newvp == dvp)
 				vrele(newvp);
 			else
 				vput(newvp);
 			return (ENOENT);
 		}
 		if (attrflag)
 			(void) nfscl_loadattrcache(&newvp, &nfsva, NULL, NULL,
 			    0, 1);
 	}
 	if (npp && *npp == NULL) {
 		if (error) {
 			if (newvp) {
 				if (newvp == dvp)
 					vrele(newvp);
 				else
 					vput(newvp);
 			}
 		} else
 			*npp = np;
 	}
 	if (error && NFS_ISV4(dvp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * Nfs Version 3 and 4 commit rpc
  */
 int
 ncl_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred,
    struct thread *td)
 {
 	struct nfsvattr nfsva;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error, attrflag;
 
 	mtx_lock(&nmp->nm_mtx);
 	if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) {
 		mtx_unlock(&nmp->nm_mtx);
 		return (0);
 	}
 	mtx_unlock(&nmp->nm_mtx);
 	error = nfsrpc_commit(vp, offset, cnt, cred, td, &nfsva,
 	    &attrflag, NULL);
 	if (attrflag != 0)
 		(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL,
 		    0, 1);
 	if (error != 0 && NFS_ISV4(vp))
 		error = nfscl_maperr(td, error, (uid_t)0, (gid_t)0);
 	return (error);
 }
 
 /*
  * Strategy routine.
  * For async requests when nfsiod(s) are running, queue the request by
  * calling ncl_asyncio(), otherwise just all ncl_doio() to do the
  * request.
  */
 static int
 nfs_strategy(struct vop_strategy_args *ap)
 {
 	struct buf *bp;
 	struct vnode *vp;
 	struct ucred *cr;
 
 	bp = ap->a_bp;
 	vp = ap->a_vp;
 	KASSERT(bp->b_vp == vp, ("missing b_getvp"));
 	KASSERT(!(bp->b_flags & B_DONE),
 	    ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp));
 	BUF_ASSERT_HELD(bp);
 
 	if (vp->v_type == VREG && bp->b_blkno == bp->b_lblkno)
 		bp->b_blkno = bp->b_lblkno * (vp->v_bufobj.bo_bsize /
 		    DEV_BSIZE);
 	if (bp->b_iocmd == BIO_READ)
 		cr = bp->b_rcred;
 	else
 		cr = bp->b_wcred;
 
 	/*
 	 * If the op is asynchronous and an i/o daemon is waiting
 	 * queue the request, wake it up and wait for completion
 	 * otherwise just do it ourselves.
 	 */
 	if ((bp->b_flags & B_ASYNC) == 0 ||
 	    ncl_asyncio(VFSTONFS(vp->v_mount), bp, NOCRED, curthread))
 		(void) ncl_doio(vp, bp, cr, curthread, 1);
 	return (0);
 }
 
 /*
  * fsync vnode op. Just call ncl_flush() with commit == 1.
  */
 /* ARGSUSED */
 static int
 nfs_fsync(struct vop_fsync_args *ap)
 {
 
 	if (ap->a_vp->v_type != VREG) {
 		/*
 		 * For NFS, metadata is changed synchronously on the server,
 		 * so there is nothing to flush. Also, ncl_flush() clears
 		 * the NMODIFIED flag and that shouldn't be done here for
 		 * directories.
 		 */
 		return (0);
 	}
 	return (ncl_flush(ap->a_vp, ap->a_waitfor, NULL, ap->a_td, 1, 0));
 }
 
 /*
  * Flush all the blocks associated with a vnode.
  * 	Walk through the buffer pool and push any dirty pages
  *	associated with the vnode.
  * If the called_from_renewthread argument is TRUE, it has been called
  * from the NFSv4 renew thread and, as such, cannot block indefinitely
  * waiting for a buffer write to complete.
  */
 int
 ncl_flush(struct vnode *vp, int waitfor, struct ucred *cred, struct thread *td,
     int commit, int called_from_renewthread)
 {
 	struct nfsnode *np = VTONFS(vp);
 	struct buf *bp;
 	int i;
 	struct buf *nbp;
 	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
 	int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos;
 	int passone = 1, trycnt = 0;
 	u_quad_t off, endoff, toff;
 	struct ucred* wcred = NULL;
 	struct buf **bvec = NULL;
 	struct bufobj *bo;
 #ifndef NFS_COMMITBVECSIZ
 #define	NFS_COMMITBVECSIZ	20
 #endif
 	struct buf *bvec_on_stack[NFS_COMMITBVECSIZ];
 	int bvecsize = 0, bveccount;
 
 	if (called_from_renewthread != 0)
 		slptimeo = hz;
 	if (nmp->nm_flag & NFSMNT_INT)
 		slpflag = PCATCH;
 	if (!commit)
 		passone = 0;
 	bo = &vp->v_bufobj;
 	/*
 	 * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the
 	 * server, but has not been committed to stable storage on the server
 	 * yet. On the first pass, the byte range is worked out and the commit
 	 * rpc is done. On the second pass, ncl_writebp() is called to do the
 	 * job.
 	 */
 again:
 	off = (u_quad_t)-1;
 	endoff = 0;
 	bvecpos = 0;
 	if (NFS_ISV34(vp) && commit) {
 		if (bvec != NULL && bvec != bvec_on_stack)
 			free(bvec, M_TEMP);
 		/*
 		 * Count up how many buffers waiting for a commit.
 		 */
 		bveccount = 0;
 		BO_LOCK(bo);
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (!BUF_ISLOCKED(bp) &&
 			    (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT))
 				== (B_DELWRI | B_NEEDCOMMIT))
 				bveccount++;
 		}
 		/*
 		 * Allocate space to remember the list of bufs to commit.  It is
 		 * important to use M_NOWAIT here to avoid a race with nfs_write.
 		 * If we can't get memory (for whatever reason), we will end up
 		 * committing the buffers one-by-one in the loop below.
 		 */
 		if (bveccount > NFS_COMMITBVECSIZ) {
 			/*
 			 * Release the vnode interlock to avoid a lock
 			 * order reversal.
 			 */
 			BO_UNLOCK(bo);
 			bvec = (struct buf **)
 				malloc(bveccount * sizeof(struct buf *),
 				       M_TEMP, M_NOWAIT);
 			BO_LOCK(bo);
 			if (bvec == NULL) {
 				bvec = bvec_on_stack;
 				bvecsize = NFS_COMMITBVECSIZ;
 			} else
 				bvecsize = bveccount;
 		} else {
 			bvec = bvec_on_stack;
 			bvecsize = NFS_COMMITBVECSIZ;
 		}
 		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 			if (bvecpos >= bvecsize)
 				break;
 			if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 				nbp = TAILQ_NEXT(bp, b_bobufs);
 				continue;
 			}
 			if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) !=
 			    (B_DELWRI | B_NEEDCOMMIT)) {
 				BUF_UNLOCK(bp);
 				nbp = TAILQ_NEXT(bp, b_bobufs);
 				continue;
 			}
 			BO_UNLOCK(bo);
 			bremfree(bp);
 			/*
 			 * Work out if all buffers are using the same cred
 			 * so we can deal with them all with one commit.
 			 *
 			 * NOTE: we are not clearing B_DONE here, so we have
 			 * to do it later on in this routine if we intend to
 			 * initiate I/O on the bp.
 			 *
 			 * Note: to avoid loopback deadlocks, we do not
 			 * assign b_runningbufspace.
 			 */
 			if (wcred == NULL)
 				wcred = bp->b_wcred;
 			else if (wcred != bp->b_wcred)
 				wcred = NOCRED;
 			vfs_busy_pages(bp, 1);
 
 			BO_LOCK(bo);
 			/*
 			 * bp is protected by being locked, but nbp is not
 			 * and vfs_busy_pages() may sleep.  We have to
 			 * recalculate nbp.
 			 */
 			nbp = TAILQ_NEXT(bp, b_bobufs);
 
 			/*
 			 * A list of these buffers is kept so that the
 			 * second loop knows which buffers have actually
 			 * been committed. This is necessary, since there
 			 * may be a race between the commit rpc and new
 			 * uncommitted writes on the file.
 			 */
 			bvec[bvecpos++] = bp;
 			toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 				bp->b_dirtyoff;
 			if (toff < off)
 				off = toff;
 			toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff);
 			if (toff > endoff)
 				endoff = toff;
 		}
 		BO_UNLOCK(bo);
 	}
 	if (bvecpos > 0) {
 		/*
 		 * Commit data on the server, as required.
 		 * If all bufs are using the same wcred, then use that with
 		 * one call for all of them, otherwise commit each one
 		 * separately.
 		 */
 		if (wcred != NOCRED)
 			retv = ncl_commit(vp, off, (int)(endoff - off),
 					  wcred, td);
 		else {
 			retv = 0;
 			for (i = 0; i < bvecpos; i++) {
 				off_t off, size;
 				bp = bvec[i];
 				off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE +
 					bp->b_dirtyoff;
 				size = (u_quad_t)(bp->b_dirtyend
 						  - bp->b_dirtyoff);
 				retv = ncl_commit(vp, off, (int)size,
 						  bp->b_wcred, td);
 				if (retv) break;
 			}
 		}
 
 		if (retv == NFSERR_STALEWRITEVERF)
 			ncl_clearcommit(vp->v_mount);
 
 		/*
 		 * Now, either mark the blocks I/O done or mark the
 		 * blocks dirty, depending on whether the commit
 		 * succeeded.
 		 */
 		for (i = 0; i < bvecpos; i++) {
 			bp = bvec[i];
 			bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK);
 			if (retv) {
 				/*
 				 * Error, leave B_DELWRI intact
 				 */
 				vfs_unbusy_pages(bp);
 				brelse(bp);
 			} else {
 				/*
 				 * Success, remove B_DELWRI ( bundirty() ).
 				 *
 				 * b_dirtyoff/b_dirtyend seem to be NFS
 				 * specific.  We should probably move that
 				 * into bundirty(). XXX
 				 */
 				bufobj_wref(bo);
 				bp->b_flags |= B_ASYNC;
 				bundirty(bp);
 				bp->b_flags &= ~B_DONE;
 				bp->b_ioflags &= ~BIO_ERROR;
 				bp->b_dirtyoff = bp->b_dirtyend = 0;
 				bufdone(bp);
 			}
 		}
 	}
 
 	/*
 	 * Start/do any write(s) that are required.
 	 */
 loop:
 	BO_LOCK(bo);
 	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) {
 			if (waitfor != MNT_WAIT || passone)
 				continue;
 
 			error = BUF_TIMELOCK(bp,
 			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 			    BO_LOCKPTR(bo), "nfsfsync", slpflag, slptimeo);
 			if (error == 0) {
 				BUF_UNLOCK(bp);
 				goto loop;
 			}
 			if (error == ENOLCK) {
 				error = 0;
 				goto loop;
 			}
 			if (called_from_renewthread != 0) {
 				/*
 				 * Return EIO so the flush will be retried
 				 * later.
 				 */
 				error = EIO;
 				goto done;
 			}
 			if (newnfs_sigintr(nmp, td)) {
 				error = EINTR;
 				goto done;
 			}
 			if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			}
 			goto loop;
 		}
 		if ((bp->b_flags & B_DELWRI) == 0)
 			panic("nfs_fsync: not dirty");
 		if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) {
 			BUF_UNLOCK(bp);
 			continue;
 		}
 		BO_UNLOCK(bo);
 		bremfree(bp);
 		if (passone || !commit)
 		    bp->b_flags |= B_ASYNC;
 		else
 		    bp->b_flags |= B_ASYNC;
 		bwrite(bp);
 		if (newnfs_sigintr(nmp, td)) {
 			error = EINTR;
 			goto done;
 		}
 		goto loop;
 	}
 	if (passone) {
 		passone = 0;
 		BO_UNLOCK(bo);
 		goto again;
 	}
 	if (waitfor == MNT_WAIT) {
 		while (bo->bo_numoutput) {
 			error = bufobj_wwait(bo, slpflag, slptimeo);
 			if (error) {
 			    BO_UNLOCK(bo);
 			    if (called_from_renewthread != 0) {
 				/*
 				 * Return EIO so that the flush will be
 				 * retried later.
 				 */
 				error = EIO;
 				goto done;
 			    }
 			    error = newnfs_sigintr(nmp, td);
 			    if (error)
 				goto done;
 			    if (slpflag == PCATCH) {
 				slpflag = 0;
 				slptimeo = 2 * hz;
 			    }
 			    BO_LOCK(bo);
 			}
 		}
 		if (bo->bo_dirty.bv_cnt != 0 && commit) {
 			BO_UNLOCK(bo);
 			goto loop;
 		}
 		/*
 		 * Wait for all the async IO requests to drain
 		 */
 		BO_UNLOCK(bo);
 		mtx_lock(&np->n_mtx);
 		while (np->n_directio_asyncwr > 0) {
 			np->n_flag |= NFSYNCWAIT;
 			error = newnfs_msleep(td, &np->n_directio_asyncwr,
 			    &np->n_mtx, slpflag | (PRIBIO + 1), 
 			    "nfsfsync", 0);
 			if (error) {
 				if (newnfs_sigintr(nmp, td)) {
 					mtx_unlock(&np->n_mtx);
 					error = EINTR;	
 					goto done;
 				}
 			}
 		}
 		mtx_unlock(&np->n_mtx);
 	} else
 		BO_UNLOCK(bo);
 	if (NFSHASPNFS(nmp)) {
 		nfscl_layoutcommit(vp, td);
 		/*
 		 * Invalidate the attribute cache, since writes to a DS
 		 * won't update the size attribute.
 		 */
 		mtx_lock(&np->n_mtx);
 		np->n_attrstamp = 0;
 	} else
 		mtx_lock(&np->n_mtx);
 	if (np->n_flag & NWRITEERR) {
 		error = np->n_error;
 		np->n_flag &= ~NWRITEERR;
 	}
   	if (commit && bo->bo_dirty.bv_cnt == 0 &&
 	    bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0)
   		np->n_flag &= ~NMODIFIED;
 	mtx_unlock(&np->n_mtx);
 done:
 	if (bvec != NULL && bvec != bvec_on_stack)
 		free(bvec, M_TEMP);
 	if (error == 0 && commit != 0 && waitfor == MNT_WAIT &&
 	    (bo->bo_dirty.bv_cnt != 0 || bo->bo_numoutput != 0 ||
 	     np->n_directio_asyncwr != 0) && trycnt++ < 5) {
 		/* try, try again... */
 		passone = 1;
 		wcred = NULL;
 		bvec = NULL;
 		bvecsize = 0;
 printf("try%d\n", trycnt);
 		goto again;
 	}
 	return (error);
 }
 
 /*
  * NFS advisory byte-level locks.
  */
 static int
 nfs_advlock(struct vop_advlock_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct ucred *cred;
 	struct nfsnode *np = VTONFS(ap->a_vp);
 	struct proc *p = (struct proc *)ap->a_id;
 	struct thread *td = curthread;	/* XXX */
 	struct vattr va;
 	int ret, error = EOPNOTSUPP;
 	u_quad_t size;
 	
 	if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
 		if (vp->v_type != VREG)
 			return (EINVAL);
 		if ((ap->a_flags & F_POSIX) != 0)
 			cred = p->p_ucred;
 		else
 			cred = td->td_ucred;
 		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (vp->v_iflag & VI_DOOMED) {
 			NFSVOPUNLOCK(vp, 0);
 			return (EBADF);
 		}
 
 		/*
 		 * If this is unlocking a write locked region, flush and
 		 * commit them before unlocking. This is required by
 		 * RFC3530 Sec. 9.3.2.
 		 */
 		if (ap->a_op == F_UNLCK &&
 		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
 		    ap->a_flags))
 			(void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0);
 
 		/*
 		 * Loop around doing the lock op, while a blocking lock
 		 * must wait for the lock op to succeed.
 		 */
 		do {
 			ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
 			    ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags);
 			if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
 			    ap->a_op == F_SETLK) {
 				NFSVOPUNLOCK(vp, 0);
 				error = nfs_catnap(PZERO | PCATCH, ret,
 				    "ncladvl");
 				if (error)
 					return (EINTR);
 				NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
 				if (vp->v_iflag & VI_DOOMED) {
 					NFSVOPUNLOCK(vp, 0);
 					return (EBADF);
 				}
 			}
 		} while (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
 		     ap->a_op == F_SETLK);
 		if (ret == NFSERR_DENIED) {
 			NFSVOPUNLOCK(vp, 0);
 			return (EAGAIN);
 		} else if (ret == EINVAL || ret == EBADF || ret == EINTR) {
 			NFSVOPUNLOCK(vp, 0);
 			return (ret);
 		} else if (ret != 0) {
 			NFSVOPUNLOCK(vp, 0);
 			return (EACCES);
 		}
 
 		/*
 		 * Now, if we just got a lock, invalidate data in the buffer
 		 * cache, as required, so that the coherency conforms with
 		 * RFC3530 Sec. 9.3.2.
 		 */
 		if (ap->a_op == F_SETLK) {
 			if ((np->n_flag & NMODIFIED) == 0) {
 				np->n_attrstamp = 0;
 				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 				ret = VOP_GETATTR(vp, &va, cred);
 			}
 			if ((np->n_flag & NMODIFIED) || ret ||
 			    np->n_change != va.va_filerev) {
 				(void) ncl_vinvalbuf(vp, V_SAVE, td, 1);
 				np->n_attrstamp = 0;
 				KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp);
 				ret = VOP_GETATTR(vp, &va, cred);
 				if (!ret) {
 					np->n_mtime = va.va_mtime;
 					np->n_change = va.va_filerev;
 				}
 			}
 			/* Mark that a file lock has been acquired. */
 			mtx_lock(&np->n_mtx);
 			np->n_flag |= NHASBEENLOCKED;
 			mtx_unlock(&np->n_mtx);
 		}
 		NFSVOPUNLOCK(vp, 0);
 		return (0);
 	} else if (!NFS_ISV4(vp)) {
 		error = NFSVOPLOCK(vp, LK_SHARED);
 		if (error)
 			return (error);
 		if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 			size = VTONFS(vp)->n_size;
 			NFSVOPUNLOCK(vp, 0);
 			error = lf_advlock(ap, &(vp->v_lockf), size);
 		} else {
 			if (nfs_advlock_p != NULL)
 				error = nfs_advlock_p(ap);
 			else {
 				NFSVOPUNLOCK(vp, 0);
 				error = ENOLCK;
 			}
 		}
 		if (error == 0 && ap->a_op == F_SETLK) {
 			error = NFSVOPLOCK(vp, LK_SHARED);
 			if (error == 0) {
 				/* Mark that a file lock has been acquired. */
 				mtx_lock(&np->n_mtx);
 				np->n_flag |= NHASBEENLOCKED;
 				mtx_unlock(&np->n_mtx);
 				NFSVOPUNLOCK(vp, 0);
 			}
 		}
 	}
 	return (error);
 }
 
 /*
  * NFS advisory byte-level locks.
  */
 static int
 nfs_advlockasync(struct vop_advlockasync_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	u_quad_t size;
 	int error;
 	
 	if (NFS_ISV4(vp))
 		return (EOPNOTSUPP);
 	error = NFSVOPLOCK(vp, LK_SHARED);
 	if (error)
 		return (error);
 	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) {
 		size = VTONFS(vp)->n_size;
 		NFSVOPUNLOCK(vp, 0);
 		error = lf_advlockasync(ap, &(vp->v_lockf), size);
 	} else {
 		NFSVOPUNLOCK(vp, 0);
 		error = EOPNOTSUPP;
 	}
 	return (error);
 }
 
 /*
  * Print out the contents of an nfsnode.
  */
 static int
 nfs_print(struct vop_print_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 
-	printf("\tfileid %ld fsid 0x%x", np->n_vattr.na_fileid,
-	    np->n_vattr.na_fsid);
+	printf("\tfileid %jd fsid 0x%jx", (uintmax_t)np->n_vattr.na_fileid,
+	    (uintmax_t)np->n_vattr.na_fsid);
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 	printf("\n");
 	return (0);
 }
 
 /*
  * This is the "real" nfs::bwrite(struct buf*).
  * We set B_CACHE if this is a VMIO buffer.
  */
 int
 ncl_writebp(struct buf *bp, int force __unused, struct thread *td)
 {
 	int s;
 	int oldflags = bp->b_flags;
 #if 0
 	int retv = 1;
 	off_t off;
 #endif
 
 	BUF_ASSERT_HELD(bp);
 
 	if (bp->b_flags & B_INVAL) {
 		brelse(bp);
 		return(0);
 	}
 
 	bp->b_flags |= B_CACHE;
 
 	/*
 	 * Undirty the bp.  We will redirty it later if the I/O fails.
 	 */
 
 	s = splbio();
 	bundirty(bp);
 	bp->b_flags &= ~B_DONE;
 	bp->b_ioflags &= ~BIO_ERROR;
 	bp->b_iocmd = BIO_WRITE;
 
 	bufobj_wref(bp->b_bufobj);
 	curthread->td_ru.ru_oublock++;
 	splx(s);
 
 	/*
 	 * Note: to avoid loopback deadlocks, we do not
 	 * assign b_runningbufspace.
 	 */
 	vfs_busy_pages(bp, 1);
 
 	BUF_KERNPROC(bp);
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	bstrategy(bp);
 
 	if( (oldflags & B_ASYNC) == 0) {
 		int rtval = bufwait(bp);
 
 		if (oldflags & B_DELWRI) {
 			s = splbio();
 			reassignbuf(bp);
 			splx(s);
 		}
 		brelse(bp);
 		return (rtval);
 	}
 
 	return (0);
 }
 
 /*
  * nfs special file access vnode op.
  * Essentially just get vattr and then imitate iaccess() since the device is
  * local to the client.
  */
 static int
 nfsspec_access(struct vop_access_args *ap)
 {
 	struct vattr *vap;
 	struct ucred *cred = ap->a_cred;
 	struct vnode *vp = ap->a_vp;
 	accmode_t accmode = ap->a_accmode;
 	struct vattr vattr;
 	int error;
 
 	/*
 	 * Disallow write attempts on filesystems mounted read-only;
 	 * unless the file is a socket, fifo, or a block or character
 	 * device resident on the filesystem.
 	 */
 	if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) {
 		switch (vp->v_type) {
 		case VREG:
 		case VDIR:
 		case VLNK:
 			return (EROFS);
 		default:
 			break;
 		}
 	}
 	vap = &vattr;
 	error = VOP_GETATTR(vp, vap, cred);
 	if (error)
 		goto out;
 	error  = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
 	    accmode, cred, NULL);
 out:
 	return error;
 }
 
 /*
  * Read wrapper for fifos.
  */
 static int
 nfsfifo_read(struct vop_read_args *ap)
 {
 	struct nfsnode *np = VTONFS(ap->a_vp);
 	int error;
 
 	/*
 	 * Set access flag.
 	 */
 	mtx_lock(&np->n_mtx);
 	np->n_flag |= NACC;
 	vfs_timestamp(&np->n_atim);
 	mtx_unlock(&np->n_mtx);
 	error = fifo_specops.vop_read(ap);
 	return error;	
 }
 
 /*
  * Write wrapper for fifos.
  */
 static int
 nfsfifo_write(struct vop_write_args *ap)
 {
 	struct nfsnode *np = VTONFS(ap->a_vp);
 
 	/*
 	 * Set update flag.
 	 */
 	mtx_lock(&np->n_mtx);
 	np->n_flag |= NUPD;
 	vfs_timestamp(&np->n_mtim);
 	mtx_unlock(&np->n_mtx);
 	return(fifo_specops.vop_write(ap));
 }
 
 /*
  * Close wrapper for fifos.
  *
  * Update the times on the nfsnode then do fifo close.
  */
 static int
 nfsfifo_close(struct vop_close_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct nfsnode *np = VTONFS(vp);
 	struct vattr vattr;
 	struct timespec ts;
 
 	mtx_lock(&np->n_mtx);
 	if (np->n_flag & (NACC | NUPD)) {
 		vfs_timestamp(&ts);
 		if (np->n_flag & NACC)
 			np->n_atim = ts;
 		if (np->n_flag & NUPD)
 			np->n_mtim = ts;
 		np->n_flag |= NCHG;
 		if (vrefcnt(vp) == 1 &&
 		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
 			VATTR_NULL(&vattr);
 			if (np->n_flag & NACC)
 				vattr.va_atime = np->n_atim;
 			if (np->n_flag & NUPD)
 				vattr.va_mtime = np->n_mtim;
 			mtx_unlock(&np->n_mtx);
 			(void)VOP_SETATTR(vp, &vattr, ap->a_cred);
 			goto out;
 		}
 	}
 	mtx_unlock(&np->n_mtx);
 out:
 	return (fifo_specops.vop_close(ap));
 }
 
 /*
  * Just call ncl_writebp() with the force argument set to 1.
  *
  * NOTE: B_DONE may or may not be set in a_bp on call.
  */
 static int
 nfs_bwrite(struct buf *bp)
 {
 
 	return (ncl_writebp(bp, 1, curthread));
 }
 
 struct buf_ops buf_ops_newnfs = {
 	.bop_name	=	"buf_ops_nfs",
 	.bop_write	=	nfs_bwrite,
 	.bop_strategy	=	bufstrategy,
 	.bop_sync	=	bufsync,
 	.bop_bdflush	=	bufbdflush,
 };
 
 static int
 nfs_getacl(struct vop_getacl_args *ap)
 {
 	int error;
 
 	if (ap->a_type != ACL_TYPE_NFS4)
 		return (EOPNOTSUPP);
 	error = nfsrpc_getacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
 	    NULL);
 	if (error > NFSERR_STALE) {
 		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
 		error = EPERM;
 	}
 	return (error);
 }
 
 static int
 nfs_setacl(struct vop_setacl_args *ap)
 {
 	int error;
 
 	if (ap->a_type != ACL_TYPE_NFS4)
 		return (EOPNOTSUPP);
 	error = nfsrpc_setacl(ap->a_vp, ap->a_cred, ap->a_td, ap->a_aclp,
 	    NULL);
 	if (error > NFSERR_STALE) {
 		(void) nfscl_maperr(ap->a_td, error, (uid_t)0, (gid_t)0);
 		error = EPERM;
 	}
 	return (error);
 }
 
 /*
  * Return POSIX pathconf information applicable to nfs filesystems.
  */
 static int
 nfs_pathconf(struct vop_pathconf_args *ap)
 {
 	struct nfsv3_pathconf pc;
 	struct nfsvattr nfsva;
 	struct vnode *vp = ap->a_vp;
 	struct thread *td = curthread;
 	int attrflag, error;
 
 	if ((NFS_ISV34(vp) && (ap->a_name == _PC_LINK_MAX ||
 	    ap->a_name == _PC_NAME_MAX || ap->a_name == _PC_CHOWN_RESTRICTED ||
 	    ap->a_name == _PC_NO_TRUNC)) ||
 	    (NFS_ISV4(vp) && ap->a_name == _PC_ACL_NFS4)) {
 		/*
 		 * Since only the above 4 a_names are returned by the NFSv3
 		 * Pathconf RPC, there is no point in doing it for others.
 		 * For NFSv4, the Pathconf RPC (actually a Getattr Op.) can
 		 * be used for _PC_NFS4_ACL as well.
 		 */
 		error = nfsrpc_pathconf(vp, &pc, td->td_ucred, td, &nfsva,
 		    &attrflag, NULL);
 		if (attrflag != 0)
 			(void) nfscl_loadattrcache(&vp, &nfsva, NULL, NULL, 0,
 			    1);
 		if (error != 0)
 			return (error);
 	} else {
 		/*
 		 * For NFSv2 (or NFSv3 when not one of the above 4 a_names),
 		 * just fake them.
 		 */
 		pc.pc_linkmax = LINK_MAX;
 		pc.pc_namemax = NFS_MAXNAMLEN;
 		pc.pc_notrunc = 1;
 		pc.pc_chownrestricted = 1;
 		pc.pc_caseinsensitive = 0;
 		pc.pc_casepreserving = 1;
 		error = 0;
 	}
 	switch (ap->a_name) {
 	case _PC_LINK_MAX:
 		*ap->a_retval = pc.pc_linkmax;
 		break;
 	case _PC_NAME_MAX:
 		*ap->a_retval = pc.pc_namemax;
 		break;
 	case _PC_PATH_MAX:
 		*ap->a_retval = PATH_MAX;
 		break;
 	case _PC_PIPE_BUF:
 		*ap->a_retval = PIPE_BUF;
 		break;
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = pc.pc_chownrestricted;
 		break;
 	case _PC_NO_TRUNC:
 		*ap->a_retval = pc.pc_notrunc;
 		break;
 	case _PC_ACL_EXTENDED:
 		*ap->a_retval = 0;
 		break;
 	case _PC_ACL_NFS4:
 		if (NFS_ISV4(vp) && nfsrv_useacl != 0 && attrflag != 0 &&
 		    NFSISSET_ATTRBIT(&nfsva.na_suppattr, NFSATTRBIT_ACL))
 			*ap->a_retval = 1;
 		else
 			*ap->a_retval = 0;
 		break;
 	case _PC_ACL_PATH_MAX:
 		if (NFS_ISV4(vp))
 			*ap->a_retval = ACL_MAX_ENTRIES;
 		else
 			*ap->a_retval = 3;
 		break;
 	case _PC_MAC_PRESENT:
 		*ap->a_retval = 0;
 		break;
 	case _PC_ASYNC_IO:
 		/* _PC_ASYNC_IO should have been handled by upper layers. */
 		KASSERT(0, ("_PC_ASYNC_IO should not get here"));
 		error = EINVAL;
 		break;
 	case _PC_PRIO_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_SYNC_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_ALLOC_SIZE_MIN:
 		*ap->a_retval = vp->v_mount->mnt_stat.f_bsize;
 		break;
 	case _PC_FILESIZEBITS:
 		if (NFS_ISV34(vp))
 			*ap->a_retval = 64;
 		else
 			*ap->a_retval = 32;
 		break;
 	case _PC_REC_INCR_XFER_SIZE:
 		*ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_MAX_XFER_SIZE:
 		*ap->a_retval = -1; /* means ``unlimited'' */
 		break;
 	case _PC_REC_MIN_XFER_SIZE:
 		*ap->a_retval = vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_XFER_ALIGN:
 		*ap->a_retval = PAGE_SIZE;
 		break;
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = NFS_MAXPATHLEN;
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return (error);
 }
 
Index: head/sys/fs/tmpfs/tmpfs_vnops.c
===================================================================
--- head/sys/fs/tmpfs/tmpfs_vnops.c	(revision 311521)
+++ head/sys/fs/tmpfs/tmpfs_vnops.c	(revision 311522)
@@ -1,1425 +1,1425 @@
 /*	$NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $	*/
 
 /*-
  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
  * 2005 program.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 /*
  * tmpfs vnode interface.
  */
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/fcntl.h>
 #include <sys/lockf.h>
 #include <sys/lock.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/rwlock.h>
 #include <sys/sched.h>
 #include <sys/stat.h>
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_pager.h>
 
 #include <fs/tmpfs/tmpfs_vnops.h>
 #include <fs/tmpfs/tmpfs.h>
 
 SYSCTL_DECL(_vfs_tmpfs);
 
 static volatile int tmpfs_rename_restarts;
 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
     __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
     "Times rename had to restart due to lock contention");
 
 static int
 tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
     struct vnode **rvp)
 {
 
 	return (tmpfs_alloc_vp(mp, arg, lkflags, rvp));
 }
 
 static int
 tmpfs_lookup(struct vop_cachedlookup_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct tmpfs_dirent *de;
 	struct tmpfs_node *dnode;
 	int error;
 
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	*vpp = NULLVP;
 
 	/* Check accessibility of requested node as a first step. */
 	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
 	if (error != 0)
 		goto out;
 
 	/* We cannot be requesting the parent directory of the root node. */
 	MPASS(IMPLIES(dnode->tn_type == VDIR &&
 	    dnode->tn_dir.tn_parent == dnode,
 	    !(cnp->cn_flags & ISDOTDOT)));
 
 	TMPFS_ASSERT_LOCKED(dnode);
 	if (dnode->tn_dir.tn_parent == NULL) {
 		error = ENOENT;
 		goto out;
 	}
 	if (cnp->cn_flags & ISDOTDOT) {
 		error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc,
 		    dnode->tn_dir.tn_parent, cnp->cn_lkflags, vpp);
 		if (error != 0)
 			goto out;
 	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
 		VREF(dvp);
 		*vpp = dvp;
 		error = 0;
 	} else {
 		de = tmpfs_dir_lookup(dnode, NULL, cnp);
 		if (de != NULL && de->td_node == NULL)
 			cnp->cn_flags |= ISWHITEOUT;
 		if (de == NULL || de->td_node == NULL) {
 			/* The entry was not found in the directory.
 			 * This is OK if we are creating or renaming an
 			 * entry and are working on the last component of
 			 * the path name. */
 			if ((cnp->cn_flags & ISLASTCN) &&
 			    (cnp->cn_nameiop == CREATE || \
 			    cnp->cn_nameiop == RENAME ||
 			    (cnp->cn_nameiop == DELETE &&
 			    cnp->cn_flags & DOWHITEOUT &&
 			    cnp->cn_flags & ISWHITEOUT))) {
 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
 				    cnp->cn_thread);
 				if (error != 0)
 					goto out;
 
 				/* Keep the component name in the buffer for
 				 * future uses. */
 				cnp->cn_flags |= SAVENAME;
 
 				error = EJUSTRETURN;
 			} else
 				error = ENOENT;
 		} else {
 			struct tmpfs_node *tnode;
 
 			/* The entry was found, so get its associated
 			 * tmpfs_node. */
 			tnode = de->td_node;
 
 			/* If we are not at the last path component and
 			 * found a non-directory or non-link entry (which
 			 * may itself be pointing to a directory), raise
 			 * an error. */
 			if ((tnode->tn_type != VDIR &&
 			    tnode->tn_type != VLNK) &&
 			    !(cnp->cn_flags & ISLASTCN)) {
 				error = ENOTDIR;
 				goto out;
 			}
 
 			/* If we are deleting or renaming the entry, keep
 			 * track of its tmpfs_dirent so that it can be
 			 * easily deleted later. */
 			if ((cnp->cn_flags & ISLASTCN) &&
 			    (cnp->cn_nameiop == DELETE ||
 			    cnp->cn_nameiop == RENAME)) {
 				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
 				    cnp->cn_thread);
 				if (error != 0)
 					goto out;
 
 				/* Allocate a new vnode on the matching entry. */
 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
 				    cnp->cn_lkflags, vpp);
 				if (error != 0)
 					goto out;
 
 				if ((dnode->tn_mode & S_ISTXT) &&
 				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred, cnp->cn_thread) &&
 				  VOP_ACCESS(*vpp, VADMIN, cnp->cn_cred, cnp->cn_thread)) {
 					error = EPERM;
 					vput(*vpp);
 					*vpp = NULL;
 					goto out;
 				}
 				cnp->cn_flags |= SAVENAME;
 			} else {
 				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
 				    cnp->cn_lkflags, vpp);
 				if (error != 0)
 					goto out;
 			}
 		}
 	}
 
 	/* Store the result of this lookup in the cache.  Avoid this if the
 	 * request was for creation, as it does not improve timings on
 	 * emprical tests. */
 	if ((cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(dvp, *vpp, cnp);
 
 out:
 	/* If there were no errors, *vpp cannot be null and it must be
 	 * locked. */
 	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp)));
 
 	return error;
 }
 
 static int
 tmpfs_create(struct vop_create_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 	int error;
 
 	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);
 
 	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
 	if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(dvp, *vpp, cnp);
 	return (error);
 }
 
 static int
 tmpfs_mknod(struct vop_mknod_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 
 	if (vap->va_type != VBLK && vap->va_type != VCHR &&
 	    vap->va_type != VFIFO)
 		return EINVAL;
 
 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
 }
 
 static int
 tmpfs_open(struct vop_open_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	int mode = v->a_mode;
 
 	int error;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/* The file is still active but all its names have been removed
 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
 	 * it is about to die. */
 	if (node->tn_links < 1)
 		return (ENOENT);
 
 	/* If the file is marked append-only, deny write requests. */
 	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
 		error = EPERM;
 	else {
 		error = 0;
 		/* For regular files, the call below is nop. */
 		KASSERT(vp->v_type != VREG || (node->tn_reg.tn_aobj->flags &
 		    OBJ_DEAD) == 0, ("dead object"));
 		vnode_create_vobject(vp, node->tn_size, v->a_td);
 	}
 
 	MPASS(VOP_ISLOCKED(vp));
 	return error;
 }
 
 static int
 tmpfs_close(struct vop_close_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	/* Update node times. */
 	tmpfs_update(vp);
 
 	return (0);
 }
 
 int
 tmpfs_access(struct vop_access_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	accmode_t accmode = v->a_accmode;
 	struct ucred *cred = v->a_cred;
 
 	int error;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	switch (vp->v_type) {
 	case VDIR:
 		/* FALLTHROUGH */
 	case VLNK:
 		/* FALLTHROUGH */
 	case VREG:
 		if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
 			error = EROFS;
 			goto out;
 		}
 		break;
 
 	case VBLK:
 		/* FALLTHROUGH */
 	case VCHR:
 		/* FALLTHROUGH */
 	case VSOCK:
 		/* FALLTHROUGH */
 	case VFIFO:
 		break;
 
 	default:
 		error = EINVAL;
 		goto out;
 	}
 
 	if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
 		error = EPERM;
 		goto out;
 	}
 
 	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid,
 	    node->tn_gid, accmode, cred, NULL);
 
 out:
 	MPASS(VOP_ISLOCKED(vp));
 
 	return error;
 }
 
 int
 tmpfs_getattr(struct vop_getattr_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct vattr *vap = v->a_vap;
 	vm_object_t obj;
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	tmpfs_update(vp);
 
 	vap->va_type = vp->v_type;
 	vap->va_mode = node->tn_mode;
 	vap->va_nlink = node->tn_links;
 	vap->va_uid = node->tn_uid;
 	vap->va_gid = node->tn_gid;
 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
 	vap->va_fileid = node->tn_id;
 	vap->va_size = node->tn_size;
 	vap->va_blocksize = PAGE_SIZE;
 	vap->va_atime = node->tn_atime;
 	vap->va_mtime = node->tn_mtime;
 	vap->va_ctime = node->tn_ctime;
 	vap->va_birthtime = node->tn_birthtime;
 	vap->va_gen = node->tn_gen;
 	vap->va_flags = node->tn_flags;
 	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
 		node->tn_rdev : NODEV;
 	if (vp->v_type == VREG) {
 		obj = node->tn_reg.tn_aobj;
 		vap->va_bytes = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
 	} else
 		vap->va_bytes = node->tn_size;
 	vap->va_filerev = 0;
 
 	return 0;
 }
 
 int
 tmpfs_setattr(struct vop_setattr_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct vattr *vap = v->a_vap;
 	struct ucred *cred = v->a_cred;
 	struct thread *td = curthread;
 
 	int error;
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	error = 0;
 
 	/* Abort if any unsettable attribute is given. */
 	if (vap->va_type != VNON ||
 	    vap->va_nlink != VNOVAL ||
 	    vap->va_fsid != VNOVAL ||
 	    vap->va_fileid != VNOVAL ||
 	    vap->va_blocksize != VNOVAL ||
 	    vap->va_gen != VNOVAL ||
 	    vap->va_rdev != VNOVAL ||
 	    vap->va_bytes != VNOVAL)
 		error = EINVAL;
 
 	if (error == 0 && (vap->va_flags != VNOVAL))
 		error = tmpfs_chflags(vp, vap->va_flags, cred, td);
 
 	if (error == 0 && (vap->va_size != VNOVAL))
 		error = tmpfs_chsize(vp, vap->va_size, cred, td);
 
 	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
 
 	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
 		error = tmpfs_chmod(vp, vap->va_mode, cred, td);
 
 	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
 	    vap->va_atime.tv_nsec != VNOVAL) ||
 	    (vap->va_mtime.tv_sec != VNOVAL &&
 	    vap->va_mtime.tv_nsec != VNOVAL) ||
 	    (vap->va_birthtime.tv_sec != VNOVAL &&
 	    vap->va_birthtime.tv_nsec != VNOVAL)))
 		error = tmpfs_chtimes(vp, vap, cred, td);
 
 	/* Update the node times.  We give preference to the error codes
 	 * generated by this function rather than the ones that may arise
 	 * from tmpfs_update. */
 	tmpfs_update(vp);
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	return error;
 }
 
 static int
 tmpfs_read(struct vop_read_args *v)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct tmpfs_node *node;
 
 	vp = v->a_vp;
 	if (vp->v_type != VREG)
 		return (EISDIR);
 	uio = v->a_uio;
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	node = VP_TO_TMPFS_NODE(vp);
 	node->tn_status |= TMPFS_NODE_ACCESSED;
 	return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio));
 }
 
 static int
 tmpfs_write(struct vop_write_args *v)
 {
 	struct vnode *vp;
 	struct uio *uio;
 	struct tmpfs_node *node;
 	off_t oldsize;
 	int error, ioflag;
 
 	vp = v->a_vp;
 	uio = v->a_uio;
 	ioflag = v->a_ioflag;
 	error = 0;
 	node = VP_TO_TMPFS_NODE(vp);
 	oldsize = node->tn_size;
 
 	if (uio->uio_offset < 0 || vp->v_type != VREG)
 		return (EINVAL);
 	if (uio->uio_resid == 0)
 		return (0);
 	if (ioflag & IO_APPEND)
 		uio->uio_offset = node->tn_size;
 	if (uio->uio_offset + uio->uio_resid >
 	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
 		return (EFBIG);
 	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
 		return (EFBIG);
 	if (uio->uio_offset + uio->uio_resid > node->tn_size) {
 		error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid,
 		    FALSE);
 		if (error != 0)
 			goto out;
 	}
 
 	error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio);
 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
 	    TMPFS_NODE_CHANGED;
 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
 		if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID, 0))
 			node->tn_mode &= ~(S_ISUID | S_ISGID);
 	}
 	if (error != 0)
 		(void)tmpfs_reg_resize(vp, oldsize, TRUE);
 
 out:
 	MPASS(IMPLIES(error == 0, uio->uio_resid == 0));
 	MPASS(IMPLIES(error != 0, oldsize == node->tn_size));
 
 	return (error);
 }
 
 static int
 tmpfs_fsync(struct vop_fsync_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	MPASS(VOP_ISLOCKED(vp));
 
 	tmpfs_check_mtime(vp);
 	tmpfs_update(vp);
 
 	return 0;
 }
 
 static int
 tmpfs_remove(struct vop_remove_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode *vp = v->a_vp;
 
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *dnode;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(dvp));
 	MPASS(VOP_ISLOCKED(vp));
 
 	if (vp->v_type == VDIR) {
 		error = EISDIR;
 		goto out;
 	}
 
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
 	MPASS(de != NULL);
 
 	/* Files marked as immutable or append-only cannot be deleted. */
 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
 	    (dnode->tn_flags & APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* Remove the entry from the directory; as it is a file, we do not
 	 * have to change the number of hard links of the directory. */
 	tmpfs_dir_detach(dvp, de);
 	if (v->a_cnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
 
 	/* Free the directory entry we just deleted.  Note that the node
 	 * referred by it will not be removed until the vnode is really
 	 * reclaimed. */
 	tmpfs_free_dirent(tmp, de);
 
 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED;
 	error = 0;
 
 out:
 
 	return error;
 }
 
 static int
 tmpfs_link(struct vop_link_args *v)
 {
 	struct vnode *dvp = v->a_tdvp;
 	struct vnode *vp = v->a_vp;
 	struct componentname *cnp = v->a_cnp;
 
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(dvp));
 	MPASS(cnp->cn_flags & HASBUF);
 	MPASS(dvp != vp); /* XXX When can this be false? */
 	node = VP_TO_TMPFS_NODE(vp);
 
 	/* Ensure that we do not overflow the maximum number of links imposed
 	 * by the system. */
 	MPASS(node->tn_links <= LINK_MAX);
 	if (node->tn_links == LINK_MAX) {
 		error = EMLINK;
 		goto out;
 	}
 
 	/* We cannot create links of files marked immutable or append-only. */
 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* Allocate a new directory entry to represent the node. */
 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
 	    cnp->cn_nameptr, cnp->cn_namelen, &de);
 	if (error != 0)
 		goto out;
 
 	/* Insert the new directory entry into the appropriate directory. */
 	if (cnp->cn_flags & ISWHITEOUT)
 		tmpfs_dir_whiteout_remove(dvp, cnp);
 	tmpfs_dir_attach(dvp, de);
 
 	/* vp link count has changed, so update node times. */
 	node->tn_status |= TMPFS_NODE_CHANGED;
 	tmpfs_update(vp);
 
 	error = 0;
 
 out:
 	return error;
 }
 
 /*
  * We acquire all but fdvp locks using non-blocking acquisitions.  If we
  * fail to acquire any lock in the path we will drop all held locks,
  * acquire the new lock in a blocking fashion, and then release it and
  * restart the rename.  This acquire/release step ensures that we do not
  * spin on a lock waiting for release.  On error release all vnode locks
  * and decrement references the way tmpfs_rename() would do.
  */
 static int
 tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
     struct vnode *tdvp, struct vnode **tvpp,
     struct componentname *fcnp, struct componentname *tcnp)
 {
 	struct vnode *nvp;
 	struct mount *mp;
 	struct tmpfs_dirent *de;
 	int error, restarts = 0;
 
 	VOP_UNLOCK(tdvp, 0);
 	if (*tvpp != NULL && *tvpp != tdvp)
 		VOP_UNLOCK(*tvpp, 0);
 	mp = fdvp->v_mount;
 
 relock:
 	restarts += 1;
 	error = vn_lock(fdvp, LK_EXCLUSIVE);
 	if (error)
 		goto releout;
 	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		VOP_UNLOCK(fdvp, 0);
 		error = vn_lock(tdvp, LK_EXCLUSIVE);
 		if (error)
 			goto releout;
 		VOP_UNLOCK(tdvp, 0);
 		goto relock;
 	}
 	/*
 	 * Re-resolve fvp to be certain it still exists and fetch the
 	 * correct vnode.
 	 */
 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
 	if (de == NULL) {
 		VOP_UNLOCK(fdvp, 0);
 		VOP_UNLOCK(tdvp, 0);
 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
 			error = EINVAL;
 		else
 			error = ENOENT;
 		goto releout;
 	}
 	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 	if (error != 0) {
 		VOP_UNLOCK(fdvp, 0);
 		VOP_UNLOCK(tdvp, 0);
 		if (error != EBUSY)
 			goto releout;
 		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
 		if (error != 0)
 			goto releout;
 		VOP_UNLOCK(nvp, 0);
 		/*
 		 * Concurrent rename race.
 		 */
 		if (nvp == tdvp) {
 			vrele(nvp);
 			error = EINVAL;
 			goto releout;
 		}
 		vrele(*fvpp);
 		*fvpp = nvp;
 		goto relock;
 	}
 	vrele(*fvpp);
 	*fvpp = nvp;
 	VOP_UNLOCK(*fvpp, 0);
 	/*
 	 * Re-resolve tvp and acquire the vnode lock if present.
 	 */
 	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
 	/*
 	 * If tvp disappeared we just carry on.
 	 */
 	if (de == NULL && *tvpp != NULL) {
 		vrele(*tvpp);
 		*tvpp = NULL;
 	}
 	/*
 	 * Get the tvp ino if the lookup succeeded.  We may have to restart
 	 * if the non-blocking acquire fails.
 	 */
 	if (de != NULL) {
 		nvp = NULL;
 		error = tmpfs_alloc_vp(mp, de->td_node,
 		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 		if (*tvpp != NULL)
 			vrele(*tvpp);
 		*tvpp = nvp;
 		if (error != 0) {
 			VOP_UNLOCK(fdvp, 0);
 			VOP_UNLOCK(tdvp, 0);
 			if (error != EBUSY)
 				goto releout;
 			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
 			    &nvp);
 			if (error != 0)
 				goto releout;
 			VOP_UNLOCK(nvp, 0);
 			/*
 			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
 			 */
 			if (nvp == fdvp) {
 				error = ENOTEMPTY;
 				goto releout;
 			}
 			goto relock;
 		}
 	}
 	tmpfs_rename_restarts += restarts;
 
 	return (0);
 
 releout:
 	vrele(fdvp);
 	vrele(*fvpp);
 	vrele(tdvp);
 	if (*tvpp != NULL)
 		vrele(*tvpp);
 	tmpfs_rename_restarts += restarts;
 
 	return (error);
 }
 
 static int
 tmpfs_rename(struct vop_rename_args *v)
 {
 	struct vnode *fdvp = v->a_fdvp;
 	struct vnode *fvp = v->a_fvp;
 	struct componentname *fcnp = v->a_fcnp;
 	struct vnode *tdvp = v->a_tdvp;
 	struct vnode *tvp = v->a_tvp;
 	struct componentname *tcnp = v->a_tcnp;
 	struct mount *mp = NULL;
 
 	char *newname;
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *fdnode;
 	struct tmpfs_node *fnode;
 	struct tmpfs_node *tnode;
 	struct tmpfs_node *tdnode;
 
 	MPASS(VOP_ISLOCKED(tdvp));
 	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp)));
 	MPASS(fcnp->cn_flags & HASBUF);
 	MPASS(tcnp->cn_flags & HASBUF);
 
 	/* Disallow cross-device renames.
 	 * XXX Why isn't this done by the caller? */
 	if (fvp->v_mount != tdvp->v_mount ||
 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
 		error = EXDEV;
 		goto out;
 	}
 
 	/* If source and target are the same file, there is nothing to do. */
 	if (fvp == tvp) {
 		error = 0;
 		goto out;
 	}
 
 	/* If we need to move the directory between entries, lock the
 	 * source so that we can safely operate on it. */
 	if (fdvp != tdvp && fdvp != tvp) {
 		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 			mp = tdvp->v_mount;
 			error = vfs_busy(mp, 0);
 			if (error != 0) {
 				mp = NULL;
 				goto out;
 			}
 			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
 			    fcnp, tcnp);
 			if (error != 0) {
 				vfs_unbusy(mp);
 				return (error);
 			}
 			ASSERT_VOP_ELOCKED(fdvp,
 			    "tmpfs_rename: fdvp not locked");
 			ASSERT_VOP_ELOCKED(tdvp,
 			    "tmpfs_rename: tdvp not locked");
 			if (tvp != NULL)
 				ASSERT_VOP_ELOCKED(tvp,
 				    "tmpfs_rename: tvp not locked");
 			if (fvp == tvp) {
 				error = 0;
 				goto out_locked;
 			}
 		}
 	}
 
 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
 	tdnode = VP_TO_TMPFS_DIR(tdvp);
 	tnode = (tvp == NULL) ? NULL : VP_TO_TMPFS_NODE(tvp);
 	fdnode = VP_TO_TMPFS_DIR(fdvp);
 	fnode = VP_TO_TMPFS_NODE(fvp);
 	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);
 
 	/* Entry can disappear before we lock fdvp,
 	 * also avoid manipulating '.' and '..' entries. */
 	if (de == NULL) {
 		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
 		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
 			error = EINVAL;
 		else
 			error = ENOENT;
 		goto out_locked;
 	}
 	MPASS(de->td_node == fnode);
 
 	/* If re-naming a directory to another preexisting directory
 	 * ensure that the target directory is empty so that its
 	 * removal causes no side effects.
 	 * Kern_rename guarantees the destination to be a directory
 	 * if the source is one. */
 	if (tvp != NULL) {
 		MPASS(tnode != NULL);
 
 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
 			error = EPERM;
 			goto out_locked;
 		}
 
 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
 			if (tnode->tn_size > 0) {
 				error = ENOTEMPTY;
 				goto out_locked;
 			}
 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
 			error = ENOTDIR;
 			goto out_locked;
 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
 			error = EISDIR;
 			goto out_locked;
 		} else {
 			MPASS(fnode->tn_type != VDIR &&
 				tnode->tn_type != VDIR);
 		}
 	}
 
 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))
 	    || (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
 		error = EPERM;
 		goto out_locked;
 	}
 
 	/* Ensure that we have enough memory to hold the new name, if it
 	 * has to be changed. */
 	if (fcnp->cn_namelen != tcnp->cn_namelen ||
 	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
 		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
 	} else
 		newname = NULL;
 
 	/* If the node is being moved to another directory, we have to do
 	 * the move. */
 	if (fdnode != tdnode) {
 		/* In case we are moving a directory, we have to adjust its
 		 * parent to point to the new parent. */
 		if (de->td_node->tn_type == VDIR) {
 			struct tmpfs_node *n;
 
 			/* Ensure the target directory is not a child of the
 			 * directory being moved.  Otherwise, we'd end up
 			 * with stale nodes. */
 			n = tdnode;
 			/* TMPFS_LOCK garanties that no nodes are freed while
 			 * traversing the list. Nodes can only be marked as
 			 * removed: tn_parent == NULL. */
 			TMPFS_LOCK(tmp);
 			TMPFS_NODE_LOCK(n);
 			while (n != n->tn_dir.tn_parent) {
 				struct tmpfs_node *parent;
 
 				if (n == fnode) {
 					TMPFS_NODE_UNLOCK(n);
 					TMPFS_UNLOCK(tmp);
 					error = EINVAL;
 					if (newname != NULL)
 						    free(newname, M_TMPFSNAME);
 					goto out_locked;
 				}
 				parent = n->tn_dir.tn_parent;
 				TMPFS_NODE_UNLOCK(n);
 				if (parent == NULL) {
 					n = NULL;
 					break;
 				}
 				TMPFS_NODE_LOCK(parent);
 				if (parent->tn_dir.tn_parent == NULL) {
 					TMPFS_NODE_UNLOCK(parent);
 					n = NULL;
 					break;
 				}
 				n = parent;
 			}
 			TMPFS_UNLOCK(tmp);
 			if (n == NULL) {
 				error = EINVAL;
 				if (newname != NULL)
 					    free(newname, M_TMPFSNAME);
 				goto out_locked;
 			}
 			TMPFS_NODE_UNLOCK(n);
 
 			/* Adjust the parent pointer. */
 			TMPFS_VALIDATE_DIR(fnode);
 			TMPFS_NODE_LOCK(de->td_node);
 			de->td_node->tn_dir.tn_parent = tdnode;
 			TMPFS_NODE_UNLOCK(de->td_node);
 
 			/* As a result of changing the target of the '..'
 			 * entry, the link count of the source and target
 			 * directories has to be adjusted. */
 			TMPFS_NODE_LOCK(tdnode);
 			TMPFS_ASSERT_LOCKED(tdnode);
 			tdnode->tn_links++;
 			TMPFS_NODE_UNLOCK(tdnode);
 
 			TMPFS_NODE_LOCK(fdnode);
 			TMPFS_ASSERT_LOCKED(fdnode);
 			fdnode->tn_links--;
 			TMPFS_NODE_UNLOCK(fdnode);
 		}
 	}
 
 	/* Do the move: just remove the entry from the source directory
 	 * and insert it into the target one. */
 	tmpfs_dir_detach(fdvp, de);
 
 	if (fcnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(fdvp, fcnp);
 	if (tcnp->cn_flags & ISWHITEOUT)
 		tmpfs_dir_whiteout_remove(tdvp, tcnp);
 
 	/* If the name has changed, we need to make it effective by changing
 	 * it in the directory entry. */
 	if (newname != NULL) {
 		MPASS(tcnp->cn_namelen <= MAXNAMLEN);
 
 		free(de->ud.td_name, M_TMPFSNAME);
 		de->ud.td_name = newname;
 		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);
 
 		fnode->tn_status |= TMPFS_NODE_CHANGED;
 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
 	}
 
 	/* If we are overwriting an entry, we have to remove the old one
 	 * from the target directory. */
 	if (tvp != NULL) {
 		struct tmpfs_dirent *tde;
 
 		/* Remove the old entry from the target directory. */
 		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
 		tmpfs_dir_detach(tdvp, tde);
 
 		/* Free the directory entry we just deleted.  Note that the
 		 * node referred by it will not be removed until the vnode is
 		 * really reclaimed. */
 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
 	}
 
 	tmpfs_dir_attach(tdvp, de);
 
 	cache_purge(fvp);
 	if (tvp != NULL)
 		cache_purge(tvp);
 	cache_purge_negative(tdvp);
 
 	error = 0;
 
 out_locked:
 	if (fdvp != tdvp && fdvp != tvp)
 		VOP_UNLOCK(fdvp, 0);
 
 out:
 	/* Release target nodes. */
 	/* XXX: I don't understand when tdvp can be the same as tvp, but
 	 * other code takes care of this... */
 	if (tdvp == tvp)
 		vrele(tdvp);
 	else
 		vput(tdvp);
 	if (tvp != NULL)
 		vput(tvp);
 
 	/* Release source nodes. */
 	vrele(fdvp);
 	vrele(fvp);
 
 	if (mp != NULL)
 		vfs_unbusy(mp);
 
 	return error;
 }
 
 static int
 tmpfs_mkdir(struct vop_mkdir_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 
 	MPASS(vap->va_type == VDIR);
 
 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
 }
 
 static int
 tmpfs_rmdir(struct vop_rmdir_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode *vp = v->a_vp;
 
 	int error;
 	struct tmpfs_dirent *de;
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *dnode;
 	struct tmpfs_node *node;
 
 	MPASS(VOP_ISLOCKED(dvp));
 	MPASS(VOP_ISLOCKED(vp));
 
 	tmp = VFS_TO_TMPFS(dvp->v_mount);
 	dnode = VP_TO_TMPFS_DIR(dvp);
 	node = VP_TO_TMPFS_DIR(vp);
 
 	/* Directories with more than two entries ('.' and '..') cannot be
 	 * removed. */
 	 if (node->tn_size > 0) {
 		 error = ENOTEMPTY;
 		 goto out;
 	 }
 
 	if ((dnode->tn_flags & APPEND)
 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
 		error = EPERM;
 		goto out;
 	}
 
 	/* This invariant holds only if we are not trying to remove "..".
 	  * We checked for that above so this is safe now. */
 	MPASS(node->tn_dir.tn_parent == dnode);
 
 	/* Get the directory entry associated with node (vp).  This was
 	 * filled by tmpfs_lookup while looking up the entry. */
 	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
 	MPASS(TMPFS_DIRENT_MATCHES(de,
 	    v->a_cnp->cn_nameptr,
 	    v->a_cnp->cn_namelen));
 
 	/* Check flags to see if we are allowed to remove the directory. */
 	if (dnode->tn_flags & APPEND
 		|| node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 
 
 	/* Detach the directory entry from the directory (dnode). */
 	tmpfs_dir_detach(dvp, de);
 	if (v->a_cnp->cn_flags & DOWHITEOUT)
 		tmpfs_dir_whiteout_add(dvp, v->a_cnp);
 
 	/* No vnode should be allocated for this entry from this point */
 	TMPFS_NODE_LOCK(node);
 	TMPFS_ASSERT_ELOCKED(node);
 	node->tn_links--;
 	node->tn_dir.tn_parent = NULL;
 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
 	    TMPFS_NODE_MODIFIED;
 
 	TMPFS_NODE_UNLOCK(node);
 
 	TMPFS_NODE_LOCK(dnode);
 	TMPFS_ASSERT_ELOCKED(dnode);
 	dnode->tn_links--;
 	dnode->tn_status |= TMPFS_NODE_ACCESSED | \
 	    TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
 	TMPFS_NODE_UNLOCK(dnode);
 
 	cache_purge(dvp);
 	cache_purge(vp);
 
 	/* Free the directory entry we just deleted.  Note that the node
 	 * referred by it will not be removed until the vnode is really
 	 * reclaimed. */
 	tmpfs_free_dirent(tmp, de);
 
 	/* Release the deleted vnode (will destroy the node, notify
 	 * interested parties and clean it from the cache). */
 
 	dnode->tn_status |= TMPFS_NODE_CHANGED;
 	tmpfs_update(dvp);
 
 	error = 0;
 
 out:
 	return error;
 }
 
 static int
 tmpfs_symlink(struct vop_symlink_args *v)
 {
 	struct vnode *dvp = v->a_dvp;
 	struct vnode **vpp = v->a_vpp;
 	struct componentname *cnp = v->a_cnp;
 	struct vattr *vap = v->a_vap;
 	char *target = v->a_target;
 
 #ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
 	MPASS(vap->va_type == VLNK);
 #else
 	vap->va_type = VLNK;
 #endif
 
 	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
 }
 
 static int
 tmpfs_readdir(struct vop_readdir_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct uio *uio = v->a_uio;
 	int *eofflag = v->a_eofflag;
 	u_long **cookies = v->a_cookies;
 	int *ncookies = v->a_ncookies;
 
 	int error;
 	ssize_t startresid;
 	int maxcookies;
 	struct tmpfs_node *node;
 
 	/* This operation only makes sense on directory nodes. */
 	if (vp->v_type != VDIR)
 		return ENOTDIR;
 
 	maxcookies = 0;
 	node = VP_TO_TMPFS_DIR(vp);
 
 	startresid = uio->uio_resid;
 
 	/* Allocate cookies for NFS and compat modules. */
 	if (cookies != NULL && ncookies != NULL) {
 		maxcookies = howmany(node->tn_size,
 		    sizeof(struct tmpfs_dirent)) + 2;
 		*cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
 		    M_WAITOK);
 		*ncookies = 0;
 	}
 
 	if (cookies == NULL)
 		error = tmpfs_dir_getdents(node, uio, 0, NULL, NULL);
 	else
 		error = tmpfs_dir_getdents(node, uio, maxcookies, *cookies,
 		    ncookies);
 
 	/* Buffer was filled without hitting EOF. */
 	if (error == EJUSTRETURN)
 		error = (uio->uio_resid != startresid) ? 0 : EINVAL;
 
 	if (error != 0 && cookies != NULL && ncookies != NULL) {
 		free(*cookies, M_TEMP);
 		*cookies = NULL;
 		*ncookies = 0;
 	}
 
 	if (eofflag != NULL)
 		*eofflag =
 		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
 
 	return error;
 }
 
 static int
 tmpfs_readlink(struct vop_readlink_args *v)
 {
 	struct vnode *vp = v->a_vp;
 	struct uio *uio = v->a_uio;
 
 	int error;
 	struct tmpfs_node *node;
 
 	MPASS(uio->uio_offset == 0);
 	MPASS(vp->v_type == VLNK);
 
 	node = VP_TO_TMPFS_NODE(vp);
 
 	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
 	    uio);
 	node->tn_status |= TMPFS_NODE_ACCESSED;
 
 	return error;
 }
 
 static int
 tmpfs_inactive(struct vop_inactive_args *v)
 {
 	struct vnode *vp;
 	struct tmpfs_node *node;
 
 	vp = v->a_vp;
 	node = VP_TO_TMPFS_NODE(vp);
 	if (node->tn_links == 0)
 		vrecycle(vp);
 	else
 		tmpfs_check_mtime(vp);
 	return (0);
 }
 
 int
 tmpfs_reclaim(struct vop_reclaim_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	struct tmpfs_mount *tmp;
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 	tmp = VFS_TO_TMPFS(vp->v_mount);
 
 	if (vp->v_type == VREG)
 		tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
 	else
 		vnode_destroy_vobject(vp);
 	vp->v_object = NULL;
 	cache_purge(vp);
 
 	TMPFS_NODE_LOCK(node);
 	TMPFS_ASSERT_ELOCKED(node);
 	tmpfs_free_vp(vp);
 
 	/* If the node referenced by this vnode was deleted by the user,
 	 * we must free its associated data structures (now that the vnode
 	 * is being reclaimed). */
 	if (node->tn_links == 0 &&
 	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
 		TMPFS_NODE_UNLOCK(node);
 		tmpfs_free_node(tmp, node);
 	} else
 		TMPFS_NODE_UNLOCK(node);
 
 	MPASS(vp->v_data == NULL);
 	return 0;
 }
 
 static int
 tmpfs_print(struct vop_print_args *v)
 {
 	struct vnode *vp = v->a_vp;
 
 	struct tmpfs_node *node;
 
 	node = VP_TO_TMPFS_NODE(vp);
 
-	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %d\n",
-	    node, node->tn_flags, node->tn_links);
+	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %jd\n",
+	    node, node->tn_flags, (uintmax_t)node->tn_links);
 	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
 	    node->tn_mode, node->tn_uid, node->tn_gid,
 	    (intmax_t)node->tn_size, node->tn_status);
 
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 
 	printf("\n");
 
 	return 0;
 }
 
 static int
 tmpfs_pathconf(struct vop_pathconf_args *v)
 {
 	int name = v->a_name;
 	register_t *retval = v->a_retval;
 
 	int error;
 
 	error = 0;
 
 	switch (name) {
 	case _PC_LINK_MAX:
 		*retval = LINK_MAX;
 		break;
 
 	case _PC_NAME_MAX:
 		*retval = NAME_MAX;
 		break;
 
 	case _PC_PATH_MAX:
 		*retval = PATH_MAX;
 		break;
 
 	case _PC_PIPE_BUF:
 		*retval = PIPE_BUF;
 		break;
 
 	case _PC_CHOWN_RESTRICTED:
 		*retval = 1;
 		break;
 
 	case _PC_NO_TRUNC:
 		*retval = 1;
 		break;
 
 	case _PC_SYNC_IO:
 		*retval = 1;
 		break;
 
 	case _PC_FILESIZEBITS:
 		*retval = 0; /* XXX Don't know which value should I return. */
 		break;
 
 	default:
 		error = EINVAL;
 	}
 
 	return error;
 }
 
 static int
 tmpfs_vptofh(struct vop_vptofh_args *ap)
 {
 	struct tmpfs_fid *tfhp;
 	struct tmpfs_node *node;
 
 	tfhp = (struct tmpfs_fid *)ap->a_fhp;
 	node = VP_TO_TMPFS_NODE(ap->a_vp);
 
 	tfhp->tf_len = sizeof(struct tmpfs_fid);
 	tfhp->tf_id = node->tn_id;
 	tfhp->tf_gen = node->tn_gen;
 
 	return (0);
 }
 
 static int
 tmpfs_whiteout(struct vop_whiteout_args *ap)
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct tmpfs_dirent *de;
 
 	switch (ap->a_flags) {
 	case LOOKUP:
 		return (0);
 	case CREATE:
 		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
 		if (de != NULL)
 			return (de->td_node == NULL ? 0 : EEXIST);
 		return (tmpfs_dir_whiteout_add(dvp, cnp));
 	case DELETE:
 		tmpfs_dir_whiteout_remove(dvp, cnp);
 		return (0);
 	default:
 		panic("tmpfs_whiteout: unknown op");
 	}
 }
 
 /*
  * vnode operations vector used for files stored in a tmpfs file system.
  */
 struct vop_vector tmpfs_vnodeop_entries = {
 	.vop_default =			&default_vnodeops,
 	.vop_lookup =			vfs_cache_lookup,
 	.vop_cachedlookup =		tmpfs_lookup,
 	.vop_create =			tmpfs_create,
 	.vop_mknod =			tmpfs_mknod,
 	.vop_open =			tmpfs_open,
 	.vop_close =			tmpfs_close,
 	.vop_access =			tmpfs_access,
 	.vop_getattr =			tmpfs_getattr,
 	.vop_setattr =			tmpfs_setattr,
 	.vop_read =			tmpfs_read,
 	.vop_write =			tmpfs_write,
 	.vop_fsync =			tmpfs_fsync,
 	.vop_remove =			tmpfs_remove,
 	.vop_link =			tmpfs_link,
 	.vop_rename =			tmpfs_rename,
 	.vop_mkdir =			tmpfs_mkdir,
 	.vop_rmdir =			tmpfs_rmdir,
 	.vop_symlink =			tmpfs_symlink,
 	.vop_readdir =			tmpfs_readdir,
 	.vop_readlink =			tmpfs_readlink,
 	.vop_inactive =			tmpfs_inactive,
 	.vop_reclaim =			tmpfs_reclaim,
 	.vop_print =			tmpfs_print,
 	.vop_pathconf =			tmpfs_pathconf,
 	.vop_vptofh =			tmpfs_vptofh,
 	.vop_whiteout =			tmpfs_whiteout,
 	.vop_bmap =			VOP_EOPNOTSUPP,
 };
 
Index: head/sys/ufs/ffs/ffs_softdep.c
===================================================================
--- head/sys/ufs/ffs/ffs_softdep.c	(revision 311521)
+++ head/sys/ufs/ffs/ffs_softdep.c	(revision 311522)
@@ -1,14400 +1,14402 @@
 /*-
  * Copyright 1998, 2000 Marshall Kirk McKusick.
  * Copyright 2009, 2010 Jeffrey W. Roberson <jeff@FreeBSD.org>
  * All rights reserved.
  *
  * The soft updates code is derived from the appendix of a University
  * of Michigan technical report (Gregory R. Ganger and Yale N. Patt,
  * "Soft Updates: A Solution to the Metadata Update Problem in File
  * Systems", CSE-TR-254-95, August 1995).
  *
  * Further information about soft updates can be obtained from:
  *
  *	Marshall Kirk McKusick		http://www.mckusick.com/softdep/
  *	1614 Oxford Street		mckusick@mckusick.com
  *	Berkeley, CA 94709-1608		+1-510-843-9542
  *	USA
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *	from: @(#)ffs_softdep.c	9.59 (McKusick) 6/21/00
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_ffs.h"
 #include "opt_quota.h"
 #include "opt_ddb.h"
 
 /*
  * For now we want the safety net that the DEBUG flag provides.
  */
 #ifndef DEBUG
 #define DEBUG
 #endif
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/kdb.h>
 #include <sys/kthread.h>
 #include <sys/ktr.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/priv.h>
 #include <sys/proc.h>
 #include <sys/racct.h>
 #include <sys/rwlock.h>
 #include <sys/stat.h>
 #include <sys/sysctl.h>
 #include <sys/syslog.h>
 #include <sys/vnode.h>
 #include <sys/conf.h>
 
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ffs/fs.h>
 #include <ufs/ffs/softdep.h>
 #include <ufs/ffs/ffs_extern.h>
 #include <ufs/ufs/ufs_extern.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/vm_object.h>
 
 #include <geom/geom.h>
 
 #include <ddb/ddb.h>
 
 #define	KTR_SUJ	0	/* Define to KTR_SPARE. */
 
 #ifndef SOFTUPDATES
 
 int
 softdep_flushfiles(oldmnt, flags, td)
 	struct mount *oldmnt;
 	int flags;
 	struct thread *td;
 {
 
 	panic("softdep_flushfiles called");
 }
 
 int
 softdep_mount(devvp, mp, fs, cred)
 	struct vnode *devvp;
 	struct mount *mp;
 	struct fs *fs;
 	struct ucred *cred;
 {
 
 	return (0);
 }
 
 void
 softdep_initialize()
 {
 
 	return;
 }
 
 void
 softdep_uninitialize()
 {
 
 	return;
 }
 
 void
 softdep_unmount(mp)
 	struct mount *mp;
 {
 
 	panic("softdep_unmount called");
 }
 
 void
 softdep_setup_sbupdate(ump, fs, bp)
 	struct ufsmount *ump;
 	struct fs *fs;
 	struct buf *bp;
 {
 
 	panic("softdep_setup_sbupdate called");
 }
 
 void
 softdep_setup_inomapdep(bp, ip, newinum, mode)
 	struct buf *bp;
 	struct inode *ip;
 	ino_t newinum;
 	int mode;
 {
 
 	panic("softdep_setup_inomapdep called");
 }
 
 void
 softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags)
 	struct buf *bp;
 	struct mount *mp;
 	ufs2_daddr_t newblkno;
 	int frags;
 	int oldfrags;
 {
 
 	panic("softdep_setup_blkmapdep called");
 }
 
 void
 softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
 	struct inode *ip;
 	ufs_lbn_t lbn;
 	ufs2_daddr_t newblkno;
 	ufs2_daddr_t oldblkno;
 	long newsize;
 	long oldsize;
 	struct buf *bp;
 {
 	
 	panic("softdep_setup_allocdirect called");
 }
 
 void
 softdep_setup_allocext(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
 	struct inode *ip;
 	ufs_lbn_t lbn;
 	ufs2_daddr_t newblkno;
 	ufs2_daddr_t oldblkno;
 	long newsize;
 	long oldsize;
 	struct buf *bp;
 {
 	
 	panic("softdep_setup_allocext called");
 }
 
 void
 softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
 	struct inode *ip;
 	ufs_lbn_t lbn;
 	struct buf *bp;
 	int ptrno;
 	ufs2_daddr_t newblkno;
 	ufs2_daddr_t oldblkno;
 	struct buf *nbp;
 {
 
 	panic("softdep_setup_allocindir_page called");
 }
 
 void
 softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno)
 	struct buf *nbp;
 	struct inode *ip;
 	struct buf *bp;
 	int ptrno;
 	ufs2_daddr_t newblkno;
 {
 
 	panic("softdep_setup_allocindir_meta called");
 }
 
 void
 softdep_journal_freeblocks(ip, cred, length, flags)
 	struct inode *ip;
 	struct ucred *cred;
 	off_t length;
 	int flags;
 {
 	
 	panic("softdep_journal_freeblocks called");
 }
 
 void
 softdep_journal_fsync(ip)
 	struct inode *ip;
 {
 
 	panic("softdep_journal_fsync called");
 }
 
 void
 softdep_setup_freeblocks(ip, length, flags)
 	struct inode *ip;
 	off_t length;
 	int flags;
 {
 	
 	panic("softdep_setup_freeblocks called");
 }
 
 void
 softdep_freefile(pvp, ino, mode)
 		struct vnode *pvp;
 		ino_t ino;
 		int mode;
 {
 
 	panic("softdep_freefile called");
 }
 
 int
 softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
 	struct buf *bp;
 	struct inode *dp;
 	off_t diroffset;
 	ino_t newinum;
 	struct buf *newdirbp;
 	int isnewblk;
 {
 
 	panic("softdep_setup_directory_add called");
 }
 
 void
 softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize)
 	struct buf *bp;
 	struct inode *dp;
 	caddr_t base;
 	caddr_t oldloc;
 	caddr_t newloc;
 	int entrysize;
 {
 
 	panic("softdep_change_directoryentry_offset called");
 }
 
 void
 softdep_setup_remove(bp, dp, ip, isrmdir)
 	struct buf *bp;
 	struct inode *dp;
 	struct inode *ip;
 	int isrmdir;
 {
 	
 	panic("softdep_setup_remove called");
 }
 
 void
 softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
 	struct buf *bp;
 	struct inode *dp;
 	struct inode *ip;
 	ino_t newinum;
 	int isrmdir;
 {
 
 	panic("softdep_setup_directory_change called");
 }
 
 void
 softdep_setup_blkfree(mp, bp, blkno, frags, wkhd)
 	struct mount *mp;
 	struct buf *bp;
 	ufs2_daddr_t blkno;
 	int frags;
 	struct workhead *wkhd;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 void
 softdep_setup_inofree(mp, bp, ino, wkhd)
 	struct mount *mp;
 	struct buf *bp;
 	ino_t ino;
 	struct workhead *wkhd;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 void
 softdep_setup_unlink(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 void
 softdep_setup_link(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 void
 softdep_revert_link(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 void
 softdep_setup_rmdir(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 void
 softdep_revert_rmdir(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 void
 softdep_setup_create(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 void
 softdep_revert_create(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 void
 softdep_setup_mkdir(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 void
 softdep_revert_mkdir(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 void
 softdep_setup_dotdot_link(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 int
 softdep_prealloc(vp, waitok)
 	struct vnode *vp;
 	int waitok;
 {
 
 	panic("%s called", __FUNCTION__);
 }
 
 int
 softdep_journal_lookup(mp, vpp)
 	struct mount *mp;
 	struct vnode **vpp;
 {
 
 	return (ENOENT);
 }
 
 void
 softdep_change_linkcnt(ip)
 	struct inode *ip;
 {
 
 	panic("softdep_change_linkcnt called");
 }
 
 void 
 softdep_load_inodeblock(ip)
 	struct inode *ip;
 {
 
 	panic("softdep_load_inodeblock called");
 }
 
 void
 softdep_update_inodeblock(ip, bp, waitfor)
 	struct inode *ip;
 	struct buf *bp;
 	int waitfor;
 {
 
 	panic("softdep_update_inodeblock called");
 }
 
 int
 softdep_fsync(vp)
 	struct vnode *vp;	/* the "in_core" copy of the inode */
 {
 
 	return (0);
 }
 
 void
 softdep_fsync_mountdev(vp)
 	struct vnode *vp;
 {
 
 	return;
 }
 
 int
 softdep_flushworklist(oldmnt, countp, td)
 	struct mount *oldmnt;
 	int *countp;
 	struct thread *td;
 {
 
 	*countp = 0;
 	return (0);
 }
 
 int
 softdep_sync_metadata(struct vnode *vp)
 {
 
 	panic("softdep_sync_metadata called");
 }
 
 int
 softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor)
 {
 
 	panic("softdep_sync_buf called");
 }
 
 int
 softdep_slowdown(vp)
 	struct vnode *vp;
 {
 
 	panic("softdep_slowdown called");
 }
 
 int
 softdep_request_cleanup(fs, vp, cred, resource)
 	struct fs *fs;
 	struct vnode *vp;
 	struct ucred *cred;
 	int resource;
 {
 
 	return (0);
 }
 
 int
 softdep_check_suspend(struct mount *mp,
 		      struct vnode *devvp,
 		      int softdep_depcnt,
 		      int softdep_accdepcnt,
 		      int secondary_writes,
 		      int secondary_accwrites)
 {
 	struct bufobj *bo;
 	int error;
 	
 	(void) softdep_depcnt,
 	(void) softdep_accdepcnt;
 
 	bo = &devvp->v_bufobj;
 	ASSERT_BO_WLOCKED(bo);
 
 	MNT_ILOCK(mp);
 	while (mp->mnt_secondary_writes != 0) {
 		BO_UNLOCK(bo);
 		msleep(&mp->mnt_secondary_writes, MNT_MTX(mp),
 		    (PUSER - 1) | PDROP, "secwr", 0);
 		BO_LOCK(bo);
 		MNT_ILOCK(mp);
 	}
 
 	/*
 	 * Reasons for needing more work before suspend:
 	 * - Dirty buffers on devvp.
 	 * - Secondary writes occurred after start of vnode sync loop
 	 */
 	error = 0;
 	if (bo->bo_numoutput > 0 ||
 	    bo->bo_dirty.bv_cnt > 0 ||
 	    secondary_writes != 0 ||
 	    mp->mnt_secondary_writes != 0 ||
 	    secondary_accwrites != mp->mnt_secondary_accwrites)
 		error = EAGAIN;
 	BO_UNLOCK(bo);
 	return (error);
 }
 
 void
 softdep_get_depcounts(struct mount *mp,
 		      int *softdepactivep,
 		      int *softdepactiveaccp)
 {
 	(void) mp;
 	*softdepactivep = 0;
 	*softdepactiveaccp = 0;
 }
 
 void
 softdep_buf_append(bp, wkhd)
 	struct buf *bp;
 	struct workhead *wkhd;
 {
 
 	panic("softdep_buf_appendwork called");
 }
 
 void
 softdep_inode_append(ip, cred, wkhd)
 	struct inode *ip;
 	struct ucred *cred;
 	struct workhead *wkhd;
 {
 
 	panic("softdep_inode_appendwork called");
 }
 
 void
 softdep_freework(wkhd)
 	struct workhead *wkhd;
 {
 
 	panic("softdep_freework called");
 }
 
 #else
 
 FEATURE(softupdates, "FFS soft-updates support");
 
 static SYSCTL_NODE(_debug, OID_AUTO, softdep, CTLFLAG_RW, 0,
     "soft updates stats");
 static SYSCTL_NODE(_debug_softdep, OID_AUTO, total, CTLFLAG_RW, 0,
     "total dependencies allocated");
 static SYSCTL_NODE(_debug_softdep, OID_AUTO, highuse, CTLFLAG_RW, 0,
     "high use dependencies allocated");
 static SYSCTL_NODE(_debug_softdep, OID_AUTO, current, CTLFLAG_RW, 0,
     "current dependencies allocated");
 static SYSCTL_NODE(_debug_softdep, OID_AUTO, write, CTLFLAG_RW, 0,
     "current dependencies written");
 
 unsigned long dep_current[D_LAST + 1];
 unsigned long dep_highuse[D_LAST + 1];
 unsigned long dep_total[D_LAST + 1];
 unsigned long dep_write[D_LAST + 1];
 
 #define	SOFTDEP_TYPE(type, str, long)					\
     static MALLOC_DEFINE(M_ ## type, #str, long);			\
     SYSCTL_ULONG(_debug_softdep_total, OID_AUTO, str, CTLFLAG_RD,	\
 	&dep_total[D_ ## type], 0, "");					\
     SYSCTL_ULONG(_debug_softdep_current, OID_AUTO, str, CTLFLAG_RD, 	\
 	&dep_current[D_ ## type], 0, "");				\
     SYSCTL_ULONG(_debug_softdep_highuse, OID_AUTO, str, CTLFLAG_RD, 	\
 	&dep_highuse[D_ ## type], 0, "");				\
     SYSCTL_ULONG(_debug_softdep_write, OID_AUTO, str, CTLFLAG_RD, 	\
 	&dep_write[D_ ## type], 0, "");
 
 SOFTDEP_TYPE(PAGEDEP, pagedep, "File page dependencies"); 
 SOFTDEP_TYPE(INODEDEP, inodedep, "Inode dependencies");
 SOFTDEP_TYPE(BMSAFEMAP, bmsafemap,
     "Block or frag allocated from cyl group map");
 SOFTDEP_TYPE(NEWBLK, newblk, "New block or frag allocation dependency");
 SOFTDEP_TYPE(ALLOCDIRECT, allocdirect, "Block or frag dependency for an inode");
 SOFTDEP_TYPE(INDIRDEP, indirdep, "Indirect block dependencies");
 SOFTDEP_TYPE(ALLOCINDIR, allocindir, "Block dependency for an indirect block");
 SOFTDEP_TYPE(FREEFRAG, freefrag, "Previously used frag for an inode");
 SOFTDEP_TYPE(FREEBLKS, freeblks, "Blocks freed from an inode");
 SOFTDEP_TYPE(FREEFILE, freefile, "Inode deallocated");
 SOFTDEP_TYPE(DIRADD, diradd, "New directory entry");
 SOFTDEP_TYPE(MKDIR, mkdir, "New directory");
 SOFTDEP_TYPE(DIRREM, dirrem, "Directory entry deleted");
 SOFTDEP_TYPE(NEWDIRBLK, newdirblk, "Unclaimed new directory block");
 SOFTDEP_TYPE(FREEWORK, freework, "free an inode block");
 SOFTDEP_TYPE(FREEDEP, freedep, "track a block free");
 SOFTDEP_TYPE(JADDREF, jaddref, "Journal inode ref add");
 SOFTDEP_TYPE(JREMREF, jremref, "Journal inode ref remove");
 SOFTDEP_TYPE(JMVREF, jmvref, "Journal inode ref move");
 SOFTDEP_TYPE(JNEWBLK, jnewblk, "Journal new block");
 SOFTDEP_TYPE(JFREEBLK, jfreeblk, "Journal free block");
 SOFTDEP_TYPE(JFREEFRAG, jfreefrag, "Journal free frag");
 SOFTDEP_TYPE(JSEG, jseg, "Journal segment");
 SOFTDEP_TYPE(JSEGDEP, jsegdep, "Journal segment complete");
 SOFTDEP_TYPE(SBDEP, sbdep, "Superblock write dependency");
 SOFTDEP_TYPE(JTRUNC, jtrunc, "Journal inode truncation");
 SOFTDEP_TYPE(JFSYNC, jfsync, "Journal fsync complete");
 
 static MALLOC_DEFINE(M_SENTINEL, "sentinel", "Worklist sentinel");
 
 static MALLOC_DEFINE(M_SAVEDINO, "savedino", "Saved inodes");
 static MALLOC_DEFINE(M_JBLOCKS, "jblocks", "Journal block locations");
 static MALLOC_DEFINE(M_MOUNTDATA, "softdep", "Softdep per-mount data");
 
 #define M_SOFTDEP_FLAGS	(M_WAITOK)
 
 /* 
  * translate from workitem type to memory type
  * MUST match the defines above, such that memtype[D_XXX] == M_XXX
  */
 static struct malloc_type *memtype[] = {
 	M_PAGEDEP,
 	M_INODEDEP,
 	M_BMSAFEMAP,
 	M_NEWBLK,
 	M_ALLOCDIRECT,
 	M_INDIRDEP,
 	M_ALLOCINDIR,
 	M_FREEFRAG,
 	M_FREEBLKS,
 	M_FREEFILE,
 	M_DIRADD,
 	M_MKDIR,
 	M_DIRREM,
 	M_NEWDIRBLK,
 	M_FREEWORK,
 	M_FREEDEP,
 	M_JADDREF,
 	M_JREMREF,
 	M_JMVREF,
 	M_JNEWBLK,
 	M_JFREEBLK,
 	M_JFREEFRAG,
 	M_JSEG,
 	M_JSEGDEP,
 	M_SBDEP,
 	M_JTRUNC,
 	M_JFSYNC,
 	M_SENTINEL
 };
 
 #define DtoM(type) (memtype[type])
 
 /*
  * Names of malloc types.
  */
 #define TYPENAME(type)  \
 	((unsigned)(type) <= D_LAST ? memtype[type]->ks_shortdesc : "???")
 /*
  * End system adaptation definitions.
  */
 
 #define	DOTDOT_OFFSET	offsetof(struct dirtemplate, dotdot_ino)
 #define	DOT_OFFSET	offsetof(struct dirtemplate, dot_ino)
 
 /*
  * Internal function prototypes.
  */
 static	void check_clear_deps(struct mount *);
 static	void softdep_error(char *, int);
 static	int softdep_process_worklist(struct mount *, int);
 static	int softdep_waitidle(struct mount *, int);
 static	void drain_output(struct vnode *);
 static	struct buf *getdirtybuf(struct buf *, struct rwlock *, int);
 static	int check_inodedep_free(struct inodedep *);
 static	void clear_remove(struct mount *);
 static	void clear_inodedeps(struct mount *);
 static	void unlinked_inodedep(struct mount *, struct inodedep *);
 static	void clear_unlinked_inodedep(struct inodedep *);
 static	struct inodedep *first_unlinked_inodedep(struct ufsmount *);
 static	int flush_pagedep_deps(struct vnode *, struct mount *,
 	    struct diraddhd *);
 static	int free_pagedep(struct pagedep *);
 static	int flush_newblk_dep(struct vnode *, struct mount *, ufs_lbn_t);
 static	int flush_inodedep_deps(struct vnode *, struct mount *, ino_t);
 static	int flush_deplist(struct allocdirectlst *, int, int *);
 static	int sync_cgs(struct mount *, int);
 static	int handle_written_filepage(struct pagedep *, struct buf *, int);
 static	int handle_written_sbdep(struct sbdep *, struct buf *);
 static	void initiate_write_sbdep(struct sbdep *);
 static	void diradd_inode_written(struct diradd *, struct inodedep *);
 static	int handle_written_indirdep(struct indirdep *, struct buf *,
 	    struct buf**, int);
 static	int handle_written_inodeblock(struct inodedep *, struct buf *, int);
 static	int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *,
 	    uint8_t *);
 static	int handle_written_bmsafemap(struct bmsafemap *, struct buf *, int);
 static	void handle_written_jaddref(struct jaddref *);
 static	void handle_written_jremref(struct jremref *);
 static	void handle_written_jseg(struct jseg *, struct buf *);
 static	void handle_written_jnewblk(struct jnewblk *);
 static	void handle_written_jblkdep(struct jblkdep *);
 static	void handle_written_jfreefrag(struct jfreefrag *);
 static	void complete_jseg(struct jseg *);
 static	void complete_jsegs(struct jseg *);
 static	void jseg_write(struct ufsmount *ump, struct jseg *, uint8_t *);
 static	void jaddref_write(struct jaddref *, struct jseg *, uint8_t *);
 static	void jremref_write(struct jremref *, struct jseg *, uint8_t *);
 static	void jmvref_write(struct jmvref *, struct jseg *, uint8_t *);
 static	void jtrunc_write(struct jtrunc *, struct jseg *, uint8_t *);
 static	void jfsync_write(struct jfsync *, struct jseg *, uint8_t *data);
 static	void jnewblk_write(struct jnewblk *, struct jseg *, uint8_t *);
 static	void jfreeblk_write(struct jfreeblk *, struct jseg *, uint8_t *);
 static	void jfreefrag_write(struct jfreefrag *, struct jseg *, uint8_t *);
 static	inline void inoref_write(struct inoref *, struct jseg *,
 	    struct jrefrec *);
 static	void handle_allocdirect_partdone(struct allocdirect *,
 	    struct workhead *);
 static	struct jnewblk *cancel_newblk(struct newblk *, struct worklist *,
 	    struct workhead *);
 static	void indirdep_complete(struct indirdep *);
 static	int indirblk_lookup(struct mount *, ufs2_daddr_t);
 static	void indirblk_insert(struct freework *);
 static	void indirblk_remove(struct freework *);
 static	void handle_allocindir_partdone(struct allocindir *);
 static	void initiate_write_filepage(struct pagedep *, struct buf *);
 static	void initiate_write_indirdep(struct indirdep*, struct buf *);
 static	void handle_written_mkdir(struct mkdir *, int);
 static	int jnewblk_rollback(struct jnewblk *, struct fs *, struct cg *,
 	    uint8_t *);
 static	void initiate_write_bmsafemap(struct bmsafemap *, struct buf *);
 static	void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
 static	void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
 static	void handle_workitem_freefile(struct freefile *);
 static	int handle_workitem_remove(struct dirrem *, int);
 static	struct dirrem *newdirrem(struct buf *, struct inode *,
 	    struct inode *, int, struct dirrem **);
 static	struct indirdep *indirdep_lookup(struct mount *, struct inode *,
 	    struct buf *);
 static	void cancel_indirdep(struct indirdep *, struct buf *,
 	    struct freeblks *);
 static	void free_indirdep(struct indirdep *);
 static	void free_diradd(struct diradd *, struct workhead *);
 static	void merge_diradd(struct inodedep *, struct diradd *);
 static	void complete_diradd(struct diradd *);
 static	struct diradd *diradd_lookup(struct pagedep *, int);
 static	struct jremref *cancel_diradd_dotdot(struct inode *, struct dirrem *,
 	    struct jremref *);
 static	struct jremref *cancel_mkdir_dotdot(struct inode *, struct dirrem *,
 	    struct jremref *);
 static	void cancel_diradd(struct diradd *, struct dirrem *, struct jremref *,
 	    struct jremref *, struct jremref *);
 static	void dirrem_journal(struct dirrem *, struct jremref *, struct jremref *,
 	    struct jremref *);
 static	void cancel_allocindir(struct allocindir *, struct buf *bp,
 	    struct freeblks *, int);
 static	int setup_trunc_indir(struct freeblks *, struct inode *,
 	    ufs_lbn_t, ufs_lbn_t, ufs2_daddr_t);
 static	void complete_trunc_indir(struct freework *);
 static	void trunc_indirdep(struct indirdep *, struct freeblks *, struct buf *,
 	    int);
 static	void complete_mkdir(struct mkdir *);
 static	void free_newdirblk(struct newdirblk *);
 static	void free_jremref(struct jremref *);
 static	void free_jaddref(struct jaddref *);
 static	void free_jsegdep(struct jsegdep *);
 static	void free_jsegs(struct jblocks *);
 static	void rele_jseg(struct jseg *);
 static	void free_jseg(struct jseg *, struct jblocks *);
 static	void free_jnewblk(struct jnewblk *);
 static	void free_jblkdep(struct jblkdep *);
 static	void free_jfreefrag(struct jfreefrag *);
 static	void free_freedep(struct freedep *);
 static	void journal_jremref(struct dirrem *, struct jremref *,
 	    struct inodedep *);
 static	void cancel_jnewblk(struct jnewblk *, struct workhead *);
 static	int cancel_jaddref(struct jaddref *, struct inodedep *,
 	    struct workhead *);
 static	void cancel_jfreefrag(struct jfreefrag *);
 static	inline void setup_freedirect(struct freeblks *, struct inode *,
 	    int, int);
 static	inline void setup_freeext(struct freeblks *, struct inode *, int, int);
 static	inline void setup_freeindir(struct freeblks *, struct inode *, int,
 	    ufs_lbn_t, int);
 static	inline struct freeblks *newfreeblks(struct mount *, struct inode *);
 static	void freeblks_free(struct ufsmount *, struct freeblks *, int);
 static	void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t);
 static	ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t);
 static	int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int);
 static	void trunc_dependencies(struct inode *, struct freeblks *, ufs_lbn_t,
 	    int, int);
 static	void trunc_pages(struct inode *, off_t, ufs2_daddr_t, int);
 static 	int cancel_pagedep(struct pagedep *, struct freeblks *, int);
 static	int deallocate_dependencies(struct buf *, struct freeblks *, int);
 static	void newblk_freefrag(struct newblk*);
 static	void free_newblk(struct newblk *);
 static	void cancel_allocdirect(struct allocdirectlst *,
 	    struct allocdirect *, struct freeblks *);
 static	int check_inode_unwritten(struct inodedep *);
 static	int free_inodedep(struct inodedep *);
 static	void freework_freeblock(struct freework *);
 static	void freework_enqueue(struct freework *);
 static	int handle_workitem_freeblocks(struct freeblks *, int);
 static	int handle_complete_freeblocks(struct freeblks *, int);
 static	void handle_workitem_indirblk(struct freework *);
 static	void handle_written_freework(struct freework *);
 static	void merge_inode_lists(struct allocdirectlst *,struct allocdirectlst *);
 static	struct worklist *jnewblk_merge(struct worklist *, struct worklist *,
 	    struct workhead *);
 static	struct freefrag *setup_allocindir_phase2(struct buf *, struct inode *,
 	    struct inodedep *, struct allocindir *, ufs_lbn_t);
 static	struct allocindir *newallocindir(struct inode *, int, ufs2_daddr_t,
 	    ufs2_daddr_t, ufs_lbn_t);
 static	void handle_workitem_freefrag(struct freefrag *);
 static	struct freefrag *newfreefrag(struct inode *, ufs2_daddr_t, long,
 	    ufs_lbn_t);
 static	void allocdirect_merge(struct allocdirectlst *,
 	    struct allocdirect *, struct allocdirect *);
 static	struct freefrag *allocindir_merge(struct allocindir *,
 	    struct allocindir *);
 static	int bmsafemap_find(struct bmsafemap_hashhead *, int,
 	    struct bmsafemap **);
 static	struct bmsafemap *bmsafemap_lookup(struct mount *, struct buf *,
 	    int cg, struct bmsafemap *);
 static	int newblk_find(struct newblk_hashhead *, ufs2_daddr_t, int,
 	    struct newblk **);
 static	int newblk_lookup(struct mount *, ufs2_daddr_t, int, struct newblk **);
 static	int inodedep_find(struct inodedep_hashhead *, ino_t,
 	    struct inodedep **);
 static	int inodedep_lookup(struct mount *, ino_t, int, struct inodedep **);
 static	int pagedep_lookup(struct mount *, struct buf *bp, ino_t, ufs_lbn_t,
 	    int, struct pagedep **);
 static	int pagedep_find(struct pagedep_hashhead *, ino_t, ufs_lbn_t,
 	    struct pagedep **);
 static	void pause_timer(void *);
 static	int request_cleanup(struct mount *, int);
 static	void schedule_cleanup(struct mount *);
 static void softdep_ast_cleanup_proc(void);
 static	int process_worklist_item(struct mount *, int, int);
 static	void process_removes(struct vnode *);
 static	void process_truncates(struct vnode *);
 static	void jwork_move(struct workhead *, struct workhead *);
 static	void jwork_insert(struct workhead *, struct jsegdep *);
 static	void add_to_worklist(struct worklist *, int);
 static	void wake_worklist(struct worklist *);
 static	void wait_worklist(struct worklist *, char *);
 static	void remove_from_worklist(struct worklist *);
 static	void softdep_flush(void *);
 static	void softdep_flushjournal(struct mount *);
 static	int softdep_speedup(struct ufsmount *);
 static	void worklist_speedup(struct mount *);
 static	int journal_mount(struct mount *, struct fs *, struct ucred *);
 static	void journal_unmount(struct ufsmount *);
 static	int journal_space(struct ufsmount *, int);
 static	void journal_suspend(struct ufsmount *);
 static	int journal_unsuspend(struct ufsmount *ump);
 static	void softdep_prelink(struct vnode *, struct vnode *);
 static	void add_to_journal(struct worklist *);
 static	void remove_from_journal(struct worklist *);
 static	bool softdep_excess_items(struct ufsmount *, int);
 static	void softdep_process_journal(struct mount *, struct worklist *, int);
 static	struct jremref *newjremref(struct dirrem *, struct inode *,
 	    struct inode *ip, off_t, nlink_t);
 static	struct jaddref *newjaddref(struct inode *, ino_t, off_t, int16_t,
 	    uint16_t);
 static	inline void newinoref(struct inoref *, ino_t, ino_t, off_t, nlink_t,
 	    uint16_t);
 static	inline struct jsegdep *inoref_jseg(struct inoref *);
 static	struct jmvref *newjmvref(struct inode *, ino_t, off_t, off_t);
 static	struct jfreeblk *newjfreeblk(struct freeblks *, ufs_lbn_t,
 	    ufs2_daddr_t, int);
 static	void adjust_newfreework(struct freeblks *, int);
 static	struct jtrunc *newjtrunc(struct freeblks *, off_t, int);
 static	void move_newblock_dep(struct jaddref *, struct inodedep *);
 static	void cancel_jfreeblk(struct freeblks *, ufs2_daddr_t);
 static	struct jfreefrag *newjfreefrag(struct freefrag *, struct inode *,
 	    ufs2_daddr_t, long, ufs_lbn_t);
 static	struct freework *newfreework(struct ufsmount *, struct freeblks *,
 	    struct freework *, ufs_lbn_t, ufs2_daddr_t, int, int, int);
 static	int jwait(struct worklist *, int);
 static	struct inodedep *inodedep_lookup_ip(struct inode *);
 static	int bmsafemap_backgroundwrite(struct bmsafemap *, struct buf *);
 static	struct freefile *handle_bufwait(struct inodedep *, struct workhead *);
 static	void handle_jwork(struct workhead *);
 static	struct mkdir *setup_newdir(struct diradd *, ino_t, ino_t, struct buf *,
 	    struct mkdir **);
 static	struct jblocks *jblocks_create(void);
 static	ufs2_daddr_t jblocks_alloc(struct jblocks *, int, int *);
 static	void jblocks_free(struct jblocks *, struct mount *, int);
 static	void jblocks_destroy(struct jblocks *);
 static	void jblocks_add(struct jblocks *, ufs2_daddr_t, int);
 
 /*
  * Exported softdep operations.
  */
 static	void softdep_disk_io_initiation(struct buf *);
 static	void softdep_disk_write_complete(struct buf *);
 static	void softdep_deallocate_dependencies(struct buf *);
 static	int softdep_count_dependencies(struct buf *bp, int);
 
 /*
  * Global lock over all of soft updates.
  */
 static struct mtx lk;
 MTX_SYSINIT(softdep_lock, &lk, "Global Softdep Lock", MTX_DEF);
 
 #define ACQUIRE_GBLLOCK(lk)	mtx_lock(lk)
 #define FREE_GBLLOCK(lk)	mtx_unlock(lk)
 #define GBLLOCK_OWNED(lk)	mtx_assert((lk), MA_OWNED)
 
 /*
  * Per-filesystem soft-updates locking.
  */
 #define LOCK_PTR(ump)		(&(ump)->um_softdep->sd_fslock)
 #define TRY_ACQUIRE_LOCK(ump)	rw_try_wlock(&(ump)->um_softdep->sd_fslock)
 #define ACQUIRE_LOCK(ump)	rw_wlock(&(ump)->um_softdep->sd_fslock)
 #define FREE_LOCK(ump)		rw_wunlock(&(ump)->um_softdep->sd_fslock)
 #define LOCK_OWNED(ump)		rw_assert(&(ump)->um_softdep->sd_fslock, \
 				    RA_WLOCKED)
 
 #define	BUF_AREC(bp)		lockallowrecurse(&(bp)->b_lock)
 #define	BUF_NOREC(bp)		lockdisablerecurse(&(bp)->b_lock)
 
 /*
  * Worklist queue management.
  * These routines require that the lock be held.
  */
 #ifndef /* NOT */ DEBUG
 #define WORKLIST_INSERT(head, item) do {	\
 	(item)->wk_state |= ONWORKLIST;		\
 	LIST_INSERT_HEAD(head, item, wk_list);	\
 } while (0)
 #define WORKLIST_REMOVE(item) do {		\
 	(item)->wk_state &= ~ONWORKLIST;	\
 	LIST_REMOVE(item, wk_list);		\
 } while (0)
 #define WORKLIST_INSERT_UNLOCKED	WORKLIST_INSERT
 #define WORKLIST_REMOVE_UNLOCKED	WORKLIST_REMOVE
 
 #else /* DEBUG */
 static	void worklist_insert(struct workhead *, struct worklist *, int);
 static	void worklist_remove(struct worklist *, int);
 
 #define WORKLIST_INSERT(head, item) worklist_insert(head, item, 1)
 #define WORKLIST_INSERT_UNLOCKED(head, item) worklist_insert(head, item, 0)
 #define WORKLIST_REMOVE(item) worklist_remove(item, 1)
 #define WORKLIST_REMOVE_UNLOCKED(item) worklist_remove(item, 0)
 
 static void
 worklist_insert(head, item, locked)
 	struct workhead *head;
 	struct worklist *item;
 	int locked;
 {
 
 	if (locked)
 		LOCK_OWNED(VFSTOUFS(item->wk_mp));
 	if (item->wk_state & ONWORKLIST)
 		panic("worklist_insert: %p %s(0x%X) already on list",
 		    item, TYPENAME(item->wk_type), item->wk_state);
 	item->wk_state |= ONWORKLIST;
 	LIST_INSERT_HEAD(head, item, wk_list);
 }
 
 static void
 worklist_remove(item, locked)
 	struct worklist *item;
 	int locked;
 {
 
 	if (locked)
 		LOCK_OWNED(VFSTOUFS(item->wk_mp));
 	if ((item->wk_state & ONWORKLIST) == 0)
 		panic("worklist_remove: %p %s(0x%X) not on list",
 		    item, TYPENAME(item->wk_type), item->wk_state);
 	item->wk_state &= ~ONWORKLIST;
 	LIST_REMOVE(item, wk_list);
 }
 #endif /* DEBUG */
 
 /*
  * Merge two jsegdeps keeping only the oldest one as newer references
  * can't be discarded until after older references.
  */
 static inline struct jsegdep *
 jsegdep_merge(struct jsegdep *one, struct jsegdep *two)
 {
 	struct jsegdep *swp;
 
 	if (two == NULL)
 		return (one);
 
 	if (one->jd_seg->js_seq > two->jd_seg->js_seq) {
 		swp = one;
 		one = two;
 		two = swp;
 	}
 	WORKLIST_REMOVE(&two->jd_list);
 	free_jsegdep(two);
 
 	return (one);
 }
 
 /*
  * If two freedeps are compatible free one to reduce list size.
  */
 static inline struct freedep *
 freedep_merge(struct freedep *one, struct freedep *two)
 {
 	if (two == NULL)
 		return (one);
 
 	if (one->fd_freework == two->fd_freework) {
 		WORKLIST_REMOVE(&two->fd_list);
 		free_freedep(two);
 	}
 	return (one);
 }
 
 /*
  * Move journal work from one list to another.  Duplicate freedeps and
  * jsegdeps are coalesced to keep the lists as small as possible.
  */
 static void
 jwork_move(dst, src)
 	struct workhead *dst;
 	struct workhead *src;
 {
 	struct freedep *freedep;
 	struct jsegdep *jsegdep;
 	struct worklist *wkn;
 	struct worklist *wk;
 
 	KASSERT(dst != src,
 	    ("jwork_move: dst == src"));
 	freedep = NULL;
 	jsegdep = NULL;
 	LIST_FOREACH_SAFE(wk, dst, wk_list, wkn) {
 		if (wk->wk_type == D_JSEGDEP)
 			jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep);
 		else if (wk->wk_type == D_FREEDEP)
 			freedep = freedep_merge(WK_FREEDEP(wk), freedep);
 	}
 
 	while ((wk = LIST_FIRST(src)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		WORKLIST_INSERT(dst, wk);
 		if (wk->wk_type == D_JSEGDEP) {
 			jsegdep = jsegdep_merge(WK_JSEGDEP(wk), jsegdep);
 			continue;
 		}
 		if (wk->wk_type == D_FREEDEP)
 			freedep = freedep_merge(WK_FREEDEP(wk), freedep);
 	}
 }
 
 static void
 jwork_insert(dst, jsegdep)
 	struct workhead *dst;
 	struct jsegdep *jsegdep;
 {
 	struct jsegdep *jsegdepn;
 	struct worklist *wk;
 
 	LIST_FOREACH(wk, dst, wk_list)
 		if (wk->wk_type == D_JSEGDEP)
 			break;
 	if (wk == NULL) {
 		WORKLIST_INSERT(dst, &jsegdep->jd_list);
 		return;
 	}
 	jsegdepn = WK_JSEGDEP(wk);
 	if (jsegdep->jd_seg->js_seq < jsegdepn->jd_seg->js_seq) {
 		WORKLIST_REMOVE(wk);
 		free_jsegdep(jsegdepn);
 		WORKLIST_INSERT(dst, &jsegdep->jd_list);
 	} else
 		free_jsegdep(jsegdep);
 }
 
 /*
  * Routines for tracking and managing workitems.
  */
 static	void workitem_free(struct worklist *, int);
 static	void workitem_alloc(struct worklist *, int, struct mount *);
 static	void workitem_reassign(struct worklist *, int);
 
 #define	WORKITEM_FREE(item, type) \
 	workitem_free((struct worklist *)(item), (type))
 #define	WORKITEM_REASSIGN(item, type) \
 	workitem_reassign((struct worklist *)(item), (type))
 
 static void
 workitem_free(item, type)
 	struct worklist *item;
 	int type;
 {
 	struct ufsmount *ump;
 
 #ifdef DEBUG
 	if (item->wk_state & ONWORKLIST)
 		panic("workitem_free: %s(0x%X) still on list",
 		    TYPENAME(item->wk_type), item->wk_state);
 	if (item->wk_type != type && type != D_NEWBLK)
 		panic("workitem_free: type mismatch %s != %s",
 		    TYPENAME(item->wk_type), TYPENAME(type));
 #endif
 	if (item->wk_state & IOWAITING)
 		wakeup(item);
 	ump = VFSTOUFS(item->wk_mp);
 	LOCK_OWNED(ump);
 	KASSERT(ump->softdep_deps > 0,
 	    ("workitem_free: %s: softdep_deps going negative",
 	    ump->um_fs->fs_fsmnt));
 	if (--ump->softdep_deps == 0 && ump->softdep_req)
 		wakeup(&ump->softdep_deps);
 	KASSERT(dep_current[item->wk_type] > 0,
 	    ("workitem_free: %s: dep_current[%s] going negative",
 	    ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
 	KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
 	    ("workitem_free: %s: softdep_curdeps[%s] going negative",
 	    ump->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
 	atomic_subtract_long(&dep_current[item->wk_type], 1);
 	ump->softdep_curdeps[item->wk_type] -= 1;
 	free(item, DtoM(type));
 }
 
 static void
 workitem_alloc(item, type, mp)
 	struct worklist *item;
 	int type;
 	struct mount *mp;
 {
 	struct ufsmount *ump;
 
 	item->wk_type = type;
 	item->wk_mp = mp;
 	item->wk_state = 0;
 
 	ump = VFSTOUFS(mp);
 	ACQUIRE_GBLLOCK(&lk);
 	dep_current[type]++;
 	if (dep_current[type] > dep_highuse[type])
 		dep_highuse[type] = dep_current[type];
 	dep_total[type]++;
 	FREE_GBLLOCK(&lk);
 	ACQUIRE_LOCK(ump);
 	ump->softdep_curdeps[type] += 1;
 	ump->softdep_deps++;
 	ump->softdep_accdeps++;
 	FREE_LOCK(ump);
 }
 
 static void
 workitem_reassign(item, newtype)
 	struct worklist *item;
 	int newtype;
 {
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(item->wk_mp);
 	LOCK_OWNED(ump);
 	KASSERT(ump->softdep_curdeps[item->wk_type] > 0,
 	    ("workitem_reassign: %s: softdep_curdeps[%s] going negative",
 	    VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
 	ump->softdep_curdeps[item->wk_type] -= 1;
 	ump->softdep_curdeps[newtype] += 1;
 	KASSERT(dep_current[item->wk_type] > 0,
 	    ("workitem_reassign: %s: dep_current[%s] going negative",
 	    VFSTOUFS(item->wk_mp)->um_fs->fs_fsmnt, TYPENAME(item->wk_type)));
 	ACQUIRE_GBLLOCK(&lk);
 	dep_current[newtype]++;
 	dep_current[item->wk_type]--;
 	if (dep_current[newtype] > dep_highuse[newtype])
 		dep_highuse[newtype] = dep_current[newtype];
 	dep_total[newtype]++;
 	FREE_GBLLOCK(&lk);
 	item->wk_type = newtype;
 }
 
 /*
  * Workitem queue management
  */
 static int max_softdeps;	/* maximum number of structs before slowdown */
 static int tickdelay = 2;	/* number of ticks to pause during slowdown */
 static int proc_waiting;	/* tracks whether we have a timeout posted */
 static int *stat_countp;	/* statistic to count in proc_waiting timeout */
 static struct callout softdep_callout;
 static int req_clear_inodedeps;	/* syncer process flush some inodedeps */
 static int req_clear_remove;	/* syncer process flush some freeblks */
 static int softdep_flushcache = 0; /* Should we do BIO_FLUSH? */
 
 /*
  * runtime statistics
  */
 static int stat_flush_threads;	/* number of softdep flushing threads */
 static int stat_worklist_push;	/* number of worklist cleanups */
 static int stat_blk_limit_push;	/* number of times block limit neared */
 static int stat_ino_limit_push;	/* number of times inode limit neared */
 static int stat_blk_limit_hit;	/* number of times block slowdown imposed */
 static int stat_ino_limit_hit;	/* number of times inode slowdown imposed */
 static int stat_sync_limit_hit;	/* number of synchronous slowdowns imposed */
 static int stat_indir_blk_ptrs;	/* bufs redirtied as indir ptrs not written */
 static int stat_inode_bitmap;	/* bufs redirtied as inode bitmap not written */
 static int stat_direct_blk_ptrs;/* bufs redirtied as direct ptrs not written */
 static int stat_dir_entry;	/* bufs redirtied as dir entry cannot write */
 static int stat_jaddref;	/* bufs redirtied as ino bitmap can not write */
 static int stat_jnewblk;	/* bufs redirtied as blk bitmap can not write */
 static int stat_journal_min;	/* Times hit journal min threshold */
 static int stat_journal_low;	/* Times hit journal low threshold */
 static int stat_journal_wait;	/* Times blocked in jwait(). */
 static int stat_jwait_filepage;	/* Times blocked in jwait() for filepage. */
 static int stat_jwait_freeblks;	/* Times blocked in jwait() for freeblks. */
 static int stat_jwait_inode;	/* Times blocked in jwait() for inodes. */
 static int stat_jwait_newblk;	/* Times blocked in jwait() for newblks. */
 static int stat_cleanup_high_delay; /* Maximum cleanup delay (in ticks) */
 static int stat_cleanup_blkrequests; /* Number of block cleanup requests */
 static int stat_cleanup_inorequests; /* Number of inode cleanup requests */
 static int stat_cleanup_retries; /* Number of cleanups that needed to flush */
 static int stat_cleanup_failures; /* Number of cleanup requests that failed */
 static int stat_emptyjblocks; /* Number of potentially empty journal blocks */
 
 SYSCTL_INT(_debug_softdep, OID_AUTO, max_softdeps, CTLFLAG_RW,
     &max_softdeps, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, tickdelay, CTLFLAG_RW,
     &tickdelay, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, flush_threads, CTLFLAG_RD,
     &stat_flush_threads, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, worklist_push, CTLFLAG_RW,
     &stat_worklist_push, 0,"");
 SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_push, CTLFLAG_RW,
     &stat_blk_limit_push, 0,"");
 SYSCTL_INT(_debug_softdep, OID_AUTO, ino_limit_push, CTLFLAG_RW,
     &stat_ino_limit_push, 0,"");
 SYSCTL_INT(_debug_softdep, OID_AUTO, blk_limit_hit, CTLFLAG_RW,
     &stat_blk_limit_hit, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, ino_limit_hit, CTLFLAG_RW,
     &stat_ino_limit_hit, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, sync_limit_hit, CTLFLAG_RW,
     &stat_sync_limit_hit, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, indir_blk_ptrs, CTLFLAG_RW,
     &stat_indir_blk_ptrs, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, inode_bitmap, CTLFLAG_RW,
     &stat_inode_bitmap, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, direct_blk_ptrs, CTLFLAG_RW,
     &stat_direct_blk_ptrs, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, dir_entry, CTLFLAG_RW,
     &stat_dir_entry, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, jaddref_rollback, CTLFLAG_RW,
     &stat_jaddref, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, jnewblk_rollback, CTLFLAG_RW,
     &stat_jnewblk, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, journal_low, CTLFLAG_RW,
     &stat_journal_low, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, journal_min, CTLFLAG_RW,
     &stat_journal_min, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, journal_wait, CTLFLAG_RW,
     &stat_journal_wait, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_filepage, CTLFLAG_RW,
     &stat_jwait_filepage, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_freeblks, CTLFLAG_RW,
     &stat_jwait_freeblks, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_inode, CTLFLAG_RW,
     &stat_jwait_inode, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, jwait_newblk, CTLFLAG_RW,
     &stat_jwait_newblk, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_blkrequests, CTLFLAG_RW,
     &stat_cleanup_blkrequests, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_inorequests, CTLFLAG_RW,
     &stat_cleanup_inorequests, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_high_delay, CTLFLAG_RW,
     &stat_cleanup_high_delay, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_retries, CTLFLAG_RW,
     &stat_cleanup_retries, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, cleanup_failures, CTLFLAG_RW,
     &stat_cleanup_failures, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, flushcache, CTLFLAG_RW,
     &softdep_flushcache, 0, "");
 SYSCTL_INT(_debug_softdep, OID_AUTO, emptyjblocks, CTLFLAG_RD,
     &stat_emptyjblocks, 0, "");
 
 SYSCTL_DECL(_vfs_ffs);
 
 /* Whether to recompute the summary at mount time */
 static int compute_summary_at_mount = 0;
 SYSCTL_INT(_vfs_ffs, OID_AUTO, compute_summary_at_mount, CTLFLAG_RW,
 	   &compute_summary_at_mount, 0, "Recompute summary at mount");
 static int print_threads = 0;
 SYSCTL_INT(_debug_softdep, OID_AUTO, print_threads, CTLFLAG_RW,
     &print_threads, 0, "Notify flusher thread start/stop");
 
 /* List of all filesystems mounted with soft updates */
 static TAILQ_HEAD(, mount_softdeps) softdepmounts;
 
 /*
  * This function cleans the worklist for a filesystem.
  * Each filesystem running with soft dependencies gets its own
  * thread to run in this function. The thread is started up in
  * softdep_mount and shutdown in softdep_unmount. They show up
  * as part of the kernel "bufdaemon" process whose process
  * entry is available in bufdaemonproc.
  */
 static int searchfailed;
 extern struct proc *bufdaemonproc;
 static void
 softdep_flush(addr)
 	void *addr;
 {
 	struct mount *mp;
 	struct thread *td;
 	struct ufsmount *ump;
 
 	td = curthread;
 	td->td_pflags |= TDP_NORUNNINGBUF;
 	mp = (struct mount *)addr;
 	ump = VFSTOUFS(mp);
 	atomic_add_int(&stat_flush_threads, 1);
 	ACQUIRE_LOCK(ump);
 	ump->softdep_flags &= ~FLUSH_STARTING;
 	wakeup(&ump->softdep_flushtd);
 	FREE_LOCK(ump);
 	if (print_threads) {
 		if (stat_flush_threads == 1)
 			printf("Running %s at pid %d\n", bufdaemonproc->p_comm,
 			    bufdaemonproc->p_pid);
 		printf("Start thread %s\n", td->td_name);
 	}
 	for (;;) {	
 		while (softdep_process_worklist(mp, 0) > 0 ||
 		    (MOUNTEDSUJ(mp) &&
 		    VFSTOUFS(mp)->softdep_jblocks->jb_suspended))
 			kthread_suspend_check();
 		ACQUIRE_LOCK(ump);
 		if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0)
 			msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM,
 			    "sdflush", hz / 2);
 		ump->softdep_flags &= ~FLUSH_CLEANUP;
 		/*
 		 * Check to see if we are done and need to exit.
 		 */
 		if ((ump->softdep_flags & FLUSH_EXIT) == 0) {
 			FREE_LOCK(ump);
 			continue;
 		}
 		ump->softdep_flags &= ~FLUSH_EXIT;
 		FREE_LOCK(ump);
 		wakeup(&ump->softdep_flags);
 		if (print_threads)
 			printf("Stop thread %s: searchfailed %d, did cleanups %d\n", td->td_name, searchfailed, ump->um_softdep->sd_cleanups);
 		atomic_subtract_int(&stat_flush_threads, 1);
 		kthread_exit();
 		panic("kthread_exit failed\n");
 	}
 }
 
 static void
 worklist_speedup(mp)
 	struct mount *mp;
 {
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	if ((ump->softdep_flags & (FLUSH_CLEANUP | FLUSH_EXIT)) == 0)
 		ump->softdep_flags |= FLUSH_CLEANUP;
 	wakeup(&ump->softdep_flushtd);
 }
 
 static int
 softdep_speedup(ump)
 	struct ufsmount *ump;
 {
 	struct ufsmount *altump;
 	struct mount_softdeps *sdp;
 
 	LOCK_OWNED(ump);
 	worklist_speedup(ump->um_mountp);
 	bd_speedup();
 	/*
 	 * If we have global shortages, then we need other
 	 * filesystems to help with the cleanup. Here we wakeup a
 	 * flusher thread for a filesystem that is over its fair
 	 * share of resources.
 	 */
 	if (req_clear_inodedeps || req_clear_remove) {
 		ACQUIRE_GBLLOCK(&lk);
 		TAILQ_FOREACH(sdp, &softdepmounts, sd_next) {
 			if ((altump = sdp->sd_ump) == ump)
 				continue;
 			if (((req_clear_inodedeps &&
 			    altump->softdep_curdeps[D_INODEDEP] >
 			    max_softdeps / stat_flush_threads) ||
 			    (req_clear_remove &&
 			    altump->softdep_curdeps[D_DIRREM] >
 			    (max_softdeps / 2) / stat_flush_threads)) &&
 			    TRY_ACQUIRE_LOCK(altump))
 				break;
 		}
 		if (sdp == NULL) {
 			searchfailed++;
 			FREE_GBLLOCK(&lk);
 		} else {
 			/*
 			 * Move to the end of the list so we pick a
 			 * different one on out next try.
 			 */
 			TAILQ_REMOVE(&softdepmounts, sdp, sd_next);
 			TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next);
 			FREE_GBLLOCK(&lk);
 			if ((altump->softdep_flags &
 			    (FLUSH_CLEANUP | FLUSH_EXIT)) == 0)
 				altump->softdep_flags |= FLUSH_CLEANUP;
 			altump->um_softdep->sd_cleanups++;
 			wakeup(&altump->softdep_flushtd);
 			FREE_LOCK(altump);
 		}
 	}
 	return (speedup_syncer());
 }
 
 /*
  * Add an item to the end of the work queue.
  * This routine requires that the lock be held.
  * This is the only routine that adds items to the list.
  * The following routine is the only one that removes items
  * and does so in order from first to last.
  */
 
 #define	WK_HEAD		0x0001	/* Add to HEAD. */
 #define	WK_NODELAY	0x0002	/* Process immediately. */
 
 static void
 add_to_worklist(wk, flags)
 	struct worklist *wk;
 	int flags;
 {
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(wk->wk_mp);
 	LOCK_OWNED(ump);
 	if (wk->wk_state & ONWORKLIST)
 		panic("add_to_worklist: %s(0x%X) already on list",
 		    TYPENAME(wk->wk_type), wk->wk_state);
 	wk->wk_state |= ONWORKLIST;
 	if (ump->softdep_on_worklist == 0) {
 		LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list);
 		ump->softdep_worklist_tail = wk;
 	} else if (flags & WK_HEAD) {
 		LIST_INSERT_HEAD(&ump->softdep_workitem_pending, wk, wk_list);
 	} else {
 		LIST_INSERT_AFTER(ump->softdep_worklist_tail, wk, wk_list);
 		ump->softdep_worklist_tail = wk;
 	}
 	ump->softdep_on_worklist += 1;
 	if (flags & WK_NODELAY)
 		worklist_speedup(wk->wk_mp);
 }
 
 /*
  * Remove the item to be processed. If we are removing the last
  * item on the list, we need to recalculate the tail pointer.
  */
 static void
 remove_from_worklist(wk)
 	struct worklist *wk;
 {
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(wk->wk_mp);
 	WORKLIST_REMOVE(wk);
 	if (ump->softdep_worklist_tail == wk)
 		ump->softdep_worklist_tail =
 		    (struct worklist *)wk->wk_list.le_prev;
 	ump->softdep_on_worklist -= 1;
 }
 
 static void
 wake_worklist(wk)
 	struct worklist *wk;
 {
 	if (wk->wk_state & IOWAITING) {
 		wk->wk_state &= ~IOWAITING;
 		wakeup(wk);
 	}
 }
 
 static void
 wait_worklist(wk, wmesg)
 	struct worklist *wk;
 	char *wmesg;
 {
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(wk->wk_mp);
 	wk->wk_state |= IOWAITING;
 	msleep(wk, LOCK_PTR(ump), PVM, wmesg, 0);
 }
 
 /*
  * Process that runs once per second to handle items in the background queue.
  *
  * Note that we ensure that everything is done in the order in which they
  * appear in the queue. The code below depends on this property to ensure
  * that blocks of a file are freed before the inode itself is freed. This
  * ordering ensures that no new <vfsid, inum, lbn> triples will be generated
  * until all the old ones have been purged from the dependency lists.
  */
 static int 
 softdep_process_worklist(mp, full)
 	struct mount *mp;
 	int full;
 {
 	int cnt, matchcnt;
 	struct ufsmount *ump;
 	long starttime;
 
 	KASSERT(mp != NULL, ("softdep_process_worklist: NULL mp"));
 	if (MOUNTEDSOFTDEP(mp) == 0)
 		return (0);
 	matchcnt = 0;
 	ump = VFSTOUFS(mp);
 	ACQUIRE_LOCK(ump);
 	starttime = time_second;
 	softdep_process_journal(mp, NULL, full ? MNT_WAIT : 0);
 	check_clear_deps(mp);
 	while (ump->softdep_on_worklist > 0) {
 		if ((cnt = process_worklist_item(mp, 10, LK_NOWAIT)) == 0)
 			break;
 		else
 			matchcnt += cnt;
 		check_clear_deps(mp);
 		/*
 		 * We do not generally want to stop for buffer space, but if
 		 * we are really being a buffer hog, we will stop and wait.
 		 */
 		if (should_yield()) {
 			FREE_LOCK(ump);
 			kern_yield(PRI_USER);
 			bwillwrite();
 			ACQUIRE_LOCK(ump);
 		}
 		/*
 		 * Never allow processing to run for more than one
 		 * second. This gives the syncer thread the opportunity
 		 * to pause if appropriate.
 		 */
 		if (!full && starttime != time_second)
 			break;
 	}
 	if (full == 0)
 		journal_unsuspend(ump);
 	FREE_LOCK(ump);
 	return (matchcnt);
 }
 
 /*
  * Process all removes associated with a vnode if we are running out of
  * journal space.  Any other process which attempts to flush these will
  * be unable as we have the vnodes locked.
  */
 static void
 process_removes(vp)
 	struct vnode *vp;
 {
 	struct inodedep *inodedep;
 	struct dirrem *dirrem;
 	struct ufsmount *ump;
 	struct mount *mp;
 	ino_t inum;
 
 	mp = vp->v_mount;
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	inum = VTOI(vp)->i_number;
 	for (;;) {
 top:
 		if (inodedep_lookup(mp, inum, 0, &inodedep) == 0)
 			return;
 		LIST_FOREACH(dirrem, &inodedep->id_dirremhd, dm_inonext) {
 			/*
 			 * If another thread is trying to lock this vnode
 			 * it will fail but we must wait for it to do so
 			 * before we can proceed.
 			 */
 			if (dirrem->dm_state & INPROGRESS) {
 				wait_worklist(&dirrem->dm_list, "pwrwait");
 				goto top;
 			}
 			if ((dirrem->dm_state & (COMPLETE | ONWORKLIST)) == 
 			    (COMPLETE | ONWORKLIST))
 				break;
 		}
 		if (dirrem == NULL)
 			return;
 		remove_from_worklist(&dirrem->dm_list);
 		FREE_LOCK(ump);
 		if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
 			panic("process_removes: suspended filesystem");
 		handle_workitem_remove(dirrem, 0);
 		vn_finished_secondary_write(mp);
 		ACQUIRE_LOCK(ump);
 	}
 }
 
 /*
  * Process all truncations associated with a vnode if we are running out
  * of journal space.  This is called when the vnode lock is already held
  * and no other process can clear the truncation.  This function returns
  * a value greater than zero if it did any work.
  */
 static void
 process_truncates(vp)
 	struct vnode *vp;
 {
 	struct inodedep *inodedep;
 	struct freeblks *freeblks;
 	struct ufsmount *ump;
 	struct mount *mp;
 	ino_t inum;
 	int cgwait;
 
 	mp = vp->v_mount;
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	inum = VTOI(vp)->i_number;
 	for (;;) {
 		if (inodedep_lookup(mp, inum, 0, &inodedep) == 0)
 			return;
 		cgwait = 0;
 		TAILQ_FOREACH(freeblks, &inodedep->id_freeblklst, fb_next) {
 			/* Journal entries not yet written.  */
 			if (!LIST_EMPTY(&freeblks->fb_jblkdephd)) {
 				jwait(&LIST_FIRST(
 				    &freeblks->fb_jblkdephd)->jb_list,
 				    MNT_WAIT);
 				break;
 			}
 			/* Another thread is executing this item. */
 			if (freeblks->fb_state & INPROGRESS) {
 				wait_worklist(&freeblks->fb_list, "ptrwait");
 				break;
 			}
 			/* Freeblks is waiting on a inode write. */
 			if ((freeblks->fb_state & COMPLETE) == 0) {
 				FREE_LOCK(ump);
 				ffs_update(vp, 1);
 				ACQUIRE_LOCK(ump);
 				break;
 			}
 			if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST)) ==
 			    (ALLCOMPLETE | ONWORKLIST)) {
 				remove_from_worklist(&freeblks->fb_list);
 				freeblks->fb_state |= INPROGRESS;
 				FREE_LOCK(ump);
 				if (vn_start_secondary_write(NULL, &mp,
 				    V_NOWAIT))
 					panic("process_truncates: "
 					    "suspended filesystem");
 				handle_workitem_freeblocks(freeblks, 0);
 				vn_finished_secondary_write(mp);
 				ACQUIRE_LOCK(ump);
 				break;
 			}
 			if (freeblks->fb_cgwait)
 				cgwait++;
 		}
 		if (cgwait) {
 			FREE_LOCK(ump);
 			sync_cgs(mp, MNT_WAIT);
 			ffs_sync_snap(mp, MNT_WAIT);
 			ACQUIRE_LOCK(ump);
 			continue;
 		}
 		if (freeblks == NULL)
 			break;
 	}
 	return;
 }
 
 /*
  * Process one item on the worklist.
  */
 static int
 process_worklist_item(mp, target, flags)
 	struct mount *mp;
 	int target;
 	int flags;
 {
 	struct worklist sentinel;
 	struct worklist *wk;
 	struct ufsmount *ump;
 	int matchcnt;
 	int error;
 
 	KASSERT(mp != NULL, ("process_worklist_item: NULL mp"));
 	/*
 	 * If we are being called because of a process doing a
 	 * copy-on-write, then it is not safe to write as we may
 	 * recurse into the copy-on-write routine.
 	 */
 	if (curthread->td_pflags & TDP_COWINPROGRESS)
 		return (-1);
 	PHOLD(curproc);	/* Don't let the stack go away. */
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	matchcnt = 0;
 	sentinel.wk_mp = NULL;
 	sentinel.wk_type = D_SENTINEL;
 	LIST_INSERT_HEAD(&ump->softdep_workitem_pending, &sentinel, wk_list);
 	for (wk = LIST_NEXT(&sentinel, wk_list); wk != NULL;
 	    wk = LIST_NEXT(&sentinel, wk_list)) {
 		if (wk->wk_type == D_SENTINEL) {
 			LIST_REMOVE(&sentinel, wk_list);
 			LIST_INSERT_AFTER(wk, &sentinel, wk_list);
 			continue;
 		}
 		if (wk->wk_state & INPROGRESS)
 			panic("process_worklist_item: %p already in progress.",
 			    wk);
 		wk->wk_state |= INPROGRESS;
 		remove_from_worklist(wk);
 		FREE_LOCK(ump);
 		if (vn_start_secondary_write(NULL, &mp, V_NOWAIT))
 			panic("process_worklist_item: suspended filesystem");
 		switch (wk->wk_type) {
 		case D_DIRREM:
 			/* removal of a directory entry */
 			error = handle_workitem_remove(WK_DIRREM(wk), flags);
 			break;
 
 		case D_FREEBLKS:
 			/* releasing blocks and/or fragments from a file */
 			error = handle_workitem_freeblocks(WK_FREEBLKS(wk),
 			    flags);
 			break;
 
 		case D_FREEFRAG:
 			/* releasing a fragment when replaced as a file grows */
 			handle_workitem_freefrag(WK_FREEFRAG(wk));
 			error = 0;
 			break;
 
 		case D_FREEFILE:
 			/* releasing an inode when its link count drops to 0 */
 			handle_workitem_freefile(WK_FREEFILE(wk));
 			error = 0;
 			break;
 
 		default:
 			panic("%s_process_worklist: Unknown type %s",
 			    "softdep", TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 		vn_finished_secondary_write(mp);
 		ACQUIRE_LOCK(ump);
 		if (error == 0) {
 			if (++matchcnt == target)
 				break;
 			continue;
 		}
 		/*
 		 * We have to retry the worklist item later.  Wake up any
 		 * waiters who may be able to complete it immediately and
 		 * add the item back to the head so we don't try to execute
 		 * it again.
 		 */
 		wk->wk_state &= ~INPROGRESS;
 		wake_worklist(wk);
 		add_to_worklist(wk, WK_HEAD);
 	}
 	LIST_REMOVE(&sentinel, wk_list);
 	/* Sentinal could've become the tail from remove_from_worklist. */
 	if (ump->softdep_worklist_tail == &sentinel)
 		ump->softdep_worklist_tail =
 		    (struct worklist *)sentinel.wk_list.le_prev;
 	PRELE(curproc);
 	return (matchcnt);
 }
 
 /*
  * Move dependencies from one buffer to another.
  */
 int
 softdep_move_dependencies(oldbp, newbp)
 	struct buf *oldbp;
 	struct buf *newbp;
 {
 	struct worklist *wk, *wktail;
 	struct ufsmount *ump;
 	int dirty;
 
 	if ((wk = LIST_FIRST(&oldbp->b_dep)) == NULL)
 		return (0);
 	KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0,
 	    ("softdep_move_dependencies called on non-softdep filesystem"));
 	dirty = 0;
 	wktail = NULL;
 	ump = VFSTOUFS(wk->wk_mp);
 	ACQUIRE_LOCK(ump);
 	while ((wk = LIST_FIRST(&oldbp->b_dep)) != NULL) {
 		LIST_REMOVE(wk, wk_list);
 		if (wk->wk_type == D_BMSAFEMAP &&
 		    bmsafemap_backgroundwrite(WK_BMSAFEMAP(wk), newbp))
 			dirty = 1;
 		if (wktail == NULL)
 			LIST_INSERT_HEAD(&newbp->b_dep, wk, wk_list);
 		else
 			LIST_INSERT_AFTER(wktail, wk, wk_list);
 		wktail = wk;
 	}
 	FREE_LOCK(ump);
 
 	return (dirty);
 }
 
 /*
  * Purge the work list of all items associated with a particular mount point.
  */
 int
 softdep_flushworklist(oldmnt, countp, td)
 	struct mount *oldmnt;
 	int *countp;
 	struct thread *td;
 {
 	struct vnode *devvp;
 	struct ufsmount *ump;
 	int count, error;
 
 	/*
 	 * Alternately flush the block device associated with the mount
 	 * point and process any dependencies that the flushing
 	 * creates. We continue until no more worklist dependencies
 	 * are found.
 	 */
 	*countp = 0;
 	error = 0;
 	ump = VFSTOUFS(oldmnt);
 	devvp = ump->um_devvp;
 	while ((count = softdep_process_worklist(oldmnt, 1)) > 0) {
 		*countp += count;
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 		error = VOP_FSYNC(devvp, MNT_WAIT, td);
 		VOP_UNLOCK(devvp, 0);
 		if (error != 0)
 			break;
 	}
 	return (error);
 }
 
 #define	SU_WAITIDLE_RETRIES	20
 static int
 softdep_waitidle(struct mount *mp, int flags __unused)
 {
 	struct ufsmount *ump;
 	struct vnode *devvp;
 	struct thread *td;
 	int error, i;
 
 	ump = VFSTOUFS(mp);
 	devvp = ump->um_devvp;
 	td = curthread;
 	error = 0;
 	ACQUIRE_LOCK(ump);
 	for (i = 0; i < SU_WAITIDLE_RETRIES && ump->softdep_deps != 0; i++) {
 		ump->softdep_req = 1;
 		KASSERT((flags & FORCECLOSE) == 0 ||
 		    ump->softdep_on_worklist == 0,
 		    ("softdep_waitidle: work added after flush"));
 		msleep(&ump->softdep_deps, LOCK_PTR(ump), PVM | PDROP,
 		    "softdeps", 10 * hz);
 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
 		error = VOP_FSYNC(devvp, MNT_WAIT, td);
 		VOP_UNLOCK(devvp, 0);
 		ACQUIRE_LOCK(ump);
 		if (error != 0)
 			break;
 	}
 	ump->softdep_req = 0;
 	if (i == SU_WAITIDLE_RETRIES && error == 0 && ump->softdep_deps != 0) {
 		error = EBUSY;
 		printf("softdep_waitidle: Failed to flush worklist for %p\n",
 		    mp);
 	}
 	FREE_LOCK(ump);
 	return (error);
 }
 
 /*
  * Flush all vnodes and worklist items associated with a specified mount point.
  */
 int
 softdep_flushfiles(oldmnt, flags, td)
 	struct mount *oldmnt;
 	int flags;
 	struct thread *td;
 {
 #ifdef QUOTA
 	struct ufsmount *ump;
 	int i;
 #endif
 	int error, early, depcount, loopcnt, retry_flush_count, retry;
 	int morework;
 
 	KASSERT(MOUNTEDSOFTDEP(oldmnt) != 0,
 	    ("softdep_flushfiles called on non-softdep filesystem"));
 	loopcnt = 10;
 	retry_flush_count = 3;
 retry_flush:
 	error = 0;
 
 	/*
 	 * Alternately flush the vnodes associated with the mount
 	 * point and process any dependencies that the flushing
 	 * creates. In theory, this loop can happen at most twice,
 	 * but we give it a few extra just to be sure.
 	 */
 	for (; loopcnt > 0; loopcnt--) {
 		/*
 		 * Do another flush in case any vnodes were brought in
 		 * as part of the cleanup operations.
 		 */
 		early = retry_flush_count == 1 || (oldmnt->mnt_kern_flag &
 		    MNTK_UNMOUNT) == 0 ? 0 : EARLYFLUSH;
 		if ((error = ffs_flushfiles(oldmnt, flags | early, td)) != 0)
 			break;
 		if ((error = softdep_flushworklist(oldmnt, &depcount, td)) != 0 ||
 		    depcount == 0)
 			break;
 	}
 	/*
 	 * If we are unmounting then it is an error to fail. If we
 	 * are simply trying to downgrade to read-only, then filesystem
 	 * activity can keep us busy forever, so we just fail with EBUSY.
 	 */
 	if (loopcnt == 0) {
 		if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT)
 			panic("softdep_flushfiles: looping");
 		error = EBUSY;
 	}
 	if (!error)
 		error = softdep_waitidle(oldmnt, flags);
 	if (!error) {
 		if (oldmnt->mnt_kern_flag & MNTK_UNMOUNT) {
 			retry = 0;
 			MNT_ILOCK(oldmnt);
 			KASSERT((oldmnt->mnt_kern_flag & MNTK_NOINSMNTQ) != 0,
 			    ("softdep_flushfiles: !MNTK_NOINSMNTQ"));
 			morework = oldmnt->mnt_nvnodelistsize > 0;
 #ifdef QUOTA
 			ump = VFSTOUFS(oldmnt);
 			UFS_LOCK(ump);
 			for (i = 0; i < MAXQUOTAS; i++) {
 				if (ump->um_quotas[i] != NULLVP)
 					morework = 1;
 			}
 			UFS_UNLOCK(ump);
 #endif
 			if (morework) {
 				if (--retry_flush_count > 0) {
 					retry = 1;
 					loopcnt = 3;
 				} else
 					error = EBUSY;
 			}
 			MNT_IUNLOCK(oldmnt);
 			if (retry)
 				goto retry_flush;
 		}
 	}
 	return (error);
 }
 
 /*
  * Structure hashing.
  * 
  * There are four types of structures that can be looked up:
  *	1) pagedep structures identified by mount point, inode number,
  *	   and logical block.
  *	2) inodedep structures identified by mount point and inode number.
  *	3) newblk structures identified by mount point and
  *	   physical block number.
  *	4) bmsafemap structures identified by mount point and
  *	   cylinder group number.
  *
  * The "pagedep" and "inodedep" dependency structures are hashed
  * separately from the file blocks and inodes to which they correspond.
  * This separation helps when the in-memory copy of an inode or
  * file block must be replaced. It also obviates the need to access
  * an inode or file page when simply updating (or de-allocating)
  * dependency structures. Lookup of newblk structures is needed to
  * find newly allocated blocks when trying to associate them with
  * their allocdirect or allocindir structure.
  *
  * The lookup routines optionally create and hash a new instance when
  * an existing entry is not found. The bmsafemap lookup routine always
  * allocates a new structure if an existing one is not found.
  */
 #define DEPALLOC	0x0001	/* allocate structure if lookup fails */
 
 /*
  * Structures and routines associated with pagedep caching.
  */
 #define	PAGEDEP_HASH(ump, inum, lbn) \
 	(&(ump)->pagedep_hashtbl[((inum) + (lbn)) & (ump)->pagedep_hash_size])
 
 static int
 pagedep_find(pagedephd, ino, lbn, pagedeppp)
 	struct pagedep_hashhead *pagedephd;
 	ino_t ino;
 	ufs_lbn_t lbn;
 	struct pagedep **pagedeppp;
 {
 	struct pagedep *pagedep;
 
 	LIST_FOREACH(pagedep, pagedephd, pd_hash) {
 		if (ino == pagedep->pd_ino && lbn == pagedep->pd_lbn) {
 			*pagedeppp = pagedep;
 			return (1);
 		}
 	}
 	*pagedeppp = NULL;
 	return (0);
 }
 /*
  * Look up a pagedep. Return 1 if found, 0 otherwise.
  * If not found, allocate if DEPALLOC flag is passed.
  * Found or allocated entry is returned in pagedeppp.
  * This routine must be called with splbio interrupts blocked.
  */
 static int
 pagedep_lookup(mp, bp, ino, lbn, flags, pagedeppp)
 	struct mount *mp;
 	struct buf *bp;
 	ino_t ino;
 	ufs_lbn_t lbn;
 	int flags;
 	struct pagedep **pagedeppp;
 {
 	struct pagedep *pagedep;
 	struct pagedep_hashhead *pagedephd;
 	struct worklist *wk;
 	struct ufsmount *ump;
 	int ret;
 	int i;
 
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	if (bp) {
 		LIST_FOREACH(wk, &bp->b_dep, wk_list) {
 			if (wk->wk_type == D_PAGEDEP) {
 				*pagedeppp = WK_PAGEDEP(wk);
 				return (1);
 			}
 		}
 	}
 	pagedephd = PAGEDEP_HASH(ump, ino, lbn);
 	ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
 	if (ret) {
 		if (((*pagedeppp)->pd_state & ONWORKLIST) == 0 && bp)
 			WORKLIST_INSERT(&bp->b_dep, &(*pagedeppp)->pd_list);
 		return (1);
 	}
 	if ((flags & DEPALLOC) == 0)
 		return (0);
 	FREE_LOCK(ump);
 	pagedep = malloc(sizeof(struct pagedep),
 	    M_PAGEDEP, M_SOFTDEP_FLAGS|M_ZERO);
 	workitem_alloc(&pagedep->pd_list, D_PAGEDEP, mp);
 	ACQUIRE_LOCK(ump);
 	ret = pagedep_find(pagedephd, ino, lbn, pagedeppp);
 	if (*pagedeppp) {
 		/*
 		 * This should never happen since we only create pagedeps
 		 * with the vnode lock held.  Could be an assert.
 		 */
 		WORKITEM_FREE(pagedep, D_PAGEDEP);
 		return (ret);
 	}
 	pagedep->pd_ino = ino;
 	pagedep->pd_lbn = lbn;
 	LIST_INIT(&pagedep->pd_dirremhd);
 	LIST_INIT(&pagedep->pd_pendinghd);
 	for (i = 0; i < DAHASHSZ; i++)
 		LIST_INIT(&pagedep->pd_diraddhd[i]);
 	LIST_INSERT_HEAD(pagedephd, pagedep, pd_hash);
 	WORKLIST_INSERT(&bp->b_dep, &pagedep->pd_list);
 	*pagedeppp = pagedep;
 	return (0);
 }
 
 /*
  * Structures and routines associated with inodedep caching.
  */
 #define	INODEDEP_HASH(ump, inum) \
       (&(ump)->inodedep_hashtbl[(inum) & (ump)->inodedep_hash_size])
 
 static int
 inodedep_find(inodedephd, inum, inodedeppp)
 	struct inodedep_hashhead *inodedephd;
 	ino_t inum;
 	struct inodedep **inodedeppp;
 {
 	struct inodedep *inodedep;
 
 	LIST_FOREACH(inodedep, inodedephd, id_hash)
 		if (inum == inodedep->id_ino)
 			break;
 	if (inodedep) {
 		*inodedeppp = inodedep;
 		return (1);
 	}
 	*inodedeppp = NULL;
 
 	return (0);
 }
 /*
  * Look up an inodedep. Return 1 if found, 0 if not found.
  * If not found, allocate if DEPALLOC flag is passed.
  * Found or allocated entry is returned in inodedeppp.
  * This routine must be called with splbio interrupts blocked.
  */
 static int
 inodedep_lookup(mp, inum, flags, inodedeppp)
 	struct mount *mp;
 	ino_t inum;
 	int flags;
 	struct inodedep **inodedeppp;
 {
 	struct inodedep *inodedep;
 	struct inodedep_hashhead *inodedephd;
 	struct ufsmount *ump;
 	struct fs *fs;
 
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	fs = ump->um_fs;
 	inodedephd = INODEDEP_HASH(ump, inum);
 
 	if (inodedep_find(inodedephd, inum, inodedeppp))
 		return (1);
 	if ((flags & DEPALLOC) == 0)
 		return (0);
 	/*
 	 * If the system is over its limit and our filesystem is
 	 * responsible for more than our share of that usage and
 	 * we are not in a rush, request some inodedep cleanup.
 	 */
 	if (softdep_excess_items(ump, D_INODEDEP))
 		schedule_cleanup(mp);
 	else
 		FREE_LOCK(ump);
 	inodedep = malloc(sizeof(struct inodedep),
 		M_INODEDEP, M_SOFTDEP_FLAGS);
 	workitem_alloc(&inodedep->id_list, D_INODEDEP, mp);
 	ACQUIRE_LOCK(ump);
 	if (inodedep_find(inodedephd, inum, inodedeppp)) {
 		WORKITEM_FREE(inodedep, D_INODEDEP);
 		return (1);
 	}
 	inodedep->id_fs = fs;
 	inodedep->id_ino = inum;
 	inodedep->id_state = ALLCOMPLETE;
 	inodedep->id_nlinkdelta = 0;
 	inodedep->id_savedino1 = NULL;
 	inodedep->id_savedsize = -1;
 	inodedep->id_savedextsize = -1;
 	inodedep->id_savednlink = -1;
 	inodedep->id_bmsafemap = NULL;
 	inodedep->id_mkdiradd = NULL;
 	LIST_INIT(&inodedep->id_dirremhd);
 	LIST_INIT(&inodedep->id_pendinghd);
 	LIST_INIT(&inodedep->id_inowait);
 	LIST_INIT(&inodedep->id_bufwait);
 	TAILQ_INIT(&inodedep->id_inoreflst);
 	TAILQ_INIT(&inodedep->id_inoupdt);
 	TAILQ_INIT(&inodedep->id_newinoupdt);
 	TAILQ_INIT(&inodedep->id_extupdt);
 	TAILQ_INIT(&inodedep->id_newextupdt);
 	TAILQ_INIT(&inodedep->id_freeblklst);
 	LIST_INSERT_HEAD(inodedephd, inodedep, id_hash);
 	*inodedeppp = inodedep;
 	return (0);
 }
 
 /*
  * Structures and routines associated with newblk caching.
  */
 #define	NEWBLK_HASH(ump, inum) \
 	(&(ump)->newblk_hashtbl[(inum) & (ump)->newblk_hash_size])
 
 static int
 newblk_find(newblkhd, newblkno, flags, newblkpp)
 	struct newblk_hashhead *newblkhd;
 	ufs2_daddr_t newblkno;
 	int flags;
 	struct newblk **newblkpp;
 {
 	struct newblk *newblk;
 
 	LIST_FOREACH(newblk, newblkhd, nb_hash) {
 		if (newblkno != newblk->nb_newblkno)
 			continue;
 		/*
 		 * If we're creating a new dependency don't match those that
 		 * have already been converted to allocdirects.  This is for
 		 * a frag extend.
 		 */
 		if ((flags & DEPALLOC) && newblk->nb_list.wk_type != D_NEWBLK)
 			continue;
 		break;
 	}
 	if (newblk) {
 		*newblkpp = newblk;
 		return (1);
 	}
 	*newblkpp = NULL;
 	return (0);
 }
 
 /*
  * Look up a newblk. Return 1 if found, 0 if not found.
  * If not found, allocate if DEPALLOC flag is passed.
  * Found or allocated entry is returned in newblkpp.
  */
 static int
 newblk_lookup(mp, newblkno, flags, newblkpp)
 	struct mount *mp;
 	ufs2_daddr_t newblkno;
 	int flags;
 	struct newblk **newblkpp;
 {
 	struct newblk *newblk;
 	struct newblk_hashhead *newblkhd;
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	newblkhd = NEWBLK_HASH(ump, newblkno);
 	if (newblk_find(newblkhd, newblkno, flags, newblkpp))
 		return (1);
 	if ((flags & DEPALLOC) == 0)
 		return (0);
 	if (softdep_excess_items(ump, D_NEWBLK) ||
 	    softdep_excess_items(ump, D_ALLOCDIRECT) ||
 	    softdep_excess_items(ump, D_ALLOCINDIR))
 		schedule_cleanup(mp);
 	else
 		FREE_LOCK(ump);
 	newblk = malloc(sizeof(union allblk), M_NEWBLK,
 	    M_SOFTDEP_FLAGS | M_ZERO);
 	workitem_alloc(&newblk->nb_list, D_NEWBLK, mp);
 	ACQUIRE_LOCK(ump);
 	if (newblk_find(newblkhd, newblkno, flags, newblkpp)) {
 		WORKITEM_FREE(newblk, D_NEWBLK);
 		return (1);
 	}
 	newblk->nb_freefrag = NULL;
 	LIST_INIT(&newblk->nb_indirdeps);
 	LIST_INIT(&newblk->nb_newdirblk);
 	LIST_INIT(&newblk->nb_jwork);
 	newblk->nb_state = ATTACHED;
 	newblk->nb_newblkno = newblkno;
 	LIST_INSERT_HEAD(newblkhd, newblk, nb_hash);
 	*newblkpp = newblk;
 	return (0);
 }
 
 /*
  * Structures and routines associated with freed indirect block caching.
  */
 #define	INDIR_HASH(ump, blkno) \
 	(&(ump)->indir_hashtbl[(blkno) & (ump)->indir_hash_size])
 
 /*
  * Lookup an indirect block in the indir hash table.  The freework is
  * removed and potentially freed.  The caller must do a blocking journal
  * write before writing to the blkno.
  */
 static int
 indirblk_lookup(mp, blkno)
 	struct mount *mp;
 	ufs2_daddr_t blkno;
 {
 	struct freework *freework;
 	struct indir_hashhead *wkhd;
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(mp);
 	wkhd = INDIR_HASH(ump, blkno);
 	TAILQ_FOREACH(freework, wkhd, fw_next) {
 		if (freework->fw_blkno != blkno)
 			continue;
 		indirblk_remove(freework);
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * Insert an indirect block represented by freework into the indirblk
  * hash table so that it may prevent the block from being re-used prior
  * to the journal being written.
  */
 static void
 indirblk_insert(freework)
 	struct freework *freework;
 {
 	struct jblocks *jblocks;
 	struct jseg *jseg;
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(freework->fw_list.wk_mp);
 	jblocks = ump->softdep_jblocks;
 	jseg = TAILQ_LAST(&jblocks->jb_segs, jseglst);
 	if (jseg == NULL)
 		return;
 	
 	LIST_INSERT_HEAD(&jseg->js_indirs, freework, fw_segs);
 	TAILQ_INSERT_HEAD(INDIR_HASH(ump, freework->fw_blkno), freework,
 	    fw_next);
 	freework->fw_state &= ~DEPCOMPLETE;
 }
 
 static void
 indirblk_remove(freework)
 	struct freework *freework;
 {
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(freework->fw_list.wk_mp);
 	LIST_REMOVE(freework, fw_segs);
 	TAILQ_REMOVE(INDIR_HASH(ump, freework->fw_blkno), freework, fw_next);
 	freework->fw_state |= DEPCOMPLETE;
 	if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE)
 		WORKITEM_FREE(freework, D_FREEWORK);
 }
 
 /*
  * Executed during filesystem system initialization before
  * mounting any filesystems.
  */
 void 
 softdep_initialize()
 {
 
 	TAILQ_INIT(&softdepmounts);
 #ifdef __LP64__
 	max_softdeps = desiredvnodes * 4;
 #else
 	max_softdeps = desiredvnodes * 2;
 #endif
 
 	/* initialise bioops hack */
 	bioops.io_start = softdep_disk_io_initiation;
 	bioops.io_complete = softdep_disk_write_complete;
 	bioops.io_deallocate = softdep_deallocate_dependencies;
 	bioops.io_countdeps = softdep_count_dependencies;
 	softdep_ast_cleanup = softdep_ast_cleanup_proc;
 
 	/* Initialize the callout with an mtx. */
 	callout_init_mtx(&softdep_callout, &lk, 0);
 }
 
 /*
  * Executed after all filesystems have been unmounted during
  * filesystem module unload.
  */
 void
 softdep_uninitialize()
 {
 
 	/* clear bioops hack */
 	bioops.io_start = NULL;
 	bioops.io_complete = NULL;
 	bioops.io_deallocate = NULL;
 	bioops.io_countdeps = NULL;
 	softdep_ast_cleanup = NULL;
 
 	callout_drain(&softdep_callout);
 }
 
 /*
  * Called at mount time to notify the dependency code that a
  * filesystem wishes to use it.
  */
 int
 softdep_mount(devvp, mp, fs, cred)
 	struct vnode *devvp;
 	struct mount *mp;
 	struct fs *fs;
 	struct ucred *cred;
 {
 	struct csum_total cstotal;
 	struct mount_softdeps *sdp;
 	struct ufsmount *ump;
 	struct cg *cgp;
 	struct buf *bp;
 	int i, error, cyl;
 
 	sdp = malloc(sizeof(struct mount_softdeps), M_MOUNTDATA,
 	    M_WAITOK | M_ZERO);
 	MNT_ILOCK(mp);
 	mp->mnt_flag = (mp->mnt_flag & ~MNT_ASYNC) | MNT_SOFTDEP;
 	if ((mp->mnt_kern_flag & MNTK_SOFTDEP) == 0) {
 		mp->mnt_kern_flag = (mp->mnt_kern_flag & ~MNTK_ASYNC) | 
 			MNTK_SOFTDEP | MNTK_NOASYNC;
 	}
 	ump = VFSTOUFS(mp);
 	ump->um_softdep = sdp;
 	MNT_IUNLOCK(mp);
 	rw_init(LOCK_PTR(ump), "Per-Filesystem Softdep Lock");
 	sdp->sd_ump = ump;
 	LIST_INIT(&ump->softdep_workitem_pending);
 	LIST_INIT(&ump->softdep_journal_pending);
 	TAILQ_INIT(&ump->softdep_unlinked);
 	LIST_INIT(&ump->softdep_dirtycg);
 	ump->softdep_worklist_tail = NULL;
 	ump->softdep_on_worklist = 0;
 	ump->softdep_deps = 0;
 	LIST_INIT(&ump->softdep_mkdirlisthd);
 	ump->pagedep_hashtbl = hashinit(desiredvnodes / 5, M_PAGEDEP,
 	    &ump->pagedep_hash_size);
 	ump->pagedep_nextclean = 0;
 	ump->inodedep_hashtbl = hashinit(desiredvnodes, M_INODEDEP,
 	    &ump->inodedep_hash_size);
 	ump->inodedep_nextclean = 0;
 	ump->newblk_hashtbl = hashinit(max_softdeps / 2,  M_NEWBLK,
 	    &ump->newblk_hash_size);
 	ump->bmsafemap_hashtbl = hashinit(1024, M_BMSAFEMAP,
 	    &ump->bmsafemap_hash_size);
 	i = 1 << (ffs(desiredvnodes / 10) - 1);
 	ump->indir_hashtbl = malloc(i * sizeof(struct indir_hashhead),
 	    M_FREEWORK, M_WAITOK);
 	ump->indir_hash_size = i - 1;
 	for (i = 0; i <= ump->indir_hash_size; i++)
 		TAILQ_INIT(&ump->indir_hashtbl[i]);
 	ACQUIRE_GBLLOCK(&lk);
 	TAILQ_INSERT_TAIL(&softdepmounts, sdp, sd_next);
 	FREE_GBLLOCK(&lk);
 	if ((fs->fs_flags & FS_SUJ) &&
 	    (error = journal_mount(mp, fs, cred)) != 0) {
 		printf("Failed to start journal: %d\n", error);
 		softdep_unmount(mp);
 		return (error);
 	}
 	/*
 	 * Start our flushing thread in the bufdaemon process.
 	 */
 	ACQUIRE_LOCK(ump);
 	ump->softdep_flags |= FLUSH_STARTING;
 	FREE_LOCK(ump);
 	kproc_kthread_add(&softdep_flush, mp, &bufdaemonproc,
 	    &ump->softdep_flushtd, 0, 0, "softdepflush", "%s worker",
 	    mp->mnt_stat.f_mntonname);
 	ACQUIRE_LOCK(ump);
 	while ((ump->softdep_flags & FLUSH_STARTING) != 0) {
 		msleep(&ump->softdep_flushtd, LOCK_PTR(ump), PVM, "sdstart",
 		    hz / 2);
 	}
 	FREE_LOCK(ump);
 	/*
 	 * When doing soft updates, the counters in the
 	 * superblock may have gotten out of sync. Recomputation
 	 * can take a long time and can be deferred for background
 	 * fsck.  However, the old behavior of scanning the cylinder
 	 * groups and recalculating them at mount time is available
 	 * by setting vfs.ffs.compute_summary_at_mount to one.
 	 */
 	if (compute_summary_at_mount == 0 || fs->fs_clean != 0)
 		return (0);
 	bzero(&cstotal, sizeof cstotal);
 	for (cyl = 0; cyl < fs->fs_ncg; cyl++) {
 		if ((error = bread(devvp, fsbtodb(fs, cgtod(fs, cyl)),
 		    fs->fs_cgsize, cred, &bp)) != 0) {
 			brelse(bp);
 			softdep_unmount(mp);
 			return (error);
 		}
 		cgp = (struct cg *)bp->b_data;
 		cstotal.cs_nffree += cgp->cg_cs.cs_nffree;
 		cstotal.cs_nbfree += cgp->cg_cs.cs_nbfree;
 		cstotal.cs_nifree += cgp->cg_cs.cs_nifree;
 		cstotal.cs_ndir += cgp->cg_cs.cs_ndir;
 		fs->fs_cs(fs, cyl) = cgp->cg_cs;
 		brelse(bp);
 	}
 #ifdef DEBUG
 	if (bcmp(&cstotal, &fs->fs_cstotal, sizeof cstotal))
 		printf("%s: superblock summary recomputed\n", fs->fs_fsmnt);
 #endif
 	bcopy(&cstotal, &fs->fs_cstotal, sizeof cstotal);
 	return (0);
 }
 
 void
 softdep_unmount(mp)
 	struct mount *mp;
 {
 	struct ufsmount *ump;
 #ifdef INVARIANTS
 	int i;
 #endif
 
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_unmount called on non-softdep filesystem"));
 	ump = VFSTOUFS(mp);
 	MNT_ILOCK(mp);
 	mp->mnt_flag &= ~MNT_SOFTDEP;
 	if (MOUNTEDSUJ(mp) == 0) {
 		MNT_IUNLOCK(mp);
 	} else {
 		mp->mnt_flag &= ~MNT_SUJ;
 		MNT_IUNLOCK(mp);
 		journal_unmount(ump);
 	}
 	/*
 	 * Shut down our flushing thread. Check for NULL is if
 	 * softdep_mount errors out before the thread has been created.
 	 */
 	if (ump->softdep_flushtd != NULL) {
 		ACQUIRE_LOCK(ump);
 		ump->softdep_flags |= FLUSH_EXIT;
 		wakeup(&ump->softdep_flushtd);
 		msleep(&ump->softdep_flags, LOCK_PTR(ump), PVM | PDROP,
 		    "sdwait", 0);
 		KASSERT((ump->softdep_flags & FLUSH_EXIT) == 0,
 		    ("Thread shutdown failed"));
 	}
 	/*
 	 * Free up our resources.
 	 */
 	ACQUIRE_GBLLOCK(&lk);
 	TAILQ_REMOVE(&softdepmounts, ump->um_softdep, sd_next);
 	FREE_GBLLOCK(&lk);
 	rw_destroy(LOCK_PTR(ump));
 	hashdestroy(ump->pagedep_hashtbl, M_PAGEDEP, ump->pagedep_hash_size);
 	hashdestroy(ump->inodedep_hashtbl, M_INODEDEP, ump->inodedep_hash_size);
 	hashdestroy(ump->newblk_hashtbl, M_NEWBLK, ump->newblk_hash_size);
 	hashdestroy(ump->bmsafemap_hashtbl, M_BMSAFEMAP,
 	    ump->bmsafemap_hash_size);
 	free(ump->indir_hashtbl, M_FREEWORK);
 #ifdef INVARIANTS
 	for (i = 0; i <= D_LAST; i++)
 		KASSERT(ump->softdep_curdeps[i] == 0,
 		    ("Unmount %s: Dep type %s != 0 (%ld)", ump->um_fs->fs_fsmnt,
 		    TYPENAME(i), ump->softdep_curdeps[i]));
 #endif
 	free(ump->um_softdep, M_MOUNTDATA);
 }
 
 static struct jblocks *
 jblocks_create(void)
 {
 	struct jblocks *jblocks;
 
 	jblocks = malloc(sizeof(*jblocks), M_JBLOCKS, M_WAITOK | M_ZERO);
 	TAILQ_INIT(&jblocks->jb_segs);
 	jblocks->jb_avail = 10;
 	jblocks->jb_extent = malloc(sizeof(struct jextent) * jblocks->jb_avail,
 	    M_JBLOCKS, M_WAITOK | M_ZERO);
 
 	return (jblocks);
 }
 
 static ufs2_daddr_t
 jblocks_alloc(jblocks, bytes, actual)
 	struct jblocks *jblocks;
 	int bytes;
 	int *actual;
 {
 	ufs2_daddr_t daddr;
 	struct jextent *jext;
 	int freecnt;
 	int blocks;
 
 	blocks = bytes / DEV_BSIZE;
 	jext = &jblocks->jb_extent[jblocks->jb_head];
 	freecnt = jext->je_blocks - jblocks->jb_off;
 	if (freecnt == 0) {
 		jblocks->jb_off = 0;
 		if (++jblocks->jb_head > jblocks->jb_used)
 			jblocks->jb_head = 0;
 		jext = &jblocks->jb_extent[jblocks->jb_head];
 		freecnt = jext->je_blocks;
 	}
 	if (freecnt > blocks)
 		freecnt = blocks;
 	*actual = freecnt * DEV_BSIZE;
 	daddr = jext->je_daddr + jblocks->jb_off;
 	jblocks->jb_off += freecnt;
 	jblocks->jb_free -= freecnt;
 
 	return (daddr);
 }
 
 static void
 jblocks_free(jblocks, mp, bytes)
 	struct jblocks *jblocks;
 	struct mount *mp;
 	int bytes;
 {
 
 	LOCK_OWNED(VFSTOUFS(mp));
 	jblocks->jb_free += bytes / DEV_BSIZE;
 	if (jblocks->jb_suspended)
 		worklist_speedup(mp);
 	wakeup(jblocks);
 }
 
 static void
 jblocks_destroy(jblocks)
 	struct jblocks *jblocks;
 {
 
 	if (jblocks->jb_extent)
 		free(jblocks->jb_extent, M_JBLOCKS);
 	free(jblocks, M_JBLOCKS);
 }
 
 static void
 jblocks_add(jblocks, daddr, blocks)
 	struct jblocks *jblocks;
 	ufs2_daddr_t daddr;
 	int blocks;
 {
 	struct jextent *jext;
 
 	jblocks->jb_blocks += blocks;
 	jblocks->jb_free += blocks;
 	jext = &jblocks->jb_extent[jblocks->jb_used];
 	/* Adding the first block. */
 	if (jext->je_daddr == 0) {
 		jext->je_daddr = daddr;
 		jext->je_blocks = blocks;
 		return;
 	}
 	/* Extending the last extent. */
 	if (jext->je_daddr + jext->je_blocks == daddr) {
 		jext->je_blocks += blocks;
 		return;
 	}
 	/* Adding a new extent. */
 	if (++jblocks->jb_used == jblocks->jb_avail) {
 		jblocks->jb_avail *= 2;
 		jext = malloc(sizeof(struct jextent) * jblocks->jb_avail,
 		    M_JBLOCKS, M_WAITOK | M_ZERO);
 		memcpy(jext, jblocks->jb_extent,
 		    sizeof(struct jextent) * jblocks->jb_used);
 		free(jblocks->jb_extent, M_JBLOCKS);
 		jblocks->jb_extent = jext;
 	}
 	jext = &jblocks->jb_extent[jblocks->jb_used];
 	jext->je_daddr = daddr;
 	jext->je_blocks = blocks;
 	return;
 }
 
 int
 softdep_journal_lookup(mp, vpp)
 	struct mount *mp;
 	struct vnode **vpp;
 {
 	struct componentname cnp;
 	struct vnode *dvp;
 	ino_t sujournal;
 	int error;
 
 	error = VFS_VGET(mp, ROOTINO, LK_EXCLUSIVE, &dvp);
 	if (error)
 		return (error);
 	bzero(&cnp, sizeof(cnp));
 	cnp.cn_nameiop = LOOKUP;
 	cnp.cn_flags = ISLASTCN;
 	cnp.cn_thread = curthread;
 	cnp.cn_cred = curthread->td_ucred;
 	cnp.cn_pnbuf = SUJ_FILE;
 	cnp.cn_nameptr = SUJ_FILE;
 	cnp.cn_namelen = strlen(SUJ_FILE);
 	error = ufs_lookup_ino(dvp, NULL, &cnp, &sujournal);
 	vput(dvp);
 	if (error != 0)
 		return (error);
 	error = VFS_VGET(mp, sujournal, LK_EXCLUSIVE, vpp);
 	return (error);
 }
 
 /*
  * Open and verify the journal file.
  */
 static int
 journal_mount(mp, fs, cred)
 	struct mount *mp;
 	struct fs *fs;
 	struct ucred *cred;
 {
 	struct jblocks *jblocks;
 	struct ufsmount *ump;
 	struct vnode *vp;
 	struct inode *ip;
 	ufs2_daddr_t blkno;
 	int bcount;
 	int error;
 	int i;
 
 	ump = VFSTOUFS(mp);
 	ump->softdep_journal_tail = NULL;
 	ump->softdep_on_journal = 0;
 	ump->softdep_accdeps = 0;
 	ump->softdep_req = 0;
 	ump->softdep_jblocks = NULL;
 	error = softdep_journal_lookup(mp, &vp);
 	if (error != 0) {
 		printf("Failed to find journal.  Use tunefs to create one\n");
 		return (error);
 	}
 	ip = VTOI(vp);
 	if (ip->i_size < SUJ_MIN) {
 		error = ENOSPC;
 		goto out;
 	}
 	bcount = lblkno(fs, ip->i_size);	/* Only use whole blocks. */
 	jblocks = jblocks_create();
 	for (i = 0; i < bcount; i++) {
 		error = ufs_bmaparray(vp, i, &blkno, NULL, NULL, NULL);
 		if (error)
 			break;
 		jblocks_add(jblocks, blkno, fsbtodb(fs, fs->fs_frag));
 	}
 	if (error) {
 		jblocks_destroy(jblocks);
 		goto out;
 	}
 	jblocks->jb_low = jblocks->jb_free / 3;	/* Reserve 33%. */
 	jblocks->jb_min = jblocks->jb_free / 10; /* Suspend at 10%. */
 	ump->softdep_jblocks = jblocks;
 out:
 	if (error == 0) {
 		MNT_ILOCK(mp);
 		mp->mnt_flag |= MNT_SUJ;
 		mp->mnt_flag &= ~MNT_SOFTDEP;
 		MNT_IUNLOCK(mp);
 		/*
 		 * Only validate the journal contents if the
 		 * filesystem is clean, otherwise we write the logs
 		 * but they'll never be used.  If the filesystem was
 		 * still dirty when we mounted it the journal is
 		 * invalid and a new journal can only be valid if it
 		 * starts from a clean mount.
 		 */
 		if (fs->fs_clean) {
 			DIP_SET(ip, i_modrev, fs->fs_mtime);
 			ip->i_flags |= IN_MODIFIED;
 			ffs_update(vp, 1);
 		}
 	}
 	vput(vp);
 	return (error);
 }
 
 static void
 journal_unmount(ump)
 	struct ufsmount *ump;
 {
 
 	if (ump->softdep_jblocks)
 		jblocks_destroy(ump->softdep_jblocks);
 	ump->softdep_jblocks = NULL;
 }
 
 /*
  * Called when a journal record is ready to be written.  Space is allocated
  * and the journal entry is created when the journal is flushed to stable
  * store.
  */
 static void
 add_to_journal(wk)
 	struct worklist *wk;
 {
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(wk->wk_mp);
 	LOCK_OWNED(ump);
 	if (wk->wk_state & ONWORKLIST)
 		panic("add_to_journal: %s(0x%X) already on list",
 		    TYPENAME(wk->wk_type), wk->wk_state);
 	wk->wk_state |= ONWORKLIST | DEPCOMPLETE;
 	if (LIST_EMPTY(&ump->softdep_journal_pending)) {
 		ump->softdep_jblocks->jb_age = ticks;
 		LIST_INSERT_HEAD(&ump->softdep_journal_pending, wk, wk_list);
 	} else
 		LIST_INSERT_AFTER(ump->softdep_journal_tail, wk, wk_list);
 	ump->softdep_journal_tail = wk;
 	ump->softdep_on_journal += 1;
 }
 
 /*
  * Remove an arbitrary item for the journal worklist maintain the tail
  * pointer.  This happens when a new operation obviates the need to
  * journal an old operation.
  */
 static void
 remove_from_journal(wk)
 	struct worklist *wk;
 {
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(wk->wk_mp);
 	LOCK_OWNED(ump);
 #ifdef SUJ_DEBUG
 	{
 		struct worklist *wkn;
 
 		LIST_FOREACH(wkn, &ump->softdep_journal_pending, wk_list)
 			if (wkn == wk)
 				break;
 		if (wkn == NULL)
 			panic("remove_from_journal: %p is not in journal", wk);
 	}
 #endif
 	/*
 	 * We emulate a TAILQ to save space in most structures which do not
 	 * require TAILQ semantics.  Here we must update the tail position
 	 * when removing the tail which is not the final entry. This works
 	 * only if the worklist linkage are at the beginning of the structure.
 	 */
 	if (ump->softdep_journal_tail == wk)
 		ump->softdep_journal_tail =
 		    (struct worklist *)wk->wk_list.le_prev;
 
 	WORKLIST_REMOVE(wk);
 	ump->softdep_on_journal -= 1;
 }
 
 /*
  * Check for journal space as well as dependency limits so the prelink
  * code can throttle both journaled and non-journaled filesystems.
  * Threshold is 0 for low and 1 for min.
  */
 static int
 journal_space(ump, thresh)
 	struct ufsmount *ump;
 	int thresh;
 {
 	struct jblocks *jblocks;
 	int limit, avail;
 
 	jblocks = ump->softdep_jblocks;
 	if (jblocks == NULL)
 		return (1);
 	/*
 	 * We use a tighter restriction here to prevent request_cleanup()
 	 * running in threads from running into locks we currently hold.
 	 * We have to be over the limit and our filesystem has to be
 	 * responsible for more than our share of that usage.
 	 */
 	limit = (max_softdeps / 10) * 9;
 	if (dep_current[D_INODEDEP] > limit &&
 	    ump->softdep_curdeps[D_INODEDEP] > limit / stat_flush_threads)
 		return (0);
 	if (thresh)
 		thresh = jblocks->jb_min;
 	else
 		thresh = jblocks->jb_low;
 	avail = (ump->softdep_on_journal * JREC_SIZE) / DEV_BSIZE;
 	avail = jblocks->jb_free - avail;
 
 	return (avail > thresh);
 }
 
 static void
 journal_suspend(ump)
 	struct ufsmount *ump;
 {
 	struct jblocks *jblocks;
 	struct mount *mp;
 
 	mp = UFSTOVFS(ump);
 	jblocks = ump->softdep_jblocks;
 	MNT_ILOCK(mp);
 	if ((mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
 		stat_journal_min++;
 		mp->mnt_kern_flag |= MNTK_SUSPEND;
 		mp->mnt_susp_owner = ump->softdep_flushtd;
 	}
 	jblocks->jb_suspended = 1;
 	MNT_IUNLOCK(mp);
 }
 
 static int
 journal_unsuspend(struct ufsmount *ump)
 {
 	struct jblocks *jblocks;
 	struct mount *mp;
 
 	mp = UFSTOVFS(ump);
 	jblocks = ump->softdep_jblocks;
 
 	if (jblocks != NULL && jblocks->jb_suspended &&
 	    journal_space(ump, jblocks->jb_min)) {
 		jblocks->jb_suspended = 0;
 		FREE_LOCK(ump);
 		mp->mnt_susp_owner = curthread;
 		vfs_write_resume(mp, 0);
 		ACQUIRE_LOCK(ump);
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * Called before any allocation function to be certain that there is
  * sufficient space in the journal prior to creating any new records.
  * Since in the case of block allocation we may have multiple locked
  * buffers at the time of the actual allocation we can not block
  * when the journal records are created.  Doing so would create a deadlock
  * if any of these buffers needed to be flushed to reclaim space.  Instead
  * we require a sufficiently large amount of available space such that
  * each thread in the system could have passed this allocation check and
  * still have sufficient free space.  With 20% of a minimum journal size
  * of 1MB we have 6553 records available.
  */
 int
 softdep_prealloc(vp, waitok)
 	struct vnode *vp;
 	int waitok;
 {
 	struct ufsmount *ump;
 
 	KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0,
 	    ("softdep_prealloc called on non-softdep filesystem"));
 	/*
 	 * Nothing to do if we are not running journaled soft updates.
 	 * If we currently hold the snapshot lock, we must avoid
 	 * handling other resources that could cause deadlock.  Do not
 	 * touch quotas vnode since it is typically recursed with
 	 * other vnode locks held.
 	 */
 	if (DOINGSUJ(vp) == 0 || IS_SNAPSHOT(VTOI(vp)) ||
 	    (vp->v_vflag & VV_SYSTEM) != 0)
 		return (0);
 	ump = VFSTOUFS(vp->v_mount);
 	ACQUIRE_LOCK(ump);
 	if (journal_space(ump, 0)) {
 		FREE_LOCK(ump);
 		return (0);
 	}
 	stat_journal_low++;
 	FREE_LOCK(ump);
 	if (waitok == MNT_NOWAIT)
 		return (ENOSPC);
 	/*
 	 * Attempt to sync this vnode once to flush any journal
 	 * work attached to it.
 	 */
 	if ((curthread->td_pflags & TDP_COWINPROGRESS) == 0)
 		ffs_syncvnode(vp, waitok, 0);
 	ACQUIRE_LOCK(ump);
 	process_removes(vp);
 	process_truncates(vp);
 	if (journal_space(ump, 0) == 0) {
 		softdep_speedup(ump);
 		if (journal_space(ump, 1) == 0)
 			journal_suspend(ump);
 	}
 	FREE_LOCK(ump);
 
 	return (0);
 }
 
 /*
  * Before adjusting a link count on a vnode verify that we have sufficient
  * journal space.  If not, process operations that depend on the currently
  * locked pair of vnodes to try to flush space as the syncer, buf daemon,
  * and softdep flush threads can not acquire these locks to reclaim space.
  */
 static void
 softdep_prelink(dvp, vp)
 	struct vnode *dvp;
 	struct vnode *vp;
 {
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(dvp->v_mount);
 	LOCK_OWNED(ump);
 	/*
 	 * Nothing to do if we have sufficient journal space.
 	 * If we currently hold the snapshot lock, we must avoid
 	 * handling other resources that could cause deadlock.
 	 */
 	if (journal_space(ump, 0) || (vp && IS_SNAPSHOT(VTOI(vp))))
 		return;
 	stat_journal_low++;
 	FREE_LOCK(ump);
 	if (vp)
 		ffs_syncvnode(vp, MNT_NOWAIT, 0);
 	ffs_syncvnode(dvp, MNT_WAIT, 0);
 	ACQUIRE_LOCK(ump);
 	/* Process vp before dvp as it may create .. removes. */
 	if (vp) {
 		process_removes(vp);
 		process_truncates(vp);
 	}
 	process_removes(dvp);
 	process_truncates(dvp);
 	softdep_speedup(ump);
 	process_worklist_item(UFSTOVFS(ump), 2, LK_NOWAIT);
 	if (journal_space(ump, 0) == 0) {
 		softdep_speedup(ump);
 		if (journal_space(ump, 1) == 0)
 			journal_suspend(ump);
 	}
 }
 
 static void
 jseg_write(ump, jseg, data)
 	struct ufsmount *ump;
 	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jsegrec *rec;
 
 	rec = (struct jsegrec *)data;
 	rec->jsr_seq = jseg->js_seq;
 	rec->jsr_oldest = jseg->js_oldseq;
 	rec->jsr_cnt = jseg->js_cnt;
 	rec->jsr_blocks = jseg->js_size / ump->um_devvp->v_bufobj.bo_bsize;
 	rec->jsr_crc = 0;
 	rec->jsr_time = ump->um_fs->fs_mtime;
 }
 
 static inline void
 inoref_write(inoref, jseg, rec)
 	struct inoref *inoref;
 	struct jseg *jseg;
 	struct jrefrec *rec;
 {
 
 	inoref->if_jsegdep->jd_seg = jseg;
 	rec->jr_ino = inoref->if_ino;
 	rec->jr_parent = inoref->if_parent;
 	rec->jr_nlink = inoref->if_nlink;
 	rec->jr_mode = inoref->if_mode;
 	rec->jr_diroff = inoref->if_diroff;
 }
 
 static void
 jaddref_write(jaddref, jseg, data)
 	struct jaddref *jaddref;
 	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jrefrec *rec;
 
 	rec = (struct jrefrec *)data;
 	rec->jr_op = JOP_ADDREF;
 	inoref_write(&jaddref->ja_ref, jseg, rec);
 }
 
 static void
 jremref_write(jremref, jseg, data)
 	struct jremref *jremref;
 	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jrefrec *rec;
 
 	rec = (struct jrefrec *)data;
 	rec->jr_op = JOP_REMREF;
 	inoref_write(&jremref->jr_ref, jseg, rec);
 }
 
 static void
 jmvref_write(jmvref, jseg, data)
 	struct jmvref *jmvref;
 	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jmvrec *rec;
 
 	rec = (struct jmvrec *)data;
 	rec->jm_op = JOP_MVREF;
 	rec->jm_ino = jmvref->jm_ino;
 	rec->jm_parent = jmvref->jm_parent;
 	rec->jm_oldoff = jmvref->jm_oldoff;
 	rec->jm_newoff = jmvref->jm_newoff;
 }
 
 static void
 jnewblk_write(jnewblk, jseg, data)
 	struct jnewblk *jnewblk;
 	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jblkrec *rec;
 
 	jnewblk->jn_jsegdep->jd_seg = jseg;
 	rec = (struct jblkrec *)data;
 	rec->jb_op = JOP_NEWBLK;
 	rec->jb_ino = jnewblk->jn_ino;
 	rec->jb_blkno = jnewblk->jn_blkno;
 	rec->jb_lbn = jnewblk->jn_lbn;
 	rec->jb_frags = jnewblk->jn_frags;
 	rec->jb_oldfrags = jnewblk->jn_oldfrags;
 }
 
 static void
 jfreeblk_write(jfreeblk, jseg, data)
 	struct jfreeblk *jfreeblk;
 	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jblkrec *rec;
 
 	jfreeblk->jf_dep.jb_jsegdep->jd_seg = jseg;
 	rec = (struct jblkrec *)data;
 	rec->jb_op = JOP_FREEBLK;
 	rec->jb_ino = jfreeblk->jf_ino;
 	rec->jb_blkno = jfreeblk->jf_blkno;
 	rec->jb_lbn = jfreeblk->jf_lbn;
 	rec->jb_frags = jfreeblk->jf_frags;
 	rec->jb_oldfrags = 0;
 }
 
 static void
 jfreefrag_write(jfreefrag, jseg, data)
 	struct jfreefrag *jfreefrag;
 	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jblkrec *rec;
 
 	jfreefrag->fr_jsegdep->jd_seg = jseg;
 	rec = (struct jblkrec *)data;
 	rec->jb_op = JOP_FREEBLK;
 	rec->jb_ino = jfreefrag->fr_ino;
 	rec->jb_blkno = jfreefrag->fr_blkno;
 	rec->jb_lbn = jfreefrag->fr_lbn;
 	rec->jb_frags = jfreefrag->fr_frags;
 	rec->jb_oldfrags = 0;
 }
 
 static void
 jtrunc_write(jtrunc, jseg, data)
 	struct jtrunc *jtrunc;
 	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jtrncrec *rec;
 
 	jtrunc->jt_dep.jb_jsegdep->jd_seg = jseg;
 	rec = (struct jtrncrec *)data;
 	rec->jt_op = JOP_TRUNC;
 	rec->jt_ino = jtrunc->jt_ino;
 	rec->jt_size = jtrunc->jt_size;
 	rec->jt_extsize = jtrunc->jt_extsize;
 }
 
 static void
 jfsync_write(jfsync, jseg, data)
 	struct jfsync *jfsync;
 	struct jseg *jseg;
 	uint8_t *data;
 {
 	struct jtrncrec *rec;
 
 	rec = (struct jtrncrec *)data;
 	rec->jt_op = JOP_SYNC;
 	rec->jt_ino = jfsync->jfs_ino;
 	rec->jt_size = jfsync->jfs_size;
 	rec->jt_extsize = jfsync->jfs_extsize;
 }
 
 static void
 softdep_flushjournal(mp)
 	struct mount *mp;
 {
 	struct jblocks *jblocks;
 	struct ufsmount *ump;
 
 	if (MOUNTEDSUJ(mp) == 0)
 		return;
 	ump = VFSTOUFS(mp);
 	jblocks = ump->softdep_jblocks;
 	ACQUIRE_LOCK(ump);
 	while (ump->softdep_on_journal) {
 		jblocks->jb_needseg = 1;
 		softdep_process_journal(mp, NULL, MNT_WAIT);
 	}
 	FREE_LOCK(ump);
 }
 
 static void softdep_synchronize_completed(struct bio *);
 static void softdep_synchronize(struct bio *, struct ufsmount *, void *);
 
 static void
 softdep_synchronize_completed(bp)
         struct bio *bp;
 {
 	struct jseg *oldest;
 	struct jseg *jseg;
 	struct ufsmount *ump;
 
 	/*
 	 * caller1 marks the last segment written before we issued the
 	 * synchronize cache.
 	 */
 	jseg = bp->bio_caller1;
 	if (jseg == NULL) {
 		g_destroy_bio(bp);
 		return;
 	}
 	ump = VFSTOUFS(jseg->js_list.wk_mp);
 	ACQUIRE_LOCK(ump);
 	oldest = NULL;
 	/*
 	 * Mark all the journal entries waiting on the synchronize cache
 	 * as completed so they may continue on.
 	 */
 	while (jseg != NULL && (jseg->js_state & COMPLETE) == 0) {
 		jseg->js_state |= COMPLETE;
 		oldest = jseg;
 		jseg = TAILQ_PREV(jseg, jseglst, js_next);
 	}
 	/*
 	 * Restart deferred journal entry processing from the oldest
 	 * completed jseg.
 	 */
 	if (oldest)
 		complete_jsegs(oldest);
 
 	FREE_LOCK(ump);
 	g_destroy_bio(bp);
 }
 
 /*
  * Send BIO_FLUSH/SYNCHRONIZE CACHE to the device to enforce write ordering
  * barriers.  The journal must be written prior to any blocks that depend
  * on it and the journal can not be released until the blocks have be
  * written.  This code handles both barriers simultaneously.
  */
 static void
 softdep_synchronize(bp, ump, caller1)
 	struct bio *bp;
 	struct ufsmount *ump;
 	void *caller1;
 {
 
 	bp->bio_cmd = BIO_FLUSH;
 	bp->bio_flags |= BIO_ORDERED;
 	bp->bio_data = NULL;
 	bp->bio_offset = ump->um_cp->provider->mediasize;
 	bp->bio_length = 0;
 	bp->bio_done = softdep_synchronize_completed;
 	bp->bio_caller1 = caller1;
 	g_io_request(bp,
 	    (struct g_consumer *)ump->um_devvp->v_bufobj.bo_private);
 }
 
 /*
  * Flush some journal records to disk.
  */
 static void
 softdep_process_journal(mp, needwk, flags)
 	struct mount *mp;
 	struct worklist *needwk;
 	int flags;
 {
 	struct jblocks *jblocks;
 	struct ufsmount *ump;
 	struct worklist *wk;
 	struct jseg *jseg;
 	struct buf *bp;
 	struct bio *bio;
 	uint8_t *data;
 	struct fs *fs;
 	int shouldflush;
 	int segwritten;
 	int jrecmin;	/* Minimum records per block. */
 	int jrecmax;	/* Maximum records per block. */
 	int size;
 	int cnt;
 	int off;
 	int devbsize;
 
 	if (MOUNTEDSUJ(mp) == 0)
 		return;
 	shouldflush = softdep_flushcache;
 	bio = NULL;
 	jseg = NULL;
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	fs = ump->um_fs;
 	jblocks = ump->softdep_jblocks;
 	devbsize = ump->um_devvp->v_bufobj.bo_bsize;
 	/*
 	 * We write anywhere between a disk block and fs block.  The upper
 	 * bound is picked to prevent buffer cache fragmentation and limit
 	 * processing time per I/O.
 	 */
 	jrecmin = (devbsize / JREC_SIZE) - 1; /* -1 for seg header */
 	jrecmax = (fs->fs_bsize / devbsize) * jrecmin;
 	segwritten = 0;
 	for (;;) {
 		cnt = ump->softdep_on_journal;
 		/*
 		 * Criteria for writing a segment:
 		 * 1) We have a full block.
 		 * 2) We're called from jwait() and haven't found the
 		 *    journal item yet.
 		 * 3) Always write if needseg is set.
 		 * 4) If we are called from process_worklist and have
 		 *    not yet written anything we write a partial block
 		 *    to enforce a 1 second maximum latency on journal
 		 *    entries.
 		 */
 		if (cnt < (jrecmax - 1) && needwk == NULL &&
 		    jblocks->jb_needseg == 0 && (segwritten || cnt == 0))
 			break;
 		cnt++;
 		/*
 		 * Verify some free journal space.  softdep_prealloc() should
 		 * guarantee that we don't run out so this is indicative of
 		 * a problem with the flow control.  Try to recover
 		 * gracefully in any event.
 		 */
 		while (jblocks->jb_free == 0) {
 			if (flags != MNT_WAIT)
 				break;
 			printf("softdep: Out of journal space!\n");
 			softdep_speedup(ump);
 			msleep(jblocks, LOCK_PTR(ump), PRIBIO, "jblocks", hz);
 		}
 		FREE_LOCK(ump);
 		jseg = malloc(sizeof(*jseg), M_JSEG, M_SOFTDEP_FLAGS);
 		workitem_alloc(&jseg->js_list, D_JSEG, mp);
 		LIST_INIT(&jseg->js_entries);
 		LIST_INIT(&jseg->js_indirs);
 		jseg->js_state = ATTACHED;
 		if (shouldflush == 0)
 			jseg->js_state |= COMPLETE;
 		else if (bio == NULL)
 			bio = g_alloc_bio();
 		jseg->js_jblocks = jblocks;
 		bp = geteblk(fs->fs_bsize, 0);
 		ACQUIRE_LOCK(ump);
 		/*
 		 * If there was a race while we were allocating the block
 		 * and jseg the entry we care about was likely written.
 		 * We bail out in both the WAIT and NOWAIT case and assume
 		 * the caller will loop if the entry it cares about is
 		 * not written.
 		 */
 		cnt = ump->softdep_on_journal;
 		if (cnt + jblocks->jb_needseg == 0 || jblocks->jb_free == 0) {
 			bp->b_flags |= B_INVAL | B_NOCACHE;
 			WORKITEM_FREE(jseg, D_JSEG);
 			FREE_LOCK(ump);
 			brelse(bp);
 			ACQUIRE_LOCK(ump);
 			break;
 		}
 		/*
 		 * Calculate the disk block size required for the available
 		 * records rounded to the min size.
 		 */
 		if (cnt == 0)
 			size = devbsize;
 		else if (cnt < jrecmax)
 			size = howmany(cnt, jrecmin) * devbsize;
 		else
 			size = fs->fs_bsize;
 		/*
 		 * Allocate a disk block for this journal data and account
 		 * for truncation of the requested size if enough contiguous
 		 * space was not available.
 		 */
 		bp->b_blkno = jblocks_alloc(jblocks, size, &size);
 		bp->b_lblkno = bp->b_blkno;
 		bp->b_offset = bp->b_blkno * DEV_BSIZE;
 		bp->b_bcount = size;
 		bp->b_flags &= ~B_INVAL;
 		bp->b_flags |= B_VALIDSUSPWRT | B_NOCOPY;
 		/*
 		 * Initialize our jseg with cnt records.  Assign the next
 		 * sequence number to it and link it in-order.
 		 */
 		cnt = MIN(cnt, (size / devbsize) * jrecmin);
 		jseg->js_buf = bp;
 		jseg->js_cnt = cnt;
 		jseg->js_refs = cnt + 1;	/* Self ref. */
 		jseg->js_size = size;
 		jseg->js_seq = jblocks->jb_nextseq++;
 		if (jblocks->jb_oldestseg == NULL)
 			jblocks->jb_oldestseg = jseg;
 		jseg->js_oldseq = jblocks->jb_oldestseg->js_seq;
 		TAILQ_INSERT_TAIL(&jblocks->jb_segs, jseg, js_next);
 		if (jblocks->jb_writeseg == NULL)
 			jblocks->jb_writeseg = jseg;
 		/*
 		 * Start filling in records from the pending list.
 		 */
 		data = bp->b_data;
 		off = 0;
 
 		/*
 		 * Always put a header on the first block.
 		 * XXX As with below, there might not be a chance to get
 		 * into the loop.  Ensure that something valid is written.
 		 */
 		jseg_write(ump, jseg, data);
 		off += JREC_SIZE;
 		data = bp->b_data + off;
 
 		/*
 		 * XXX Something is wrong here.  There's no work to do,
 		 * but we need to perform and I/O and allow it to complete
 		 * anyways.
 		 */
 		if (LIST_EMPTY(&ump->softdep_journal_pending))
 			stat_emptyjblocks++;
 
 		while ((wk = LIST_FIRST(&ump->softdep_journal_pending))
 		    != NULL) {
 			if (cnt == 0)
 				break;
 			/* Place a segment header on every device block. */
 			if ((off % devbsize) == 0) {
 				jseg_write(ump, jseg, data);
 				off += JREC_SIZE;
 				data = bp->b_data + off;
 			}
 			if (wk == needwk)
 				needwk = NULL;
 			remove_from_journal(wk);
 			wk->wk_state |= INPROGRESS;
 			WORKLIST_INSERT(&jseg->js_entries, wk);
 			switch (wk->wk_type) {
 			case D_JADDREF:
 				jaddref_write(WK_JADDREF(wk), jseg, data);
 				break;
 			case D_JREMREF:
 				jremref_write(WK_JREMREF(wk), jseg, data);
 				break;
 			case D_JMVREF:
 				jmvref_write(WK_JMVREF(wk), jseg, data);
 				break;
 			case D_JNEWBLK:
 				jnewblk_write(WK_JNEWBLK(wk), jseg, data);
 				break;
 			case D_JFREEBLK:
 				jfreeblk_write(WK_JFREEBLK(wk), jseg, data);
 				break;
 			case D_JFREEFRAG:
 				jfreefrag_write(WK_JFREEFRAG(wk), jseg, data);
 				break;
 			case D_JTRUNC:
 				jtrunc_write(WK_JTRUNC(wk), jseg, data);
 				break;
 			case D_JFSYNC:
 				jfsync_write(WK_JFSYNC(wk), jseg, data);
 				break;
 			default:
 				panic("process_journal: Unknown type %s",
 				    TYPENAME(wk->wk_type));
 				/* NOTREACHED */
 			}
 			off += JREC_SIZE;
 			data = bp->b_data + off;
 			cnt--;
 		}
 
 		/* Clear any remaining space so we don't leak kernel data */
 		if (size > off)
 			bzero(data, size - off);
 
 		/*
 		 * Write this one buffer and continue.
 		 */
 		segwritten = 1;
 		jblocks->jb_needseg = 0;
 		WORKLIST_INSERT(&bp->b_dep, &jseg->js_list);
 		FREE_LOCK(ump);
 		pbgetvp(ump->um_devvp, bp);
 		/*
 		 * We only do the blocking wait once we find the journal
 		 * entry we're looking for.
 		 */
 		if (needwk == NULL && flags == MNT_WAIT)
 			bwrite(bp);
 		else
 			bawrite(bp);
 		ACQUIRE_LOCK(ump);
 	}
 	/*
 	 * If we wrote a segment issue a synchronize cache so the journal
 	 * is reflected on disk before the data is written.  Since reclaiming
 	 * journal space also requires writing a journal record this
 	 * process also enforces a barrier before reclamation.
 	 */
 	if (segwritten && shouldflush) {
 		softdep_synchronize(bio, ump, 
 		    TAILQ_LAST(&jblocks->jb_segs, jseglst));
 	} else if (bio)
 		g_destroy_bio(bio);
 	/*
 	 * If we've suspended the filesystem because we ran out of journal
 	 * space either try to sync it here to make some progress or
 	 * unsuspend it if we already have.
 	 */
 	if (flags == 0 && jblocks->jb_suspended) {
 		if (journal_unsuspend(ump))
 			return;
 		FREE_LOCK(ump);
 		VFS_SYNC(mp, MNT_NOWAIT);
 		ffs_sbupdate(ump, MNT_WAIT, 0);
 		ACQUIRE_LOCK(ump);
 	}
 }
 
 /*
  * Complete a jseg, allowing all dependencies awaiting journal writes
  * to proceed.  Each journal dependency also attaches a jsegdep to dependent
  * structures so that the journal segment can be freed to reclaim space.
  */
 static void
 complete_jseg(jseg)
 	struct jseg *jseg;
 {
 	struct worklist *wk;
 	struct jmvref *jmvref;
 	int waiting;
 #ifdef INVARIANTS
 	int i = 0;
 #endif
 
 	while ((wk = LIST_FIRST(&jseg->js_entries)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		waiting = wk->wk_state & IOWAITING;
 		wk->wk_state &= ~(INPROGRESS | IOWAITING);
 		wk->wk_state |= COMPLETE;
 		KASSERT(i++ < jseg->js_cnt,
 		    ("handle_written_jseg: overflow %d >= %d",
 		    i - 1, jseg->js_cnt));
 		switch (wk->wk_type) {
 		case D_JADDREF:
 			handle_written_jaddref(WK_JADDREF(wk));
 			break;
 		case D_JREMREF:
 			handle_written_jremref(WK_JREMREF(wk));
 			break;
 		case D_JMVREF:
 			rele_jseg(jseg);	/* No jsegdep. */
 			jmvref = WK_JMVREF(wk);
 			LIST_REMOVE(jmvref, jm_deps);
 			if ((jmvref->jm_pagedep->pd_state & ONWORKLIST) == 0)
 				free_pagedep(jmvref->jm_pagedep);
 			WORKITEM_FREE(jmvref, D_JMVREF);
 			break;
 		case D_JNEWBLK:
 			handle_written_jnewblk(WK_JNEWBLK(wk));
 			break;
 		case D_JFREEBLK:
 			handle_written_jblkdep(&WK_JFREEBLK(wk)->jf_dep);
 			break;
 		case D_JTRUNC:
 			handle_written_jblkdep(&WK_JTRUNC(wk)->jt_dep);
 			break;
 		case D_JFSYNC:
 			rele_jseg(jseg);	/* No jsegdep. */
 			WORKITEM_FREE(wk, D_JFSYNC);
 			break;
 		case D_JFREEFRAG:
 			handle_written_jfreefrag(WK_JFREEFRAG(wk));
 			break;
 		default:
 			panic("handle_written_jseg: Unknown type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 		if (waiting)
 			wakeup(wk);
 	}
 	/* Release the self reference so the structure may be freed. */
 	rele_jseg(jseg);
 }
 
 /*
  * Determine which jsegs are ready for completion processing.  Waits for
  * synchronize cache to complete as well as forcing in-order completion
  * of journal entries.
  */
 static void
 complete_jsegs(jseg)
 	struct jseg *jseg;
 {
 	struct jblocks *jblocks;
 	struct jseg *jsegn;
 
 	jblocks = jseg->js_jblocks;
 	/*
 	 * Don't allow out of order completions.  If this isn't the first
 	 * block wait for it to write before we're done.
 	 */
 	if (jseg != jblocks->jb_writeseg)
 		return;
 	/* Iterate through available jsegs processing their entries. */
 	while (jseg && (jseg->js_state & ALLCOMPLETE) == ALLCOMPLETE) {
 		jblocks->jb_oldestwrseq = jseg->js_oldseq;
 		jsegn = TAILQ_NEXT(jseg, js_next);
 		complete_jseg(jseg);
 		jseg = jsegn;
 	}
 	jblocks->jb_writeseg = jseg;
 	/*
 	 * Attempt to free jsegs now that oldestwrseq may have advanced. 
 	 */
 	free_jsegs(jblocks);
 }
 
 /*
  * Mark a jseg as DEPCOMPLETE and throw away the buffer.  Attempt to handle
  * the final completions.
  */
 static void
 handle_written_jseg(jseg, bp)
 	struct jseg *jseg;
 	struct buf *bp;
 {
 
 	if (jseg->js_refs == 0)
 		panic("handle_written_jseg: No self-reference on %p", jseg);
 	jseg->js_state |= DEPCOMPLETE;
 	/*
 	 * We'll never need this buffer again, set flags so it will be
 	 * discarded.
 	 */
 	bp->b_flags |= B_INVAL | B_NOCACHE;
 	pbrelvp(bp);
 	complete_jsegs(jseg);
 }
 
 static inline struct jsegdep *
 inoref_jseg(inoref)
 	struct inoref *inoref;
 {
 	struct jsegdep *jsegdep;
 
 	jsegdep = inoref->if_jsegdep;
 	inoref->if_jsegdep = NULL;
 
 	return (jsegdep);
 }
 
 /*
  * Called once a jremref has made it to stable store.  The jremref is marked
  * complete and we attempt to free it.  Any pagedeps writes sleeping waiting
  * for the jremref to complete will be awoken by free_jremref.
  */
 static void
 handle_written_jremref(jremref)
 	struct jremref *jremref;
 {
 	struct inodedep *inodedep;
 	struct jsegdep *jsegdep;
 	struct dirrem *dirrem;
 
 	/* Grab the jsegdep. */
 	jsegdep = inoref_jseg(&jremref->jr_ref);
 	/*
 	 * Remove us from the inoref list.
 	 */
 	if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino,
 	    0, &inodedep) == 0)
 		panic("handle_written_jremref: Lost inodedep");
 	TAILQ_REMOVE(&inodedep->id_inoreflst, &jremref->jr_ref, if_deps);
 	/*
 	 * Complete the dirrem.
 	 */
 	dirrem = jremref->jr_dirrem;
 	jremref->jr_dirrem = NULL;
 	LIST_REMOVE(jremref, jr_deps);
 	jsegdep->jd_state |= jremref->jr_state & MKDIR_PARENT;
 	jwork_insert(&dirrem->dm_jwork, jsegdep);
 	if (LIST_EMPTY(&dirrem->dm_jremrefhd) &&
 	    (dirrem->dm_state & COMPLETE) != 0)
 		add_to_worklist(&dirrem->dm_list, 0);
 	free_jremref(jremref);
 }
 
 /*
  * Called once a jaddref has made it to stable store.  The dependency is
  * marked complete and any dependent structures are added to the inode
  * bufwait list to be completed as soon as it is written.  If a bitmap write
  * depends on this entry we move the inode into the inodedephd of the
  * bmsafemap dependency and attempt to remove the jaddref from the bmsafemap.
  */
 static void
 handle_written_jaddref(jaddref)
 	struct jaddref *jaddref;
 {
 	struct jsegdep *jsegdep;
 	struct inodedep *inodedep;
 	struct diradd *diradd;
 	struct mkdir *mkdir;
 
 	/* Grab the jsegdep. */
 	jsegdep = inoref_jseg(&jaddref->ja_ref);
 	mkdir = NULL;
 	diradd = NULL;
 	if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
 	    0, &inodedep) == 0)
 		panic("handle_written_jaddref: Lost inodedep.");
 	if (jaddref->ja_diradd == NULL)
 		panic("handle_written_jaddref: No dependency");
 	if (jaddref->ja_diradd->da_list.wk_type == D_DIRADD) {
 		diradd = jaddref->ja_diradd;
 		WORKLIST_INSERT(&inodedep->id_bufwait, &diradd->da_list);
 	} else if (jaddref->ja_state & MKDIR_PARENT) {
 		mkdir = jaddref->ja_mkdir;
 		WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir->md_list);
 	} else if (jaddref->ja_state & MKDIR_BODY)
 		mkdir = jaddref->ja_mkdir;
 	else
 		panic("handle_written_jaddref: Unknown dependency %p",
 		    jaddref->ja_diradd);
 	jaddref->ja_diradd = NULL;	/* also clears ja_mkdir */
 	/*
 	 * Remove us from the inode list.
 	 */
 	TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref, if_deps);
 	/*
 	 * The mkdir may be waiting on the jaddref to clear before freeing.
 	 */
 	if (mkdir) {
 		KASSERT(mkdir->md_list.wk_type == D_MKDIR,
 		    ("handle_written_jaddref: Incorrect type for mkdir %s",
 		    TYPENAME(mkdir->md_list.wk_type)));
 		mkdir->md_jaddref = NULL;
 		diradd = mkdir->md_diradd;
 		mkdir->md_state |= DEPCOMPLETE;
 		complete_mkdir(mkdir);
 	}
 	jwork_insert(&diradd->da_jwork, jsegdep);
 	if (jaddref->ja_state & NEWBLOCK) {
 		inodedep->id_state |= ONDEPLIST;
 		LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_inodedephd,
 		    inodedep, id_deps);
 	}
 	free_jaddref(jaddref);
 }
 
 /*
  * Called once a jnewblk journal is written.  The allocdirect or allocindir
  * is placed in the bmsafemap to await notification of a written bitmap.  If
  * the operation was canceled we add the segdep to the appropriate
  * dependency to free the journal space once the canceling operation
  * completes.
  */
 static void
 handle_written_jnewblk(jnewblk)
 	struct jnewblk *jnewblk;
 {
 	struct bmsafemap *bmsafemap;
 	struct freefrag *freefrag;
 	struct freework *freework;
 	struct jsegdep *jsegdep;
 	struct newblk *newblk;
 
 	/* Grab the jsegdep. */
 	jsegdep = jnewblk->jn_jsegdep;
 	jnewblk->jn_jsegdep = NULL;
 	if (jnewblk->jn_dep == NULL) 
 		panic("handle_written_jnewblk: No dependency for the segdep.");
 	switch (jnewblk->jn_dep->wk_type) {
 	case D_NEWBLK:
 	case D_ALLOCDIRECT:
 	case D_ALLOCINDIR:
 		/*
 		 * Add the written block to the bmsafemap so it can
 		 * be notified when the bitmap is on disk.
 		 */
 		newblk = WK_NEWBLK(jnewblk->jn_dep);
 		newblk->nb_jnewblk = NULL;
 		if ((newblk->nb_state & GOINGAWAY) == 0) {
 			bmsafemap = newblk->nb_bmsafemap;
 			newblk->nb_state |= ONDEPLIST;
 			LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk,
 			    nb_deps);
 		}
 		jwork_insert(&newblk->nb_jwork, jsegdep);
 		break;
 	case D_FREEFRAG:
 		/*
 		 * A newblock being removed by a freefrag when replaced by
 		 * frag extension.
 		 */
 		freefrag = WK_FREEFRAG(jnewblk->jn_dep);
 		freefrag->ff_jdep = NULL;
 		jwork_insert(&freefrag->ff_jwork, jsegdep);
 		break;
 	case D_FREEWORK:
 		/*
 		 * A direct block was removed by truncate.
 		 */
 		freework = WK_FREEWORK(jnewblk->jn_dep);
 		freework->fw_jnewblk = NULL;
 		jwork_insert(&freework->fw_freeblks->fb_jwork, jsegdep);
 		break;
 	default:
 		panic("handle_written_jnewblk: Unknown type %d.",
 		    jnewblk->jn_dep->wk_type);
 	}
 	jnewblk->jn_dep = NULL;
 	free_jnewblk(jnewblk);
 }
 
 /*
  * Cancel a jfreefrag that won't be needed, probably due to colliding with
  * an in-flight allocation that has not yet been committed.  Divorce us
  * from the freefrag and mark it DEPCOMPLETE so that it may be added
  * to the worklist.
  */
 static void
 cancel_jfreefrag(jfreefrag)
 	struct jfreefrag *jfreefrag;
 {
 	struct freefrag *freefrag;
 
 	if (jfreefrag->fr_jsegdep) {
 		free_jsegdep(jfreefrag->fr_jsegdep);
 		jfreefrag->fr_jsegdep = NULL;
 	}
 	freefrag = jfreefrag->fr_freefrag;
 	jfreefrag->fr_freefrag = NULL;
 	free_jfreefrag(jfreefrag);
 	freefrag->ff_state |= DEPCOMPLETE;
 	CTR1(KTR_SUJ, "cancel_jfreefrag: blkno %jd", freefrag->ff_blkno);
 }
 
 /*
  * Free a jfreefrag when the parent freefrag is rendered obsolete.
  */
 static void
 free_jfreefrag(jfreefrag)
 	struct jfreefrag *jfreefrag;
 {
 
 	if (jfreefrag->fr_state & INPROGRESS)
 		WORKLIST_REMOVE(&jfreefrag->fr_list);
 	else if (jfreefrag->fr_state & ONWORKLIST)
 		remove_from_journal(&jfreefrag->fr_list);
 	if (jfreefrag->fr_freefrag != NULL)
 		panic("free_jfreefrag:  Still attached to a freefrag.");
 	WORKITEM_FREE(jfreefrag, D_JFREEFRAG);
 }
 
 /*
  * Called when the journal write for a jfreefrag completes.  The parent
  * freefrag is added to the worklist if this completes its dependencies.
  */
 static void
 handle_written_jfreefrag(jfreefrag)
 	struct jfreefrag *jfreefrag;
 {
 	struct jsegdep *jsegdep;
 	struct freefrag *freefrag;
 
 	/* Grab the jsegdep. */
 	jsegdep = jfreefrag->fr_jsegdep;
 	jfreefrag->fr_jsegdep = NULL;
 	freefrag = jfreefrag->fr_freefrag;
 	if (freefrag == NULL)
 		panic("handle_written_jfreefrag: No freefrag.");
 	freefrag->ff_state |= DEPCOMPLETE;
 	freefrag->ff_jdep = NULL;
 	jwork_insert(&freefrag->ff_jwork, jsegdep);
 	if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE)
 		add_to_worklist(&freefrag->ff_list, 0);
 	jfreefrag->fr_freefrag = NULL;
 	free_jfreefrag(jfreefrag);
 }
 
 /*
  * Called when the journal write for a jfreeblk completes.  The jfreeblk
  * is removed from the freeblks list of pending journal writes and the
  * jsegdep is moved to the freeblks jwork to be completed when all blocks
  * have been reclaimed.
  */
 static void
 handle_written_jblkdep(jblkdep)
 	struct jblkdep *jblkdep;
 {
 	struct freeblks *freeblks;
 	struct jsegdep *jsegdep;
 
 	/* Grab the jsegdep. */
 	jsegdep = jblkdep->jb_jsegdep;
 	jblkdep->jb_jsegdep = NULL;
 	freeblks = jblkdep->jb_freeblks;
 	LIST_REMOVE(jblkdep, jb_deps);
 	jwork_insert(&freeblks->fb_jwork, jsegdep);
 	/*
 	 * If the freeblks is all journaled, we can add it to the worklist.
 	 */
 	if (LIST_EMPTY(&freeblks->fb_jblkdephd) &&
 	    (freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE)
 		add_to_worklist(&freeblks->fb_list, WK_NODELAY);
 
 	free_jblkdep(jblkdep);
 }
 
 static struct jsegdep *
 newjsegdep(struct worklist *wk)
 {
 	struct jsegdep *jsegdep;
 
 	jsegdep = malloc(sizeof(*jsegdep), M_JSEGDEP, M_SOFTDEP_FLAGS);
 	workitem_alloc(&jsegdep->jd_list, D_JSEGDEP, wk->wk_mp);
 	jsegdep->jd_seg = NULL;
 
 	return (jsegdep);
 }
 
 static struct jmvref *
 newjmvref(dp, ino, oldoff, newoff)
 	struct inode *dp;
 	ino_t ino;
 	off_t oldoff;
 	off_t newoff;
 {
 	struct jmvref *jmvref;
 
 	jmvref = malloc(sizeof(*jmvref), M_JMVREF, M_SOFTDEP_FLAGS);
 	workitem_alloc(&jmvref->jm_list, D_JMVREF, ITOVFS(dp));
 	jmvref->jm_list.wk_state = ATTACHED | DEPCOMPLETE;
 	jmvref->jm_parent = dp->i_number;
 	jmvref->jm_ino = ino;
 	jmvref->jm_oldoff = oldoff;
 	jmvref->jm_newoff = newoff;
 
 	return (jmvref);
 }
 
 /*
  * Allocate a new jremref that tracks the removal of ip from dp with the
  * directory entry offset of diroff.  Mark the entry as ATTACHED and
  * DEPCOMPLETE as we have all the information required for the journal write
  * and the directory has already been removed from the buffer.  The caller
  * is responsible for linking the jremref into the pagedep and adding it
  * to the journal to write.  The MKDIR_PARENT flag is set if we're doing
  * a DOTDOT addition so handle_workitem_remove() can properly assign
  * the jsegdep when we're done.
  */
 static struct jremref *
 newjremref(struct dirrem *dirrem, struct inode *dp, struct inode *ip,
     off_t diroff, nlink_t nlink)
 {
 	struct jremref *jremref;
 
 	jremref = malloc(sizeof(*jremref), M_JREMREF, M_SOFTDEP_FLAGS);
 	workitem_alloc(&jremref->jr_list, D_JREMREF, ITOVFS(dp));
 	jremref->jr_state = ATTACHED;
 	newinoref(&jremref->jr_ref, ip->i_number, dp->i_number, diroff,
 	   nlink, ip->i_mode);
 	jremref->jr_dirrem = dirrem;
 
 	return (jremref);
 }
 
 static inline void
 newinoref(struct inoref *inoref, ino_t ino, ino_t parent, off_t diroff,
     nlink_t nlink, uint16_t mode)
 {
 
 	inoref->if_jsegdep = newjsegdep(&inoref->if_list);
 	inoref->if_diroff = diroff;
 	inoref->if_ino = ino;
 	inoref->if_parent = parent;
 	inoref->if_nlink = nlink;
 	inoref->if_mode = mode;
 }
 
 /*
  * Allocate a new jaddref to track the addition of ino to dp at diroff.  The
  * directory offset may not be known until later.  The caller is responsible
  * adding the entry to the journal when this information is available.  nlink
  * should be the link count prior to the addition and mode is only required
  * to have the correct FMT.
  */
 static struct jaddref *
 newjaddref(struct inode *dp, ino_t ino, off_t diroff, int16_t nlink,
     uint16_t mode)
 {
 	struct jaddref *jaddref;
 
 	jaddref = malloc(sizeof(*jaddref), M_JADDREF, M_SOFTDEP_FLAGS);
 	workitem_alloc(&jaddref->ja_list, D_JADDREF, ITOVFS(dp));
 	jaddref->ja_state = ATTACHED;
 	jaddref->ja_mkdir = NULL;
 	newinoref(&jaddref->ja_ref, ino, dp->i_number, diroff, nlink, mode);
 
 	return (jaddref);
 }
 
 /*
  * Create a new free dependency for a freework.  The caller is responsible
  * for adjusting the reference count when it has the lock held.  The freedep
  * will track an outstanding bitmap write that will ultimately clear the
  * freework to continue.
  */
 static struct freedep *
 newfreedep(struct freework *freework)
 {
 	struct freedep *freedep;
 
 	freedep = malloc(sizeof(*freedep), M_FREEDEP, M_SOFTDEP_FLAGS);
 	workitem_alloc(&freedep->fd_list, D_FREEDEP, freework->fw_list.wk_mp);
 	freedep->fd_freework = freework;
 
 	return (freedep);
 }
 
 /*
  * Free a freedep structure once the buffer it is linked to is written.  If
  * this is the last reference to the freework schedule it for completion.
  */
 static void
 free_freedep(freedep)
 	struct freedep *freedep;
 {
 	struct freework *freework;
 
 	freework = freedep->fd_freework;
 	freework->fw_freeblks->fb_cgwait--;
 	if (--freework->fw_ref == 0)
 		freework_enqueue(freework);
 	WORKITEM_FREE(freedep, D_FREEDEP);
 }
 
 /*
  * Allocate a new freework structure that may be a level in an indirect
  * when parent is not NULL or a top level block when it is.  The top level
  * freework structures are allocated without the per-filesystem lock held
  * and before the freeblks is visible outside of softdep_setup_freeblocks().
  */
 static struct freework *
 newfreework(ump, freeblks, parent, lbn, nb, frags, off, journal)
 	struct ufsmount *ump;
 	struct freeblks *freeblks;
 	struct freework *parent;
 	ufs_lbn_t lbn;
 	ufs2_daddr_t nb;
 	int frags;
 	int off;
 	int journal;
 {
 	struct freework *freework;
 
 	freework = malloc(sizeof(*freework), M_FREEWORK, M_SOFTDEP_FLAGS);
 	workitem_alloc(&freework->fw_list, D_FREEWORK, freeblks->fb_list.wk_mp);
 	freework->fw_state = ATTACHED;
 	freework->fw_jnewblk = NULL;
 	freework->fw_freeblks = freeblks;
 	freework->fw_parent = parent;
 	freework->fw_lbn = lbn;
 	freework->fw_blkno = nb;
 	freework->fw_frags = frags;
 	freework->fw_indir = NULL;
 	freework->fw_ref = (MOUNTEDSUJ(UFSTOVFS(ump)) == 0 || lbn >= -NXADDR)
 		? 0 : NINDIR(ump->um_fs) + 1;
 	freework->fw_start = freework->fw_off = off;
 	if (journal)
 		newjfreeblk(freeblks, lbn, nb, frags);
 	if (parent == NULL) {
 		ACQUIRE_LOCK(ump);
 		WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
 		freeblks->fb_ref++;
 		FREE_LOCK(ump);
 	}
 
 	return (freework);
 }
 
 /*
  * Eliminate a jfreeblk for a block that does not need journaling.
  */
 static void
 cancel_jfreeblk(freeblks, blkno)
 	struct freeblks *freeblks;
 	ufs2_daddr_t blkno;
 {
 	struct jfreeblk *jfreeblk;
 	struct jblkdep *jblkdep;
 
 	LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps) {
 		if (jblkdep->jb_list.wk_type != D_JFREEBLK)
 			continue;
 		jfreeblk = WK_JFREEBLK(&jblkdep->jb_list);
 		if (jfreeblk->jf_blkno == blkno)
 			break;
 	}
 	if (jblkdep == NULL)
 		return;
 	CTR1(KTR_SUJ, "cancel_jfreeblk: blkno %jd", blkno);
 	free_jsegdep(jblkdep->jb_jsegdep);
 	LIST_REMOVE(jblkdep, jb_deps);
 	WORKITEM_FREE(jfreeblk, D_JFREEBLK);
 }
 
 /*
  * Allocate a new jfreeblk to journal top level block pointer when truncating
  * a file.  The caller must add this to the worklist when the per-filesystem
  * lock is held.
  */
 static struct jfreeblk *
 newjfreeblk(freeblks, lbn, blkno, frags)
 	struct freeblks *freeblks;
 	ufs_lbn_t lbn;
 	ufs2_daddr_t blkno;
 	int frags;
 {
 	struct jfreeblk *jfreeblk;
 
 	jfreeblk = malloc(sizeof(*jfreeblk), M_JFREEBLK, M_SOFTDEP_FLAGS);
 	workitem_alloc(&jfreeblk->jf_dep.jb_list, D_JFREEBLK,
 	    freeblks->fb_list.wk_mp);
 	jfreeblk->jf_dep.jb_jsegdep = newjsegdep(&jfreeblk->jf_dep.jb_list);
 	jfreeblk->jf_dep.jb_freeblks = freeblks;
 	jfreeblk->jf_ino = freeblks->fb_inum;
 	jfreeblk->jf_lbn = lbn;
 	jfreeblk->jf_blkno = blkno;
 	jfreeblk->jf_frags = frags;
 	LIST_INSERT_HEAD(&freeblks->fb_jblkdephd, &jfreeblk->jf_dep, jb_deps);
 
 	return (jfreeblk);
 }
 
 /*
  * The journal is only prepared to handle full-size block numbers, so we
  * have to adjust the record to reflect the change to a full-size block.
  * For example, suppose we have a block made up of fragments 8-15 and
  * want to free its last two fragments. We are given a request that says:
  *     FREEBLK ino=5, blkno=14, lbn=0, frags=2, oldfrags=0
  * where frags are the number of fragments to free and oldfrags are the
  * number of fragments to keep. To block align it, we have to change it to
  * have a valid full-size blkno, so it becomes:
  *     FREEBLK ino=5, blkno=8, lbn=0, frags=2, oldfrags=6
  */
 static void
 adjust_newfreework(freeblks, frag_offset)
 	struct freeblks *freeblks;
 	int frag_offset;
 {
 	struct jfreeblk *jfreeblk;
 
 	KASSERT((LIST_FIRST(&freeblks->fb_jblkdephd) != NULL &&
 	    LIST_FIRST(&freeblks->fb_jblkdephd)->jb_list.wk_type == D_JFREEBLK),
 	    ("adjust_newfreework: Missing freeblks dependency"));
 
 	jfreeblk = WK_JFREEBLK(LIST_FIRST(&freeblks->fb_jblkdephd));
 	jfreeblk->jf_blkno -= frag_offset;
 	jfreeblk->jf_frags += frag_offset;
 }
 
 /*
  * Allocate a new jtrunc to track a partial truncation.
  */
 static struct jtrunc *
 newjtrunc(freeblks, size, extsize)
 	struct freeblks *freeblks;
 	off_t size;
 	int extsize;
 {
 	struct jtrunc *jtrunc;
 
 	jtrunc = malloc(sizeof(*jtrunc), M_JTRUNC, M_SOFTDEP_FLAGS);
 	workitem_alloc(&jtrunc->jt_dep.jb_list, D_JTRUNC,
 	    freeblks->fb_list.wk_mp);
 	jtrunc->jt_dep.jb_jsegdep = newjsegdep(&jtrunc->jt_dep.jb_list);
 	jtrunc->jt_dep.jb_freeblks = freeblks;
 	jtrunc->jt_ino = freeblks->fb_inum;
 	jtrunc->jt_size = size;
 	jtrunc->jt_extsize = extsize;
 	LIST_INSERT_HEAD(&freeblks->fb_jblkdephd, &jtrunc->jt_dep, jb_deps);
 
 	return (jtrunc);
 }
 
 /*
  * If we're canceling a new bitmap we have to search for another ref
  * to move into the bmsafemap dep.  This might be better expressed
  * with another structure.
  */
 static void
 move_newblock_dep(jaddref, inodedep)
 	struct jaddref *jaddref;
 	struct inodedep *inodedep;
 {
 	struct inoref *inoref;
 	struct jaddref *jaddrefn;
 
 	jaddrefn = NULL;
 	for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref;
 	    inoref = TAILQ_NEXT(inoref, if_deps)) {
 		if ((jaddref->ja_state & NEWBLOCK) &&
 		    inoref->if_list.wk_type == D_JADDREF) {
 			jaddrefn = (struct jaddref *)inoref;
 			break;
 		}
 	}
 	if (jaddrefn == NULL)
 		return;
 	jaddrefn->ja_state &= ~(ATTACHED | UNDONE);
 	jaddrefn->ja_state |= jaddref->ja_state &
 	    (ATTACHED | UNDONE | NEWBLOCK);
 	jaddref->ja_state &= ~(ATTACHED | UNDONE | NEWBLOCK);
 	jaddref->ja_state |= ATTACHED;
 	LIST_REMOVE(jaddref, ja_bmdeps);
 	LIST_INSERT_HEAD(&inodedep->id_bmsafemap->sm_jaddrefhd, jaddrefn,
 	    ja_bmdeps);
 }
 
 /*
  * Cancel a jaddref either before it has been written or while it is being
  * written.  This happens when a link is removed before the add reaches
  * the disk.  The jaddref dependency is kept linked into the bmsafemap
  * and inode to prevent the link count or bitmap from reaching the disk
  * until handle_workitem_remove() re-adjusts the counts and bitmaps as
  * required.
  *
  * Returns 1 if the canceled addref requires journaling of the remove and
  * 0 otherwise.
  */
 static int
 cancel_jaddref(jaddref, inodedep, wkhd)
 	struct jaddref *jaddref;
 	struct inodedep *inodedep;
 	struct workhead *wkhd;
 {
 	struct inoref *inoref;
 	struct jsegdep *jsegdep;
 	int needsj;
 
 	KASSERT((jaddref->ja_state & COMPLETE) == 0,
 	    ("cancel_jaddref: Canceling complete jaddref"));
 	if (jaddref->ja_state & (INPROGRESS | COMPLETE))
 		needsj = 1;
 	else
 		needsj = 0;
 	if (inodedep == NULL)
 		if (inodedep_lookup(jaddref->ja_list.wk_mp, jaddref->ja_ino,
 		    0, &inodedep) == 0)
 			panic("cancel_jaddref: Lost inodedep");
 	/*
 	 * We must adjust the nlink of any reference operation that follows
 	 * us so that it is consistent with the in-memory reference.  This
 	 * ensures that inode nlink rollbacks always have the correct link.
 	 */
 	if (needsj == 0) {
 		for (inoref = TAILQ_NEXT(&jaddref->ja_ref, if_deps); inoref;
 		    inoref = TAILQ_NEXT(inoref, if_deps)) {
 			if (inoref->if_state & GOINGAWAY)
 				break;
 			inoref->if_nlink--;
 		}
 	}
 	jsegdep = inoref_jseg(&jaddref->ja_ref);
 	if (jaddref->ja_state & NEWBLOCK)
 		move_newblock_dep(jaddref, inodedep);
 	wake_worklist(&jaddref->ja_list);
 	jaddref->ja_mkdir = NULL;
 	if (jaddref->ja_state & INPROGRESS) {
 		jaddref->ja_state &= ~INPROGRESS;
 		WORKLIST_REMOVE(&jaddref->ja_list);
 		jwork_insert(wkhd, jsegdep);
 	} else {
 		free_jsegdep(jsegdep);
 		if (jaddref->ja_state & DEPCOMPLETE)
 			remove_from_journal(&jaddref->ja_list);
 	}
 	jaddref->ja_state |= (GOINGAWAY | DEPCOMPLETE);
 	/*
 	 * Leave NEWBLOCK jaddrefs on the inodedep so handle_workitem_remove
 	 * can arrange for them to be freed with the bitmap.  Otherwise we
 	 * no longer need this addref attached to the inoreflst and it
 	 * will incorrectly adjust nlink if we leave it.
 	 */
 	if ((jaddref->ja_state & NEWBLOCK) == 0) {
 		TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref,
 		    if_deps);
 		jaddref->ja_state |= COMPLETE;
 		free_jaddref(jaddref);
 		return (needsj);
 	}
 	/*
 	 * Leave the head of the list for jsegdeps for fast merging.
 	 */
 	if (LIST_FIRST(wkhd) != NULL) {
 		jaddref->ja_state |= ONWORKLIST;
 		LIST_INSERT_AFTER(LIST_FIRST(wkhd), &jaddref->ja_list, wk_list);
 	} else
 		WORKLIST_INSERT(wkhd, &jaddref->ja_list);
 
 	return (needsj);
 }
 
 /* 
  * Attempt to free a jaddref structure when some work completes.  This
  * should only succeed once the entry is written and all dependencies have
  * been notified.
  */
 static void
 free_jaddref(jaddref)
 	struct jaddref *jaddref;
 {
 
 	if ((jaddref->ja_state & ALLCOMPLETE) != ALLCOMPLETE)
 		return;
 	if (jaddref->ja_ref.if_jsegdep)
 		panic("free_jaddref: segdep attached to jaddref %p(0x%X)\n",
 		    jaddref, jaddref->ja_state);
 	if (jaddref->ja_state & NEWBLOCK)
 		LIST_REMOVE(jaddref, ja_bmdeps);
 	if (jaddref->ja_state & (INPROGRESS | ONWORKLIST))
 		panic("free_jaddref: Bad state %p(0x%X)",
 		    jaddref, jaddref->ja_state);
 	if (jaddref->ja_mkdir != NULL)
 		panic("free_jaddref: Work pending, 0x%X\n", jaddref->ja_state);
 	WORKITEM_FREE(jaddref, D_JADDREF);
 }
 
 /*
  * Free a jremref structure once it has been written or discarded.
  */
 static void
 free_jremref(jremref)
 	struct jremref *jremref;
 {
 
 	if (jremref->jr_ref.if_jsegdep)
 		free_jsegdep(jremref->jr_ref.if_jsegdep);
 	if (jremref->jr_state & INPROGRESS)
 		panic("free_jremref: IO still pending");
 	WORKITEM_FREE(jremref, D_JREMREF);
 }
 
 /*
  * Free a jnewblk structure.
  */
 static void
 free_jnewblk(jnewblk)
 	struct jnewblk *jnewblk;
 {
 
 	if ((jnewblk->jn_state & ALLCOMPLETE) != ALLCOMPLETE)
 		return;
 	LIST_REMOVE(jnewblk, jn_deps);
 	if (jnewblk->jn_dep != NULL)
 		panic("free_jnewblk: Dependency still attached.");
 	WORKITEM_FREE(jnewblk, D_JNEWBLK);
 }
 
 /*
  * Cancel a jnewblk which has been been made redundant by frag extension.
  */
 static void
 cancel_jnewblk(jnewblk, wkhd)
 	struct jnewblk *jnewblk;
 	struct workhead *wkhd;
 {
 	struct jsegdep *jsegdep;
 
 	CTR1(KTR_SUJ, "cancel_jnewblk: blkno %jd", jnewblk->jn_blkno);
 	jsegdep = jnewblk->jn_jsegdep;
 	if (jnewblk->jn_jsegdep == NULL || jnewblk->jn_dep == NULL)
 		panic("cancel_jnewblk: Invalid state");
 	jnewblk->jn_jsegdep  = NULL;
 	jnewblk->jn_dep = NULL;
 	jnewblk->jn_state |= GOINGAWAY;
 	if (jnewblk->jn_state & INPROGRESS) {
 		jnewblk->jn_state &= ~INPROGRESS;
 		WORKLIST_REMOVE(&jnewblk->jn_list);
 		jwork_insert(wkhd, jsegdep);
 	} else {
 		free_jsegdep(jsegdep);
 		remove_from_journal(&jnewblk->jn_list);
 	}
 	wake_worklist(&jnewblk->jn_list);
 	WORKLIST_INSERT(wkhd, &jnewblk->jn_list);
 }
 
 static void
 free_jblkdep(jblkdep)
 	struct jblkdep *jblkdep;
 {
 
 	if (jblkdep->jb_list.wk_type == D_JFREEBLK)
 		WORKITEM_FREE(jblkdep, D_JFREEBLK);
 	else if (jblkdep->jb_list.wk_type == D_JTRUNC)
 		WORKITEM_FREE(jblkdep, D_JTRUNC);
 	else
 		panic("free_jblkdep: Unexpected type %s",
 		    TYPENAME(jblkdep->jb_list.wk_type));
 }
 
 /*
  * Free a single jseg once it is no longer referenced in memory or on
  * disk.  Reclaim journal blocks and dependencies waiting for the segment
  * to disappear.
  */
 static void
 free_jseg(jseg, jblocks)
 	struct jseg *jseg;
 	struct jblocks *jblocks;
 {
 	struct freework *freework;
 
 	/*
 	 * Free freework structures that were lingering to indicate freed
 	 * indirect blocks that forced journal write ordering on reallocate.
 	 */
 	while ((freework = LIST_FIRST(&jseg->js_indirs)) != NULL)
 		indirblk_remove(freework);
 	if (jblocks->jb_oldestseg == jseg)
 		jblocks->jb_oldestseg = TAILQ_NEXT(jseg, js_next);
 	TAILQ_REMOVE(&jblocks->jb_segs, jseg, js_next);
 	jblocks_free(jblocks, jseg->js_list.wk_mp, jseg->js_size);
 	KASSERT(LIST_EMPTY(&jseg->js_entries),
 	    ("free_jseg: Freed jseg has valid entries."));
 	WORKITEM_FREE(jseg, D_JSEG);
 }
 
 /*
  * Free all jsegs that meet the criteria for being reclaimed and update
  * oldestseg.
  */
 static void
 free_jsegs(jblocks)
 	struct jblocks *jblocks;
 {
 	struct jseg *jseg;
 
 	/*
 	 * Free only those jsegs which have none allocated before them to
 	 * preserve the journal space ordering.
 	 */
 	while ((jseg = TAILQ_FIRST(&jblocks->jb_segs)) != NULL) {
 		/*
 		 * Only reclaim space when nothing depends on this journal
 		 * set and another set has written that it is no longer
 		 * valid.
 		 */
 		if (jseg->js_refs != 0) {
 			jblocks->jb_oldestseg = jseg;
 			return;
 		}
 		if ((jseg->js_state & ALLCOMPLETE) != ALLCOMPLETE)
 			break;
 		if (jseg->js_seq > jblocks->jb_oldestwrseq)
 			break;
 		/*
 		 * We can free jsegs that didn't write entries when
 		 * oldestwrseq == js_seq.
 		 */
 		if (jseg->js_seq == jblocks->jb_oldestwrseq &&
 		    jseg->js_cnt != 0)
 			break;
 		free_jseg(jseg, jblocks);
 	}
 	/*
 	 * If we exited the loop above we still must discover the
 	 * oldest valid segment.
 	 */
 	if (jseg)
 		for (jseg = jblocks->jb_oldestseg; jseg != NULL;
 		     jseg = TAILQ_NEXT(jseg, js_next))
 			if (jseg->js_refs != 0)
 				break;
 	jblocks->jb_oldestseg = jseg;
 	/*
 	 * The journal has no valid records but some jsegs may still be
 	 * waiting on oldestwrseq to advance.  We force a small record
 	 * out to permit these lingering records to be reclaimed.
 	 */
 	if (jblocks->jb_oldestseg == NULL && !TAILQ_EMPTY(&jblocks->jb_segs))
 		jblocks->jb_needseg = 1;
 }
 
 /*
  * Release one reference to a jseg and free it if the count reaches 0.  This
  * should eventually reclaim journal space as well.
  */
 static void
 rele_jseg(jseg)
 	struct jseg *jseg;
 {
 
 	KASSERT(jseg->js_refs > 0,
 	    ("free_jseg: Invalid refcnt %d", jseg->js_refs));
 	if (--jseg->js_refs != 0)
 		return;
 	free_jsegs(jseg->js_jblocks);
 }
 
 /*
  * Release a jsegdep and decrement the jseg count.
  */
 static void
 free_jsegdep(jsegdep)
 	struct jsegdep *jsegdep;
 {
 
 	if (jsegdep->jd_seg)
 		rele_jseg(jsegdep->jd_seg);
 	WORKITEM_FREE(jsegdep, D_JSEGDEP);
 }
 
 /*
  * Wait for a journal item to make it to disk.  Initiate journal processing
  * if required.
  */
 static int
 jwait(wk, waitfor)
 	struct worklist *wk;
 	int waitfor;
 {
 
 	LOCK_OWNED(VFSTOUFS(wk->wk_mp));
 	/*
 	 * Blocking journal waits cause slow synchronous behavior.  Record
 	 * stats on the frequency of these blocking operations.
 	 */
 	if (waitfor == MNT_WAIT) {
 		stat_journal_wait++;
 		switch (wk->wk_type) {
 		case D_JREMREF:
 		case D_JMVREF:
 			stat_jwait_filepage++;
 			break;
 		case D_JTRUNC:
 		case D_JFREEBLK:
 			stat_jwait_freeblks++;
 			break;
 		case D_JNEWBLK:
 			stat_jwait_newblk++;
 			break;
 		case D_JADDREF:
 			stat_jwait_inode++;
 			break;
 		default:
 			break;
 		}
 	}
 	/*
 	 * If IO has not started we process the journal.  We can't mark the
 	 * worklist item as IOWAITING because we drop the lock while
 	 * processing the journal and the worklist entry may be freed after
 	 * this point.  The caller may call back in and re-issue the request.
 	 */
 	if ((wk->wk_state & INPROGRESS) == 0) {
 		softdep_process_journal(wk->wk_mp, wk, waitfor);
 		if (waitfor != MNT_WAIT)
 			return (EBUSY);
 		return (0);
 	}
 	if (waitfor != MNT_WAIT)
 		return (EBUSY);
 	wait_worklist(wk, "jwait");
 	return (0);
 }
 
 /*
  * Lookup an inodedep based on an inode pointer and set the nlinkdelta as
  * appropriate.  This is a convenience function to reduce duplicate code
  * for the setup and revert functions below.
  */
 static struct inodedep *
 inodedep_lookup_ip(ip)
 	struct inode *ip;
 {
 	struct inodedep *inodedep;
 
 	KASSERT(ip->i_nlink >= ip->i_effnlink,
 	    ("inodedep_lookup_ip: bad delta"));
 	(void) inodedep_lookup(ITOVFS(ip), ip->i_number, DEPALLOC,
 	    &inodedep);
 	inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
 	KASSERT((inodedep->id_state & UNLINKED) == 0, ("inode unlinked"));
 
 	return (inodedep);
 }
 
 /*
  * Called prior to creating a new inode and linking it to a directory.  The
  * jaddref structure must already be allocated by softdep_setup_inomapdep
  * and it is discovered here so we can initialize the mode and update
  * nlinkdelta.
  */
 void
 softdep_setup_create(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 	struct inodedep *inodedep;
 	struct jaddref *jaddref;
 	struct vnode *dvp;
 
 	KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0,
 	    ("softdep_setup_create called on non-softdep filesystem"));
 	KASSERT(ip->i_nlink == 1,
 	    ("softdep_setup_create: Invalid link count."));
 	dvp = ITOV(dp);
 	ACQUIRE_LOCK(ITOUMP(dp));
 	inodedep = inodedep_lookup_ip(ip);
 	if (DOINGSUJ(dvp)) {
 		jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
 		    inoreflst);
 		KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number,
 		    ("softdep_setup_create: No addref structure present."));
 	}
 	softdep_prelink(dvp, NULL);
 	FREE_LOCK(ITOUMP(dp));
 }
 
 /*
  * Create a jaddref structure to track the addition of a DOTDOT link when
  * we are reparenting an inode as part of a rename.  This jaddref will be
  * found by softdep_setup_directory_change.  Adjusts nlinkdelta for
  * non-journaling softdep.
  */
 void
 softdep_setup_dotdot_link(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 	struct inodedep *inodedep;
 	struct jaddref *jaddref;
 	struct vnode *dvp;
 
 	KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0,
 	    ("softdep_setup_dotdot_link called on non-softdep filesystem"));
 	dvp = ITOV(dp);
 	jaddref = NULL;
 	/*
 	 * We don't set MKDIR_PARENT as this is not tied to a mkdir and
 	 * is used as a normal link would be.
 	 */
 	if (DOINGSUJ(dvp))
 		jaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET,
 		    dp->i_effnlink - 1, dp->i_mode);
 	ACQUIRE_LOCK(ITOUMP(dp));
 	inodedep = inodedep_lookup_ip(dp);
 	if (jaddref)
 		TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
 		    if_deps);
 	softdep_prelink(dvp, ITOV(ip));
 	FREE_LOCK(ITOUMP(dp));
 }
 
 /*
  * Create a jaddref structure to track a new link to an inode.  The directory
  * offset is not known until softdep_setup_directory_add or
  * softdep_setup_directory_change.  Adjusts nlinkdelta for non-journaling
  * softdep.
  */
 void
 softdep_setup_link(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 	struct inodedep *inodedep;
 	struct jaddref *jaddref;
 	struct vnode *dvp;
 
 	KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0,
 	    ("softdep_setup_link called on non-softdep filesystem"));
 	dvp = ITOV(dp);
 	jaddref = NULL;
 	if (DOINGSUJ(dvp))
 		jaddref = newjaddref(dp, ip->i_number, 0, ip->i_effnlink - 1,
 		    ip->i_mode);
 	ACQUIRE_LOCK(ITOUMP(dp));
 	inodedep = inodedep_lookup_ip(ip);
 	if (jaddref)
 		TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
 		    if_deps);
 	softdep_prelink(dvp, ITOV(ip));
 	FREE_LOCK(ITOUMP(dp));
 }
 
 /*
  * Called to create the jaddref structures to track . and .. references as
  * well as lookup and further initialize the incomplete jaddref created
  * by softdep_setup_inomapdep when the inode was allocated.  Adjusts
  * nlinkdelta for non-journaling softdep.
  */
 void
 softdep_setup_mkdir(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 	struct inodedep *inodedep;
 	struct jaddref *dotdotaddref;
 	struct jaddref *dotaddref;
 	struct jaddref *jaddref;
 	struct vnode *dvp;
 
 	KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0,
 	    ("softdep_setup_mkdir called on non-softdep filesystem"));
 	dvp = ITOV(dp);
 	dotaddref = dotdotaddref = NULL;
 	if (DOINGSUJ(dvp)) {
 		dotaddref = newjaddref(ip, ip->i_number, DOT_OFFSET, 1,
 		    ip->i_mode);
 		dotaddref->ja_state |= MKDIR_BODY;
 		dotdotaddref = newjaddref(ip, dp->i_number, DOTDOT_OFFSET,
 		    dp->i_effnlink - 1, dp->i_mode);
 		dotdotaddref->ja_state |= MKDIR_PARENT;
 	}
 	ACQUIRE_LOCK(ITOUMP(dp));
 	inodedep = inodedep_lookup_ip(ip);
 	if (DOINGSUJ(dvp)) {
 		jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
 		    inoreflst);
 		KASSERT(jaddref != NULL,
 		    ("softdep_setup_mkdir: No addref structure present."));
 		KASSERT(jaddref->ja_parent == dp->i_number, 
 		    ("softdep_setup_mkdir: bad parent %ju",
 		    (uintmax_t)jaddref->ja_parent));
 		TAILQ_INSERT_BEFORE(&jaddref->ja_ref, &dotaddref->ja_ref,
 		    if_deps);
 	}
 	inodedep = inodedep_lookup_ip(dp);
 	if (DOINGSUJ(dvp))
 		TAILQ_INSERT_TAIL(&inodedep->id_inoreflst,
 		    &dotdotaddref->ja_ref, if_deps);
 	softdep_prelink(ITOV(dp), NULL);
 	FREE_LOCK(ITOUMP(dp));
 }
 
 /*
  * Called to track nlinkdelta of the inode and parent directories prior to
  * unlinking a directory.
  */
 void
 softdep_setup_rmdir(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 	struct vnode *dvp;
 
 	KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0,
 	    ("softdep_setup_rmdir called on non-softdep filesystem"));
 	dvp = ITOV(dp);
 	ACQUIRE_LOCK(ITOUMP(dp));
 	(void) inodedep_lookup_ip(ip);
 	(void) inodedep_lookup_ip(dp);
 	softdep_prelink(dvp, ITOV(ip));
 	FREE_LOCK(ITOUMP(dp));
 }
 
 /*
  * Called to track nlinkdelta of the inode and parent directories prior to
  * unlink.
  */
 void
 softdep_setup_unlink(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 	struct vnode *dvp;
 
 	KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0,
 	    ("softdep_setup_unlink called on non-softdep filesystem"));
 	dvp = ITOV(dp);
 	ACQUIRE_LOCK(ITOUMP(dp));
 	(void) inodedep_lookup_ip(ip);
 	(void) inodedep_lookup_ip(dp);
 	softdep_prelink(dvp, ITOV(ip));
 	FREE_LOCK(ITOUMP(dp));
 }
 
 /*
  * Called to release the journal structures created by a failed non-directory
  * creation.  Adjusts nlinkdelta for non-journaling softdep.
  */
 void
 softdep_revert_create(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 	struct inodedep *inodedep;
 	struct jaddref *jaddref;
 	struct vnode *dvp;
 
 	KASSERT(MOUNTEDSOFTDEP(ITOVFS((dp))) != 0,
 	    ("softdep_revert_create called on non-softdep filesystem"));
 	dvp = ITOV(dp);
 	ACQUIRE_LOCK(ITOUMP(dp));
 	inodedep = inodedep_lookup_ip(ip);
 	if (DOINGSUJ(dvp)) {
 		jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
 		    inoreflst);
 		KASSERT(jaddref->ja_parent == dp->i_number,
 		    ("softdep_revert_create: addref parent mismatch"));
 		cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
 	}
 	FREE_LOCK(ITOUMP(dp));
 }
 
 /*
  * Called to release the journal structures created by a failed link
  * addition.  Adjusts nlinkdelta for non-journaling softdep.
  */
 void
 softdep_revert_link(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 	struct inodedep *inodedep;
 	struct jaddref *jaddref;
 	struct vnode *dvp;
 
 	KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0,
 	    ("softdep_revert_link called on non-softdep filesystem"));
 	dvp = ITOV(dp);
 	ACQUIRE_LOCK(ITOUMP(dp));
 	inodedep = inodedep_lookup_ip(ip);
 	if (DOINGSUJ(dvp)) {
 		jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
 		    inoreflst);
 		KASSERT(jaddref->ja_parent == dp->i_number,
 		    ("softdep_revert_link: addref parent mismatch"));
 		cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
 	}
 	FREE_LOCK(ITOUMP(dp));
 }
 
 /*
  * Called to release the journal structures created by a failed mkdir
  * attempt.  Adjusts nlinkdelta for non-journaling softdep.
  */
 void
 softdep_revert_mkdir(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 	struct inodedep *inodedep;
 	struct jaddref *jaddref;
 	struct jaddref *dotaddref;
 	struct vnode *dvp;
 
 	KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0,
 	    ("softdep_revert_mkdir called on non-softdep filesystem"));
 	dvp = ITOV(dp);
 
 	ACQUIRE_LOCK(ITOUMP(dp));
 	inodedep = inodedep_lookup_ip(dp);
 	if (DOINGSUJ(dvp)) {
 		jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
 		    inoreflst);
 		KASSERT(jaddref->ja_parent == ip->i_number,
 		    ("softdep_revert_mkdir: dotdot addref parent mismatch"));
 		cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
 	}
 	inodedep = inodedep_lookup_ip(ip);
 	if (DOINGSUJ(dvp)) {
 		jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
 		    inoreflst);
 		KASSERT(jaddref->ja_parent == dp->i_number,
 		    ("softdep_revert_mkdir: addref parent mismatch"));
 		dotaddref = (struct jaddref *)TAILQ_PREV(&jaddref->ja_ref,
 		    inoreflst, if_deps);
 		cancel_jaddref(jaddref, inodedep, &inodedep->id_inowait);
 		KASSERT(dotaddref->ja_parent == ip->i_number,
 		    ("softdep_revert_mkdir: dot addref parent mismatch"));
 		cancel_jaddref(dotaddref, inodedep, &inodedep->id_inowait);
 	}
 	FREE_LOCK(ITOUMP(dp));
 }
 
 /* 
  * Called to correct nlinkdelta after a failed rmdir.
  */
 void
 softdep_revert_rmdir(dp, ip)
 	struct inode *dp;
 	struct inode *ip;
 {
 
 	KASSERT(MOUNTEDSOFTDEP(ITOVFS(dp)) != 0,
 	    ("softdep_revert_rmdir called on non-softdep filesystem"));
 	ACQUIRE_LOCK(ITOUMP(dp));
 	(void) inodedep_lookup_ip(ip);
 	(void) inodedep_lookup_ip(dp);
 	FREE_LOCK(ITOUMP(dp));
 }
 
 /*
  * Protecting the freemaps (or bitmaps).
  * 
  * To eliminate the need to execute fsck before mounting a filesystem
  * after a power failure, one must (conservatively) guarantee that the
  * on-disk copy of the bitmaps never indicate that a live inode or block is
  * free.  So, when a block or inode is allocated, the bitmap should be
  * updated (on disk) before any new pointers.  When a block or inode is
  * freed, the bitmap should not be updated until all pointers have been
  * reset.  The latter dependency is handled by the delayed de-allocation
  * approach described below for block and inode de-allocation.  The former
  * dependency is handled by calling the following procedure when a block or
  * inode is allocated. When an inode is allocated an "inodedep" is created
  * with its DEPCOMPLETE flag cleared until its bitmap is written to disk.
  * Each "inodedep" is also inserted into the hash indexing structure so
  * that any additional link additions can be made dependent on the inode
  * allocation.
  * 
  * The ufs filesystem maintains a number of free block counts (e.g., per
  * cylinder group, per cylinder and per <cylinder, rotational position> pair)
  * in addition to the bitmaps.  These counts are used to improve efficiency
  * during allocation and therefore must be consistent with the bitmaps.
  * There is no convenient way to guarantee post-crash consistency of these
  * counts with simple update ordering, for two main reasons: (1) The counts
  * and bitmaps for a single cylinder group block are not in the same disk
  * sector.  If a disk write is interrupted (e.g., by power failure), one may
  * be written and the other not.  (2) Some of the counts are located in the
  * superblock rather than the cylinder group block. So, we focus our soft
  * updates implementation on protecting the bitmaps. When mounting a
  * filesystem, we recompute the auxiliary counts from the bitmaps.
  */
 
 /*
  * Called just after updating the cylinder group block to allocate an inode.
  */
 void
 softdep_setup_inomapdep(bp, ip, newinum, mode)
 	struct buf *bp;		/* buffer for cylgroup block with inode map */
 	struct inode *ip;	/* inode related to allocation */
 	ino_t newinum;		/* new inode number being allocated */
 	int mode;
 {
 	struct inodedep *inodedep;
 	struct bmsafemap *bmsafemap;
 	struct jaddref *jaddref;
 	struct mount *mp;
 	struct fs *fs;
 
 	mp = ITOVFS(ip);
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_setup_inomapdep called on non-softdep filesystem"));
 	fs = VFSTOUFS(mp)->um_fs;
 	jaddref = NULL;
 
 	/*
 	 * Allocate the journal reference add structure so that the bitmap
 	 * can be dependent on it.
 	 */
 	if (MOUNTEDSUJ(mp)) {
 		jaddref = newjaddref(ip, newinum, 0, 0, mode);
 		jaddref->ja_state |= NEWBLOCK;
 	}
 
 	/*
 	 * Create a dependency for the newly allocated inode.
 	 * Panic if it already exists as something is seriously wrong.
 	 * Otherwise add it to the dependency list for the buffer holding
 	 * the cylinder group map from which it was allocated.
 	 *
 	 * We have to preallocate a bmsafemap entry in case it is needed
 	 * in bmsafemap_lookup since once we allocate the inodedep, we
 	 * have to finish initializing it before we can FREE_LOCK().
 	 * By preallocating, we avoid FREE_LOCK() while doing a malloc
 	 * in bmsafemap_lookup. We cannot call bmsafemap_lookup before
 	 * creating the inodedep as it can be freed during the time
 	 * that we FREE_LOCK() while allocating the inodedep. We must
 	 * call workitem_alloc() before entering the locked section as
 	 * it also acquires the lock and we must avoid trying doing so
 	 * recursively.
 	 */
 	bmsafemap = malloc(sizeof(struct bmsafemap),
 	    M_BMSAFEMAP, M_SOFTDEP_FLAGS);
 	workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp);
 	ACQUIRE_LOCK(ITOUMP(ip));
 	if ((inodedep_lookup(mp, newinum, DEPALLOC, &inodedep)))
 		panic("softdep_setup_inomapdep: dependency %p for new"
 		    "inode already exists", inodedep);
 	bmsafemap = bmsafemap_lookup(mp, bp, ino_to_cg(fs, newinum), bmsafemap);
 	if (jaddref) {
 		LIST_INSERT_HEAD(&bmsafemap->sm_jaddrefhd, jaddref, ja_bmdeps);
 		TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jaddref->ja_ref,
 		    if_deps);
 	} else {
 		inodedep->id_state |= ONDEPLIST;
 		LIST_INSERT_HEAD(&bmsafemap->sm_inodedephd, inodedep, id_deps);
 	}
 	inodedep->id_bmsafemap = bmsafemap;
 	inodedep->id_state &= ~DEPCOMPLETE;
 	FREE_LOCK(ITOUMP(ip));
 }
 
 /*
  * Called just after updating the cylinder group block to
  * allocate block or fragment.
  */
 void
 softdep_setup_blkmapdep(bp, mp, newblkno, frags, oldfrags)
 	struct buf *bp;		/* buffer for cylgroup block with block map */
 	struct mount *mp;	/* filesystem doing allocation */
 	ufs2_daddr_t newblkno;	/* number of newly allocated block */
 	int frags;		/* Number of fragments. */
 	int oldfrags;		/* Previous number of fragments for extend. */
 {
 	struct newblk *newblk;
 	struct bmsafemap *bmsafemap;
 	struct jnewblk *jnewblk;
 	struct ufsmount *ump;
 	struct fs *fs;
 
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_setup_blkmapdep called on non-softdep filesystem"));
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
 	jnewblk = NULL;
 	/*
 	 * Create a dependency for the newly allocated block.
 	 * Add it to the dependency list for the buffer holding
 	 * the cylinder group map from which it was allocated.
 	 */
 	if (MOUNTEDSUJ(mp)) {
 		jnewblk = malloc(sizeof(*jnewblk), M_JNEWBLK, M_SOFTDEP_FLAGS);
 		workitem_alloc(&jnewblk->jn_list, D_JNEWBLK, mp);
 		jnewblk->jn_jsegdep = newjsegdep(&jnewblk->jn_list);
 		jnewblk->jn_state = ATTACHED;
 		jnewblk->jn_blkno = newblkno;
 		jnewblk->jn_frags = frags;
 		jnewblk->jn_oldfrags = oldfrags;
 #ifdef SUJ_DEBUG
 		{
 			struct cg *cgp;
 			uint8_t *blksfree;
 			long bno;
 			int i;
 	
 			cgp = (struct cg *)bp->b_data;
 			blksfree = cg_blksfree(cgp);
 			bno = dtogd(fs, jnewblk->jn_blkno);
 			for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags;
 			    i++) {
 				if (isset(blksfree, bno + i))
 					panic("softdep_setup_blkmapdep: "
 					    "free fragment %d from %d-%d "
 					    "state 0x%X dep %p", i,
 					    jnewblk->jn_oldfrags,
 					    jnewblk->jn_frags,
 					    jnewblk->jn_state,
 					    jnewblk->jn_dep);
 			}
 		}
 #endif
 	}
 
 	CTR3(KTR_SUJ,
 	    "softdep_setup_blkmapdep: blkno %jd frags %d oldfrags %d",
 	    newblkno, frags, oldfrags);
 	ACQUIRE_LOCK(ump);
 	if (newblk_lookup(mp, newblkno, DEPALLOC, &newblk) != 0)
 		panic("softdep_setup_blkmapdep: found block");
 	newblk->nb_bmsafemap = bmsafemap = bmsafemap_lookup(mp, bp,
 	    dtog(fs, newblkno), NULL);
 	if (jnewblk) {
 		jnewblk->jn_dep = (struct worklist *)newblk;
 		LIST_INSERT_HEAD(&bmsafemap->sm_jnewblkhd, jnewblk, jn_deps);
 	} else {
 		newblk->nb_state |= ONDEPLIST;
 		LIST_INSERT_HEAD(&bmsafemap->sm_newblkhd, newblk, nb_deps);
 	}
 	newblk->nb_bmsafemap = bmsafemap;
 	newblk->nb_jnewblk = jnewblk;
 	FREE_LOCK(ump);
 }
 
 #define	BMSAFEMAP_HASH(ump, cg) \
       (&(ump)->bmsafemap_hashtbl[(cg) & (ump)->bmsafemap_hash_size])
 
 static int
 bmsafemap_find(bmsafemaphd, cg, bmsafemapp)
 	struct bmsafemap_hashhead *bmsafemaphd;
 	int cg;
 	struct bmsafemap **bmsafemapp;
 {
 	struct bmsafemap *bmsafemap;
 
 	LIST_FOREACH(bmsafemap, bmsafemaphd, sm_hash)
 		if (bmsafemap->sm_cg == cg)
 			break;
 	if (bmsafemap) {
 		*bmsafemapp = bmsafemap;
 		return (1);
 	}
 	*bmsafemapp = NULL;
 
 	return (0);
 }
 
 /*
  * Find the bmsafemap associated with a cylinder group buffer.
  * If none exists, create one. The buffer must be locked when
  * this routine is called and this routine must be called with
  * the softdep lock held. To avoid giving up the lock while
  * allocating a new bmsafemap, a preallocated bmsafemap may be
  * provided. If it is provided but not needed, it is freed.
  */
 static struct bmsafemap *
 bmsafemap_lookup(mp, bp, cg, newbmsafemap)
 	struct mount *mp;
 	struct buf *bp;
 	int cg;
 	struct bmsafemap *newbmsafemap;
 {
 	struct bmsafemap_hashhead *bmsafemaphd;
 	struct bmsafemap *bmsafemap, *collision;
 	struct worklist *wk;
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	KASSERT(bp != NULL, ("bmsafemap_lookup: missing buffer"));
 	LIST_FOREACH(wk, &bp->b_dep, wk_list) {
 		if (wk->wk_type == D_BMSAFEMAP) {
 			if (newbmsafemap)
 				WORKITEM_FREE(newbmsafemap, D_BMSAFEMAP);
 			return (WK_BMSAFEMAP(wk));
 		}
 	}
 	bmsafemaphd = BMSAFEMAP_HASH(ump, cg);
 	if (bmsafemap_find(bmsafemaphd, cg, &bmsafemap) == 1) {
 		if (newbmsafemap)
 			WORKITEM_FREE(newbmsafemap, D_BMSAFEMAP);
 		return (bmsafemap);
 	}
 	if (newbmsafemap) {
 		bmsafemap = newbmsafemap;
 	} else {
 		FREE_LOCK(ump);
 		bmsafemap = malloc(sizeof(struct bmsafemap),
 			M_BMSAFEMAP, M_SOFTDEP_FLAGS);
 		workitem_alloc(&bmsafemap->sm_list, D_BMSAFEMAP, mp);
 		ACQUIRE_LOCK(ump);
 	}
 	bmsafemap->sm_buf = bp;
 	LIST_INIT(&bmsafemap->sm_inodedephd);
 	LIST_INIT(&bmsafemap->sm_inodedepwr);
 	LIST_INIT(&bmsafemap->sm_newblkhd);
 	LIST_INIT(&bmsafemap->sm_newblkwr);
 	LIST_INIT(&bmsafemap->sm_jaddrefhd);
 	LIST_INIT(&bmsafemap->sm_jnewblkhd);
 	LIST_INIT(&bmsafemap->sm_freehd);
 	LIST_INIT(&bmsafemap->sm_freewr);
 	if (bmsafemap_find(bmsafemaphd, cg, &collision) == 1) {
 		WORKITEM_FREE(bmsafemap, D_BMSAFEMAP);
 		return (collision);
 	}
 	bmsafemap->sm_cg = cg;
 	LIST_INSERT_HEAD(bmsafemaphd, bmsafemap, sm_hash);
 	LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next);
 	WORKLIST_INSERT(&bp->b_dep, &bmsafemap->sm_list);
 	return (bmsafemap);
 }
 
 /*
  * Direct block allocation dependencies.
  * 
  * When a new block is allocated, the corresponding disk locations must be
  * initialized (with zeros or new data) before the on-disk inode points to
  * them.  Also, the freemap from which the block was allocated must be
  * updated (on disk) before the inode's pointer. These two dependencies are
  * independent of each other and are needed for all file blocks and indirect
  * blocks that are pointed to directly by the inode.  Just before the
  * "in-core" version of the inode is updated with a newly allocated block
  * number, a procedure (below) is called to setup allocation dependency
  * structures.  These structures are removed when the corresponding
  * dependencies are satisfied or when the block allocation becomes obsolete
  * (i.e., the file is deleted, the block is de-allocated, or the block is a
  * fragment that gets upgraded).  All of these cases are handled in
  * procedures described later.
  * 
  * When a file extension causes a fragment to be upgraded, either to a larger
  * fragment or to a full block, the on-disk location may change (if the
  * previous fragment could not simply be extended). In this case, the old
  * fragment must be de-allocated, but not until after the inode's pointer has
  * been updated. In most cases, this is handled by later procedures, which
  * will construct a "freefrag" structure to be added to the workitem queue
  * when the inode update is complete (or obsolete).  The main exception to
  * this is when an allocation occurs while a pending allocation dependency
  * (for the same block pointer) remains.  This case is handled in the main
  * allocation dependency setup procedure by immediately freeing the
  * unreferenced fragments.
  */ 
 void 
 softdep_setup_allocdirect(ip, off, newblkno, oldblkno, newsize, oldsize, bp)
 	struct inode *ip;	/* inode to which block is being added */
 	ufs_lbn_t off;		/* block pointer within inode */
 	ufs2_daddr_t newblkno;	/* disk block number being added */
 	ufs2_daddr_t oldblkno;	/* previous block number, 0 unless frag */
 	long newsize;		/* size of new block */
 	long oldsize;		/* size of new block */
 	struct buf *bp;		/* bp for allocated block */
 {
 	struct allocdirect *adp, *oldadp;
 	struct allocdirectlst *adphead;
 	struct freefrag *freefrag;
 	struct inodedep *inodedep;
 	struct pagedep *pagedep;
 	struct jnewblk *jnewblk;
 	struct newblk *newblk;
 	struct mount *mp;
 	ufs_lbn_t lbn;
 
 	lbn = bp->b_lblkno;
 	mp = ITOVFS(ip);
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_setup_allocdirect called on non-softdep filesystem"));
 	if (oldblkno && oldblkno != newblkno)
 		freefrag = newfreefrag(ip, oldblkno, oldsize, lbn);
 	else
 		freefrag = NULL;
 
 	CTR6(KTR_SUJ,
 	    "softdep_setup_allocdirect: ino %d blkno %jd oldblkno %jd "
 	    "off %jd newsize %ld oldsize %d",
 	    ip->i_number, newblkno, oldblkno, off, newsize, oldsize);
 	ACQUIRE_LOCK(ITOUMP(ip));
 	if (off >= NDADDR) {
 		if (lbn > 0)
 			panic("softdep_setup_allocdirect: bad lbn %jd, off %jd",
 			    lbn, off);
 		/* allocating an indirect block */
 		if (oldblkno != 0)
 			panic("softdep_setup_allocdirect: non-zero indir");
 	} else {
 		if (off != lbn)
 			panic("softdep_setup_allocdirect: lbn %jd != off %jd",
 			    lbn, off);
 		/*
 		 * Allocating a direct block.
 		 *
 		 * If we are allocating a directory block, then we must
 		 * allocate an associated pagedep to track additions and
 		 * deletions.
 		 */
 		if ((ip->i_mode & IFMT) == IFDIR)
 			pagedep_lookup(mp, bp, ip->i_number, off, DEPALLOC,
 			    &pagedep);
 	}
 	if (newblk_lookup(mp, newblkno, 0, &newblk) == 0)
 		panic("softdep_setup_allocdirect: lost block");
 	KASSERT(newblk->nb_list.wk_type == D_NEWBLK,
 	    ("softdep_setup_allocdirect: newblk already initialized"));
 	/*
 	 * Convert the newblk to an allocdirect.
 	 */
 	WORKITEM_REASSIGN(newblk, D_ALLOCDIRECT);
 	adp = (struct allocdirect *)newblk;
 	newblk->nb_freefrag = freefrag;
 	adp->ad_offset = off;
 	adp->ad_oldblkno = oldblkno;
 	adp->ad_newsize = newsize;
 	adp->ad_oldsize = oldsize;
 
 	/*
 	 * Finish initializing the journal.
 	 */
 	if ((jnewblk = newblk->nb_jnewblk) != NULL) {
 		jnewblk->jn_ino = ip->i_number;
 		jnewblk->jn_lbn = lbn;
 		add_to_journal(&jnewblk->jn_list);
 	}
 	if (freefrag && freefrag->ff_jdep != NULL &&
 	    freefrag->ff_jdep->wk_type == D_JFREEFRAG)
 		add_to_journal(freefrag->ff_jdep);
 	inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
 	adp->ad_inodedep = inodedep;
 
 	WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list);
 	/*
 	 * The list of allocdirects must be kept in sorted and ascending
 	 * order so that the rollback routines can quickly determine the
 	 * first uncommitted block (the size of the file stored on disk
 	 * ends at the end of the lowest committed fragment, or if there
 	 * are no fragments, at the end of the highest committed block).
 	 * Since files generally grow, the typical case is that the new
 	 * block is to be added at the end of the list. We speed this
 	 * special case by checking against the last allocdirect in the
 	 * list before laboriously traversing the list looking for the
 	 * insertion point.
 	 */
 	adphead = &inodedep->id_newinoupdt;
 	oldadp = TAILQ_LAST(adphead, allocdirectlst);
 	if (oldadp == NULL || oldadp->ad_offset <= off) {
 		/* insert at end of list */
 		TAILQ_INSERT_TAIL(adphead, adp, ad_next);
 		if (oldadp != NULL && oldadp->ad_offset == off)
 			allocdirect_merge(adphead, adp, oldadp);
 		FREE_LOCK(ITOUMP(ip));
 		return;
 	}
 	TAILQ_FOREACH(oldadp, adphead, ad_next) {
 		if (oldadp->ad_offset >= off)
 			break;
 	}
 	if (oldadp == NULL)
 		panic("softdep_setup_allocdirect: lost entry");
 	/* insert in middle of list */
 	TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
 	if (oldadp->ad_offset == off)
 		allocdirect_merge(adphead, adp, oldadp);
 
 	FREE_LOCK(ITOUMP(ip));
 }
 
 /*
  * Merge a newer and older journal record to be stored either in a
  * newblock or freefrag.  This handles aggregating journal records for
  * fragment allocation into a second record as well as replacing a
  * journal free with an aborted journal allocation.  A segment for the
  * oldest record will be placed on wkhd if it has been written.  If not
  * the segment for the newer record will suffice.
  */
 static struct worklist *
 jnewblk_merge(new, old, wkhd)
 	struct worklist *new;
 	struct worklist *old;
 	struct workhead *wkhd;
 {
 	struct jnewblk *njnewblk;
 	struct jnewblk *jnewblk;
 
 	/* Handle NULLs to simplify callers. */
 	if (new == NULL)
 		return (old);
 	if (old == NULL)
 		return (new);
 	/* Replace a jfreefrag with a jnewblk. */
 	if (new->wk_type == D_JFREEFRAG) {
 		if (WK_JNEWBLK(old)->jn_blkno != WK_JFREEFRAG(new)->fr_blkno)
 			panic("jnewblk_merge: blkno mismatch: %p, %p",
 			    old, new);
 		cancel_jfreefrag(WK_JFREEFRAG(new));
 		return (old);
 	}
 	if (old->wk_type != D_JNEWBLK || new->wk_type != D_JNEWBLK)
 		panic("jnewblk_merge: Bad type: old %d new %d\n",
 		    old->wk_type, new->wk_type);
 	/*
 	 * Handle merging of two jnewblk records that describe
 	 * different sets of fragments in the same block.
 	 */
 	jnewblk = WK_JNEWBLK(old);
 	njnewblk = WK_JNEWBLK(new);
 	if (jnewblk->jn_blkno != njnewblk->jn_blkno)
 		panic("jnewblk_merge: Merging disparate blocks.");
 	/*
 	 * The record may be rolled back in the cg.
 	 */
 	if (jnewblk->jn_state & UNDONE) {
 		jnewblk->jn_state &= ~UNDONE;
 		njnewblk->jn_state |= UNDONE;
 		njnewblk->jn_state &= ~ATTACHED;
 	}
 	/*
 	 * We modify the newer addref and free the older so that if neither
 	 * has been written the most up-to-date copy will be on disk.  If
 	 * both have been written but rolled back we only temporarily need
 	 * one of them to fix the bits when the cg write completes.
 	 */
 	jnewblk->jn_state |= ATTACHED | COMPLETE;
 	njnewblk->jn_oldfrags = jnewblk->jn_oldfrags;
 	cancel_jnewblk(jnewblk, wkhd);
 	WORKLIST_REMOVE(&jnewblk->jn_list);
 	free_jnewblk(jnewblk);
 	return (new);
 }
 
 /*
  * Replace an old allocdirect dependency with a newer one.
  * This routine must be called with splbio interrupts blocked.
  */
 static void
 allocdirect_merge(adphead, newadp, oldadp)
 	struct allocdirectlst *adphead;	/* head of list holding allocdirects */
 	struct allocdirect *newadp;	/* allocdirect being added */
 	struct allocdirect *oldadp;	/* existing allocdirect being checked */
 {
 	struct worklist *wk;
 	struct freefrag *freefrag;
 
 	freefrag = NULL;
 	LOCK_OWNED(VFSTOUFS(newadp->ad_list.wk_mp));
 	if (newadp->ad_oldblkno != oldadp->ad_newblkno ||
 	    newadp->ad_oldsize != oldadp->ad_newsize ||
 	    newadp->ad_offset >= NDADDR)
 		panic("%s %jd != new %jd || old size %ld != new %ld",
 		    "allocdirect_merge: old blkno",
 		    (intmax_t)newadp->ad_oldblkno,
 		    (intmax_t)oldadp->ad_newblkno,
 		    newadp->ad_oldsize, oldadp->ad_newsize);
 	newadp->ad_oldblkno = oldadp->ad_oldblkno;
 	newadp->ad_oldsize = oldadp->ad_oldsize;
 	/*
 	 * If the old dependency had a fragment to free or had never
 	 * previously had a block allocated, then the new dependency
 	 * can immediately post its freefrag and adopt the old freefrag.
 	 * This action is done by swapping the freefrag dependencies.
 	 * The new dependency gains the old one's freefrag, and the
 	 * old one gets the new one and then immediately puts it on
 	 * the worklist when it is freed by free_newblk. It is
 	 * not possible to do this swap when the old dependency had a
 	 * non-zero size but no previous fragment to free. This condition
 	 * arises when the new block is an extension of the old block.
 	 * Here, the first part of the fragment allocated to the new
 	 * dependency is part of the block currently claimed on disk by
 	 * the old dependency, so cannot legitimately be freed until the
 	 * conditions for the new dependency are fulfilled.
 	 */
 	freefrag = newadp->ad_freefrag;
 	if (oldadp->ad_freefrag != NULL || oldadp->ad_oldblkno == 0) {
 		newadp->ad_freefrag = oldadp->ad_freefrag;
 		oldadp->ad_freefrag = freefrag;
 	}
 	/*
 	 * If we are tracking a new directory-block allocation,
 	 * move it from the old allocdirect to the new allocdirect.
 	 */
 	if ((wk = LIST_FIRST(&oldadp->ad_newdirblk)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		if (!LIST_EMPTY(&oldadp->ad_newdirblk))
 			panic("allocdirect_merge: extra newdirblk");
 		WORKLIST_INSERT(&newadp->ad_newdirblk, wk);
 	}
 	TAILQ_REMOVE(adphead, oldadp, ad_next);
 	/*
 	 * We need to move any journal dependencies over to the freefrag
 	 * that releases this block if it exists.  Otherwise we are
 	 * extending an existing block and we'll wait until that is
 	 * complete to release the journal space and extend the
 	 * new journal to cover this old space as well.
 	 */
 	if (freefrag == NULL) {
 		if (oldadp->ad_newblkno != newadp->ad_newblkno)
 			panic("allocdirect_merge: %jd != %jd",
 			    oldadp->ad_newblkno, newadp->ad_newblkno);
 		newadp->ad_block.nb_jnewblk = (struct jnewblk *)
 		    jnewblk_merge(&newadp->ad_block.nb_jnewblk->jn_list, 
 		    &oldadp->ad_block.nb_jnewblk->jn_list,
 		    &newadp->ad_block.nb_jwork);
 		oldadp->ad_block.nb_jnewblk = NULL;
 		cancel_newblk(&oldadp->ad_block, NULL,
 		    &newadp->ad_block.nb_jwork);
 	} else {
 		wk = (struct worklist *) cancel_newblk(&oldadp->ad_block,
 		    &freefrag->ff_list, &freefrag->ff_jwork);
 		freefrag->ff_jdep = jnewblk_merge(freefrag->ff_jdep, wk,
 		    &freefrag->ff_jwork);
 	}
 	free_newblk(&oldadp->ad_block);
 }
 
 /*
  * Allocate a jfreefrag structure to journal a single block free.
  */
 static struct jfreefrag *
 newjfreefrag(freefrag, ip, blkno, size, lbn)
 	struct freefrag *freefrag;
 	struct inode *ip;
 	ufs2_daddr_t blkno;
 	long size;
 	ufs_lbn_t lbn;
 {
 	struct jfreefrag *jfreefrag;
 	struct fs *fs;
 
 	fs = ITOFS(ip);
 	jfreefrag = malloc(sizeof(struct jfreefrag), M_JFREEFRAG,
 	    M_SOFTDEP_FLAGS);
 	workitem_alloc(&jfreefrag->fr_list, D_JFREEFRAG, ITOVFS(ip));
 	jfreefrag->fr_jsegdep = newjsegdep(&jfreefrag->fr_list);
 	jfreefrag->fr_state = ATTACHED | DEPCOMPLETE;
 	jfreefrag->fr_ino = ip->i_number;
 	jfreefrag->fr_lbn = lbn;
 	jfreefrag->fr_blkno = blkno;
 	jfreefrag->fr_frags = numfrags(fs, size);
 	jfreefrag->fr_freefrag = freefrag;
 
 	return (jfreefrag);
 }
 
 /*
  * Allocate a new freefrag structure.
  */
 static struct freefrag *
 newfreefrag(ip, blkno, size, lbn)
 	struct inode *ip;
 	ufs2_daddr_t blkno;
 	long size;
 	ufs_lbn_t lbn;
 {
 	struct freefrag *freefrag;
 	struct ufsmount *ump;
 	struct fs *fs;
 
 	CTR4(KTR_SUJ, "newfreefrag: ino %d blkno %jd size %ld lbn %jd",
 	    ip->i_number, blkno, size, lbn);
 	ump = ITOUMP(ip);
 	fs = ump->um_fs;
 	if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
 		panic("newfreefrag: frag size");
 	freefrag = malloc(sizeof(struct freefrag),
 	    M_FREEFRAG, M_SOFTDEP_FLAGS);
 	workitem_alloc(&freefrag->ff_list, D_FREEFRAG, UFSTOVFS(ump));
 	freefrag->ff_state = ATTACHED;
 	LIST_INIT(&freefrag->ff_jwork);
 	freefrag->ff_inum = ip->i_number;
 	freefrag->ff_vtype = ITOV(ip)->v_type;
 	freefrag->ff_blkno = blkno;
 	freefrag->ff_fragsize = size;
 
 	if (MOUNTEDSUJ(UFSTOVFS(ump))) {
 		freefrag->ff_jdep = (struct worklist *)
 		    newjfreefrag(freefrag, ip, blkno, size, lbn);
 	} else {
 		freefrag->ff_state |= DEPCOMPLETE;
 		freefrag->ff_jdep = NULL;
 	}
 
 	return (freefrag);
 }
 
 /*
  * This workitem de-allocates fragments that were replaced during
  * file block allocation.
  */
 static void 
 handle_workitem_freefrag(freefrag)
 	struct freefrag *freefrag;
 {
 	struct ufsmount *ump = VFSTOUFS(freefrag->ff_list.wk_mp);
 	struct workhead wkhd;
 
 	CTR3(KTR_SUJ,
 	    "handle_workitem_freefrag: ino %d blkno %jd size %ld",
 	    freefrag->ff_inum, freefrag->ff_blkno, freefrag->ff_fragsize);
 	/*
 	 * It would be illegal to add new completion items to the
 	 * freefrag after it was schedule to be done so it must be
 	 * safe to modify the list head here.
 	 */
 	LIST_INIT(&wkhd);
 	ACQUIRE_LOCK(ump);
 	LIST_SWAP(&freefrag->ff_jwork, &wkhd, worklist, wk_list);
 	/*
 	 * If the journal has not been written we must cancel it here.
 	 */
 	if (freefrag->ff_jdep) {
 		if (freefrag->ff_jdep->wk_type != D_JNEWBLK)
 			panic("handle_workitem_freefrag: Unexpected type %d\n",
 			    freefrag->ff_jdep->wk_type);
 		cancel_jnewblk(WK_JNEWBLK(freefrag->ff_jdep), &wkhd);
 	}
 	FREE_LOCK(ump);
 	ffs_blkfree(ump, ump->um_fs, ump->um_devvp, freefrag->ff_blkno,
 	   freefrag->ff_fragsize, freefrag->ff_inum, freefrag->ff_vtype, &wkhd);
 	ACQUIRE_LOCK(ump);
 	WORKITEM_FREE(freefrag, D_FREEFRAG);
 	FREE_LOCK(ump);
 }
 
 /*
  * Set up a dependency structure for an external attributes data block.
  * This routine follows much of the structure of softdep_setup_allocdirect.
  * See the description of softdep_setup_allocdirect above for details.
  */
 void 
 softdep_setup_allocext(ip, off, newblkno, oldblkno, newsize, oldsize, bp)
 	struct inode *ip;
 	ufs_lbn_t off;
 	ufs2_daddr_t newblkno;
 	ufs2_daddr_t oldblkno;
 	long newsize;
 	long oldsize;
 	struct buf *bp;
 {
 	struct allocdirect *adp, *oldadp;
 	struct allocdirectlst *adphead;
 	struct freefrag *freefrag;
 	struct inodedep *inodedep;
 	struct jnewblk *jnewblk;
 	struct newblk *newblk;
 	struct mount *mp;
 	struct ufsmount *ump;
 	ufs_lbn_t lbn;
 
 	mp = ITOVFS(ip);
 	ump = VFSTOUFS(mp);
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_setup_allocext called on non-softdep filesystem"));
 	KASSERT(off < NXADDR, ("softdep_setup_allocext: lbn %lld > NXADDR",
 		    (long long)off));
 
 	lbn = bp->b_lblkno;
 	if (oldblkno && oldblkno != newblkno)
 		freefrag = newfreefrag(ip, oldblkno, oldsize, lbn);
 	else
 		freefrag = NULL;
 
 	ACQUIRE_LOCK(ump);
 	if (newblk_lookup(mp, newblkno, 0, &newblk) == 0)
 		panic("softdep_setup_allocext: lost block");
 	KASSERT(newblk->nb_list.wk_type == D_NEWBLK,
 	    ("softdep_setup_allocext: newblk already initialized"));
 	/*
 	 * Convert the newblk to an allocdirect.
 	 */
 	WORKITEM_REASSIGN(newblk, D_ALLOCDIRECT);
 	adp = (struct allocdirect *)newblk;
 	newblk->nb_freefrag = freefrag;
 	adp->ad_offset = off;
 	adp->ad_oldblkno = oldblkno;
 	adp->ad_newsize = newsize;
 	adp->ad_oldsize = oldsize;
 	adp->ad_state |=  EXTDATA;
 
 	/*
 	 * Finish initializing the journal.
 	 */
 	if ((jnewblk = newblk->nb_jnewblk) != NULL) {
 		jnewblk->jn_ino = ip->i_number;
 		jnewblk->jn_lbn = lbn;
 		add_to_journal(&jnewblk->jn_list);
 	}
 	if (freefrag && freefrag->ff_jdep != NULL &&
 	    freefrag->ff_jdep->wk_type == D_JFREEFRAG)
 		add_to_journal(freefrag->ff_jdep);
 	inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
 	adp->ad_inodedep = inodedep;
 
 	WORKLIST_INSERT(&bp->b_dep, &newblk->nb_list);
 	/*
 	 * The list of allocdirects must be kept in sorted and ascending
 	 * order so that the rollback routines can quickly determine the
 	 * first uncommitted block (the size of the file stored on disk
 	 * ends at the end of the lowest committed fragment, or if there
 	 * are no fragments, at the end of the highest committed block).
 	 * Since files generally grow, the typical case is that the new
 	 * block is to be added at the end of the list. We speed this
 	 * special case by checking against the last allocdirect in the
 	 * list before laboriously traversing the list looking for the
 	 * insertion point.
 	 */
 	adphead = &inodedep->id_newextupdt;
 	oldadp = TAILQ_LAST(adphead, allocdirectlst);
 	if (oldadp == NULL || oldadp->ad_offset <= off) {
 		/* insert at end of list */
 		TAILQ_INSERT_TAIL(adphead, adp, ad_next);
 		if (oldadp != NULL && oldadp->ad_offset == off)
 			allocdirect_merge(adphead, adp, oldadp);
 		FREE_LOCK(ump);
 		return;
 	}
 	TAILQ_FOREACH(oldadp, adphead, ad_next) {
 		if (oldadp->ad_offset >= off)
 			break;
 	}
 	if (oldadp == NULL)
 		panic("softdep_setup_allocext: lost entry");
 	/* insert in middle of list */
 	TAILQ_INSERT_BEFORE(oldadp, adp, ad_next);
 	if (oldadp->ad_offset == off)
 		allocdirect_merge(adphead, adp, oldadp);
 	FREE_LOCK(ump);
 }
 
 /*
  * Indirect block allocation dependencies.
  * 
  * The same dependencies that exist for a direct block also exist when
  * a new block is allocated and pointed to by an entry in a block of
  * indirect pointers. The undo/redo states described above are also
  * used here. Because an indirect block contains many pointers that
  * may have dependencies, a second copy of the entire in-memory indirect
  * block is kept. The buffer cache copy is always completely up-to-date.
  * The second copy, which is used only as a source for disk writes,
  * contains only the safe pointers (i.e., those that have no remaining
  * update dependencies). The second copy is freed when all pointers
  * are safe. The cache is not allowed to replace indirect blocks with
  * pending update dependencies. If a buffer containing an indirect
  * block with dependencies is written, these routines will mark it
  * dirty again. It can only be successfully written once all the
  * dependencies are removed. The ffs_fsync routine in conjunction with
  * softdep_sync_metadata work together to get all the dependencies
  * removed so that a file can be successfully written to disk. Three
  * procedures are used when setting up indirect block pointer
  * dependencies. The division is necessary because of the organization
  * of the "balloc" routine and because of the distinction between file
  * pages and file metadata blocks.
  */
 
 /*
  * Allocate a new allocindir structure.
  */
 static struct allocindir *
 newallocindir(ip, ptrno, newblkno, oldblkno, lbn)
 	struct inode *ip;	/* inode for file being extended */
 	int ptrno;		/* offset of pointer in indirect block */
 	ufs2_daddr_t newblkno;	/* disk block number being added */
 	ufs2_daddr_t oldblkno;	/* previous block number, 0 if none */
 	ufs_lbn_t lbn;
 {
 	struct newblk *newblk;
 	struct allocindir *aip;
 	struct freefrag *freefrag;
 	struct jnewblk *jnewblk;
 
 	if (oldblkno)
 		freefrag = newfreefrag(ip, oldblkno, ITOFS(ip)->fs_bsize, lbn);
 	else
 		freefrag = NULL;
 	ACQUIRE_LOCK(ITOUMP(ip));
 	if (newblk_lookup(ITOVFS(ip), newblkno, 0, &newblk) == 0)
 		panic("new_allocindir: lost block");
 	KASSERT(newblk->nb_list.wk_type == D_NEWBLK,
 	    ("newallocindir: newblk already initialized"));
 	WORKITEM_REASSIGN(newblk, D_ALLOCINDIR);
 	newblk->nb_freefrag = freefrag;
 	aip = (struct allocindir *)newblk;
 	aip->ai_offset = ptrno;
 	aip->ai_oldblkno = oldblkno;
 	aip->ai_lbn = lbn;
 	if ((jnewblk = newblk->nb_jnewblk) != NULL) {
 		jnewblk->jn_ino = ip->i_number;
 		jnewblk->jn_lbn = lbn;
 		add_to_journal(&jnewblk->jn_list);
 	}
 	if (freefrag && freefrag->ff_jdep != NULL &&
 	    freefrag->ff_jdep->wk_type == D_JFREEFRAG)
 		add_to_journal(freefrag->ff_jdep);
 	return (aip);
 }
 
 /*
  * Called just before setting an indirect block pointer
  * to a newly allocated file page.
  */
 void
 softdep_setup_allocindir_page(ip, lbn, bp, ptrno, newblkno, oldblkno, nbp)
 	struct inode *ip;	/* inode for file being extended */
 	ufs_lbn_t lbn;		/* allocated block number within file */
 	struct buf *bp;		/* buffer with indirect blk referencing page */
 	int ptrno;		/* offset of pointer in indirect block */
 	ufs2_daddr_t newblkno;	/* disk block number being added */
 	ufs2_daddr_t oldblkno;	/* previous block number, 0 if none */
 	struct buf *nbp;	/* buffer holding allocated page */
 {
 	struct inodedep *inodedep;
 	struct freefrag *freefrag;
 	struct allocindir *aip;
 	struct pagedep *pagedep;
 	struct mount *mp;
 	struct ufsmount *ump;
 
 	mp = ITOVFS(ip);
 	ump = VFSTOUFS(mp);
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_setup_allocindir_page called on non-softdep filesystem"));
 	KASSERT(lbn == nbp->b_lblkno,
 	    ("softdep_setup_allocindir_page: lbn %jd != lblkno %jd",
 	    lbn, bp->b_lblkno));
 	CTR4(KTR_SUJ,
 	    "softdep_setup_allocindir_page: ino %d blkno %jd oldblkno %jd "
 	    "lbn %jd", ip->i_number, newblkno, oldblkno, lbn);
 	ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_page");
 	aip = newallocindir(ip, ptrno, newblkno, oldblkno, lbn);
 	(void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
 	/*
 	 * If we are allocating a directory page, then we must
 	 * allocate an associated pagedep to track additions and
 	 * deletions.
 	 */
 	if ((ip->i_mode & IFMT) == IFDIR)
 		pagedep_lookup(mp, nbp, ip->i_number, lbn, DEPALLOC, &pagedep);
 	WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list);
 	freefrag = setup_allocindir_phase2(bp, ip, inodedep, aip, lbn);
 	FREE_LOCK(ump);
 	if (freefrag)
 		handle_workitem_freefrag(freefrag);
 }
 
 /*
  * Called just before setting an indirect block pointer to a
  * newly allocated indirect block.
  */
 void
 softdep_setup_allocindir_meta(nbp, ip, bp, ptrno, newblkno)
 	struct buf *nbp;	/* newly allocated indirect block */
 	struct inode *ip;	/* inode for file being extended */
 	struct buf *bp;		/* indirect block referencing allocated block */
 	int ptrno;		/* offset of pointer in indirect block */
 	ufs2_daddr_t newblkno;	/* disk block number being added */
 {
 	struct inodedep *inodedep;
 	struct allocindir *aip;
 	struct ufsmount *ump;
 	ufs_lbn_t lbn;
 
 	ump = ITOUMP(ip);
 	KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
 	    ("softdep_setup_allocindir_meta called on non-softdep filesystem"));
 	CTR3(KTR_SUJ,
 	    "softdep_setup_allocindir_meta: ino %d blkno %jd ptrno %d",
 	    ip->i_number, newblkno, ptrno);
 	lbn = nbp->b_lblkno;
 	ASSERT_VOP_LOCKED(ITOV(ip), "softdep_setup_allocindir_meta");
 	aip = newallocindir(ip, ptrno, newblkno, 0, lbn);
 	inodedep_lookup(UFSTOVFS(ump), ip->i_number, DEPALLOC, &inodedep);
 	WORKLIST_INSERT(&nbp->b_dep, &aip->ai_block.nb_list);
 	if (setup_allocindir_phase2(bp, ip, inodedep, aip, lbn))
 		panic("softdep_setup_allocindir_meta: Block already existed");
 	FREE_LOCK(ump);
 }
 
 static void
 indirdep_complete(indirdep)
 	struct indirdep *indirdep;
 {
 	struct allocindir *aip;
 
 	LIST_REMOVE(indirdep, ir_next);
 	indirdep->ir_state |= DEPCOMPLETE;
 
 	while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL) {
 		LIST_REMOVE(aip, ai_next);
 		free_newblk(&aip->ai_block);
 	}
 	/*
 	 * If this indirdep is not attached to a buf it was simply waiting
 	 * on completion to clear completehd.  free_indirdep() asserts
 	 * that nothing is dangling.
 	 */
 	if ((indirdep->ir_state & ONWORKLIST) == 0)
 		free_indirdep(indirdep);
 }
 
 static struct indirdep *
 indirdep_lookup(mp, ip, bp)
 	struct mount *mp;
 	struct inode *ip;
 	struct buf *bp;
 {
 	struct indirdep *indirdep, *newindirdep;
 	struct newblk *newblk;
 	struct ufsmount *ump;
 	struct worklist *wk;
 	struct fs *fs;
 	ufs2_daddr_t blkno;
 
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	indirdep = NULL;
 	newindirdep = NULL;
 	fs = ump->um_fs;
 	for (;;) {
 		LIST_FOREACH(wk, &bp->b_dep, wk_list) {
 			if (wk->wk_type != D_INDIRDEP)
 				continue;
 			indirdep = WK_INDIRDEP(wk);
 			break;
 		}
 		/* Found on the buffer worklist, no new structure to free. */
 		if (indirdep != NULL && newindirdep == NULL)
 			return (indirdep);
 		if (indirdep != NULL && newindirdep != NULL)
 			panic("indirdep_lookup: simultaneous create");
 		/* None found on the buffer and a new structure is ready. */
 		if (indirdep == NULL && newindirdep != NULL)
 			break;
 		/* None found and no new structure available. */
 		FREE_LOCK(ump);
 		newindirdep = malloc(sizeof(struct indirdep),
 		    M_INDIRDEP, M_SOFTDEP_FLAGS);
 		workitem_alloc(&newindirdep->ir_list, D_INDIRDEP, mp);
 		newindirdep->ir_state = ATTACHED;
 		if (I_IS_UFS1(ip))
 			newindirdep->ir_state |= UFS1FMT;
 		TAILQ_INIT(&newindirdep->ir_trunc);
 		newindirdep->ir_saveddata = NULL;
 		LIST_INIT(&newindirdep->ir_deplisthd);
 		LIST_INIT(&newindirdep->ir_donehd);
 		LIST_INIT(&newindirdep->ir_writehd);
 		LIST_INIT(&newindirdep->ir_completehd);
 		if (bp->b_blkno == bp->b_lblkno) {
 			ufs_bmaparray(bp->b_vp, bp->b_lblkno, &blkno, bp,
 			    NULL, NULL);
 			bp->b_blkno = blkno;
 		}
 		newindirdep->ir_freeblks = NULL;
 		newindirdep->ir_savebp =
 		    getblk(ump->um_devvp, bp->b_blkno, bp->b_bcount, 0, 0, 0);
 		newindirdep->ir_bp = bp;
 		BUF_KERNPROC(newindirdep->ir_savebp);
 		bcopy(bp->b_data, newindirdep->ir_savebp->b_data, bp->b_bcount);
 		ACQUIRE_LOCK(ump);
 	}
 	indirdep = newindirdep;
 	WORKLIST_INSERT(&bp->b_dep, &indirdep->ir_list);
 	/*
 	 * If the block is not yet allocated we don't set DEPCOMPLETE so
 	 * that we don't free dependencies until the pointers are valid.
 	 * This could search b_dep for D_ALLOCDIRECT/D_ALLOCINDIR rather
 	 * than using the hash.
 	 */
 	if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk))
 		LIST_INSERT_HEAD(&newblk->nb_indirdeps, indirdep, ir_next);
 	else
 		indirdep->ir_state |= DEPCOMPLETE;
 	return (indirdep);
 }
 
 /*
  * Called to finish the allocation of the "aip" allocated
  * by one of the two routines above.
  */
 static struct freefrag *
 setup_allocindir_phase2(bp, ip, inodedep, aip, lbn)
 	struct buf *bp;		/* in-memory copy of the indirect block */
 	struct inode *ip;	/* inode for file being extended */
 	struct inodedep *inodedep; /* Inodedep for ip */
 	struct allocindir *aip;	/* allocindir allocated by the above routines */
 	ufs_lbn_t lbn;		/* Logical block number for this block. */
 {
 	struct fs *fs;
 	struct indirdep *indirdep;
 	struct allocindir *oldaip;
 	struct freefrag *freefrag;
 	struct mount *mp;
 	struct ufsmount *ump;
 
 	mp = ITOVFS(ip);
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	fs = ump->um_fs;
 	if (bp->b_lblkno >= 0)
 		panic("setup_allocindir_phase2: not indir blk");
 	KASSERT(aip->ai_offset >= 0 && aip->ai_offset < NINDIR(fs),
 	    ("setup_allocindir_phase2: Bad offset %d", aip->ai_offset));
 	indirdep = indirdep_lookup(mp, ip, bp);
 	KASSERT(indirdep->ir_savebp != NULL,
 	    ("setup_allocindir_phase2 NULL ir_savebp"));
 	aip->ai_indirdep = indirdep;
 	/*
 	 * Check for an unwritten dependency for this indirect offset.  If
 	 * there is, merge the old dependency into the new one.  This happens
 	 * as a result of reallocblk only.
 	 */
 	freefrag = NULL;
 	if (aip->ai_oldblkno != 0) {
 		LIST_FOREACH(oldaip, &indirdep->ir_deplisthd, ai_next) {
 			if (oldaip->ai_offset == aip->ai_offset) {
 				freefrag = allocindir_merge(aip, oldaip);
 				goto done;
 			}
 		}
 		LIST_FOREACH(oldaip, &indirdep->ir_donehd, ai_next) {
 			if (oldaip->ai_offset == aip->ai_offset) {
 				freefrag = allocindir_merge(aip, oldaip);
 				goto done;
 			}
 		}
 	}
 done:
 	LIST_INSERT_HEAD(&indirdep->ir_deplisthd, aip, ai_next);
 	return (freefrag);
 }
 
 /*
  * Merge two allocindirs which refer to the same block.  Move newblock
  * dependencies and setup the freefrags appropriately.
  */
 static struct freefrag *
 allocindir_merge(aip, oldaip)
 	struct allocindir *aip;
 	struct allocindir *oldaip;
 {
 	struct freefrag *freefrag;
 	struct worklist *wk;
 
 	if (oldaip->ai_newblkno != aip->ai_oldblkno)
 		panic("allocindir_merge: blkno");
 	aip->ai_oldblkno = oldaip->ai_oldblkno;
 	freefrag = aip->ai_freefrag;
 	aip->ai_freefrag = oldaip->ai_freefrag;
 	oldaip->ai_freefrag = NULL;
 	KASSERT(freefrag != NULL, ("setup_allocindir_phase2: No freefrag"));
 	/*
 	 * If we are tracking a new directory-block allocation,
 	 * move it from the old allocindir to the new allocindir.
 	 */
 	if ((wk = LIST_FIRST(&oldaip->ai_newdirblk)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		if (!LIST_EMPTY(&oldaip->ai_newdirblk))
 			panic("allocindir_merge: extra newdirblk");
 		WORKLIST_INSERT(&aip->ai_newdirblk, wk);
 	}
 	/*
 	 * We can skip journaling for this freefrag and just complete
 	 * any pending journal work for the allocindir that is being
 	 * removed after the freefrag completes.
 	 */
 	if (freefrag->ff_jdep)
 		cancel_jfreefrag(WK_JFREEFRAG(freefrag->ff_jdep));
 	LIST_REMOVE(oldaip, ai_next);
 	freefrag->ff_jdep = (struct worklist *)cancel_newblk(&oldaip->ai_block,
 	    &freefrag->ff_list, &freefrag->ff_jwork);
 	free_newblk(&oldaip->ai_block);
 
 	return (freefrag);
 }
 
 static inline void
 setup_freedirect(freeblks, ip, i, needj)
 	struct freeblks *freeblks;
 	struct inode *ip;
 	int i;
 	int needj;
 {
 	struct ufsmount *ump;
 	ufs2_daddr_t blkno;
 	int frags;
 
 	blkno = DIP(ip, i_db[i]);
 	if (blkno == 0)
 		return;
 	DIP_SET(ip, i_db[i], 0);
 	ump = ITOUMP(ip);
 	frags = sblksize(ump->um_fs, ip->i_size, i);
 	frags = numfrags(ump->um_fs, frags);
 	newfreework(ump, freeblks, NULL, i, blkno, frags, 0, needj);
 }
 
 static inline void
 setup_freeext(freeblks, ip, i, needj)
 	struct freeblks *freeblks;
 	struct inode *ip;
 	int i;
 	int needj;
 {
 	struct ufsmount *ump;
 	ufs2_daddr_t blkno;
 	int frags;
 
 	blkno = ip->i_din2->di_extb[i];
 	if (blkno == 0)
 		return;
 	ip->i_din2->di_extb[i] = 0;
 	ump = ITOUMP(ip);
 	frags = sblksize(ump->um_fs, ip->i_din2->di_extsize, i);
 	frags = numfrags(ump->um_fs, frags);
 	newfreework(ump, freeblks, NULL, -1 - i, blkno, frags, 0, needj);
 }
 
 static inline void
 setup_freeindir(freeblks, ip, i, lbn, needj)
 	struct freeblks *freeblks;
 	struct inode *ip;
 	int i;
 	ufs_lbn_t lbn;
 	int needj;
 {
 	struct ufsmount *ump;
 	ufs2_daddr_t blkno;
 
 	blkno = DIP(ip, i_ib[i]);
 	if (blkno == 0)
 		return;
 	DIP_SET(ip, i_ib[i], 0);
 	ump = ITOUMP(ip);
 	newfreework(ump, freeblks, NULL, lbn, blkno, ump->um_fs->fs_frag,
 	    0, needj);
 }
 
 static inline struct freeblks *
 newfreeblks(mp, ip)
 	struct mount *mp;
 	struct inode *ip;
 {
 	struct freeblks *freeblks;
 
 	freeblks = malloc(sizeof(struct freeblks),
 		M_FREEBLKS, M_SOFTDEP_FLAGS|M_ZERO);
 	workitem_alloc(&freeblks->fb_list, D_FREEBLKS, mp);
 	LIST_INIT(&freeblks->fb_jblkdephd);
 	LIST_INIT(&freeblks->fb_jwork);
 	freeblks->fb_ref = 0;
 	freeblks->fb_cgwait = 0;
 	freeblks->fb_state = ATTACHED;
 	freeblks->fb_uid = ip->i_uid;
 	freeblks->fb_inum = ip->i_number;
 	freeblks->fb_vtype = ITOV(ip)->v_type;
 	freeblks->fb_modrev = DIP(ip, i_modrev);
 	freeblks->fb_devvp = ITODEVVP(ip);
 	freeblks->fb_chkcnt = 0;
 	freeblks->fb_len = 0;
 
 	return (freeblks);
 }
 
 static void
 trunc_indirdep(indirdep, freeblks, bp, off)
 	struct indirdep *indirdep;
 	struct freeblks *freeblks;
 	struct buf *bp;
 	int off;
 {
 	struct allocindir *aip, *aipn;
 
 	/*
 	 * The first set of allocindirs won't be in savedbp.
 	 */
 	LIST_FOREACH_SAFE(aip, &indirdep->ir_deplisthd, ai_next, aipn)
 		if (aip->ai_offset > off)
 			cancel_allocindir(aip, bp, freeblks, 1);
 	LIST_FOREACH_SAFE(aip, &indirdep->ir_donehd, ai_next, aipn)
 		if (aip->ai_offset > off)
 			cancel_allocindir(aip, bp, freeblks, 1);
 	/*
 	 * These will exist in savedbp.
 	 */
 	LIST_FOREACH_SAFE(aip, &indirdep->ir_writehd, ai_next, aipn)
 		if (aip->ai_offset > off)
 			cancel_allocindir(aip, NULL, freeblks, 0);
 	LIST_FOREACH_SAFE(aip, &indirdep->ir_completehd, ai_next, aipn)
 		if (aip->ai_offset > off)
 			cancel_allocindir(aip, NULL, freeblks, 0);
 }
 
 /*
  * Follow the chain of indirects down to lastlbn creating a freework
  * structure for each.  This will be used to start indir_trunc() at
  * the right offset and create the journal records for the parrtial
  * truncation.  A second step will handle the truncated dependencies.
  */
 static int
 setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno)
 	struct freeblks *freeblks;
 	struct inode *ip;
 	ufs_lbn_t lbn;
 	ufs_lbn_t lastlbn;
 	ufs2_daddr_t blkno;
 {
 	struct indirdep *indirdep;
 	struct indirdep *indirn;
 	struct freework *freework;
 	struct newblk *newblk;
 	struct mount *mp;
 	struct ufsmount *ump;
 	struct buf *bp;
 	uint8_t *start;
 	uint8_t *end;
 	ufs_lbn_t lbnadd;
 	int level;
 	int error;
 	int off;
 
 
 	freework = NULL;
 	if (blkno == 0)
 		return (0);
 	mp = freeblks->fb_list.wk_mp;
 	ump = VFSTOUFS(mp);
 	bp = getblk(ITOV(ip), lbn, mp->mnt_stat.f_iosize, 0, 0, 0);
 	if ((bp->b_flags & B_CACHE) == 0) {
 		bp->b_blkno = blkptrtodb(VFSTOUFS(mp), blkno);
 		bp->b_iocmd = BIO_READ;
 		bp->b_flags &= ~B_INVAL;
 		bp->b_ioflags &= ~BIO_ERROR;
 		vfs_busy_pages(bp, 0);
 		bp->b_iooffset = dbtob(bp->b_blkno);
 		bstrategy(bp);
 #ifdef RACCT
 		if (racct_enable) {
 			PROC_LOCK(curproc);
 			racct_add_buf(curproc, bp, 0);
 			PROC_UNLOCK(curproc);
 		}
 #endif /* RACCT */
 		curthread->td_ru.ru_inblock++;
 		error = bufwait(bp);
 		if (error) {
 			brelse(bp);
 			return (error);
 		}
 	}
 	level = lbn_level(lbn);
 	lbnadd = lbn_offset(ump->um_fs, level);
 	/*
 	 * Compute the offset of the last block we want to keep.  Store
 	 * in the freework the first block we want to completely free.
 	 */
 	off = (lastlbn - -(lbn + level)) / lbnadd;
 	if (off + 1 == NINDIR(ump->um_fs))
 		goto nowork;
 	freework = newfreework(ump, freeblks, NULL, lbn, blkno, 0, off + 1, 0);
 	/*
 	 * Link the freework into the indirdep.  This will prevent any new
 	 * allocations from proceeding until we are finished with the
 	 * truncate and the block is written.
 	 */
 	ACQUIRE_LOCK(ump);
 	indirdep = indirdep_lookup(mp, ip, bp);
 	if (indirdep->ir_freeblks)
 		panic("setup_trunc_indir: indirdep already truncated.");
 	TAILQ_INSERT_TAIL(&indirdep->ir_trunc, freework, fw_next);
 	freework->fw_indir = indirdep;
 	/*
 	 * Cancel any allocindirs that will not make it to disk.
 	 * We have to do this for all copies of the indirdep that
 	 * live on this newblk.
 	 */
 	if ((indirdep->ir_state & DEPCOMPLETE) == 0) {
 		newblk_lookup(mp, dbtofsb(ump->um_fs, bp->b_blkno), 0, &newblk);
 		LIST_FOREACH(indirn, &newblk->nb_indirdeps, ir_next)
 			trunc_indirdep(indirn, freeblks, bp, off);
 	} else
 		trunc_indirdep(indirdep, freeblks, bp, off);
 	FREE_LOCK(ump);
 	/*
 	 * Creation is protected by the buf lock. The saveddata is only
 	 * needed if a full truncation follows a partial truncation but it
 	 * is difficult to allocate in that case so we fetch it anyway.
 	 */
 	if (indirdep->ir_saveddata == NULL)
 		indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP,
 		    M_SOFTDEP_FLAGS);
 nowork:
 	/* Fetch the blkno of the child and the zero start offset. */
 	if (I_IS_UFS1(ip)) {
 		blkno = ((ufs1_daddr_t *)bp->b_data)[off];
 		start = (uint8_t *)&((ufs1_daddr_t *)bp->b_data)[off+1];
 	} else {
 		blkno = ((ufs2_daddr_t *)bp->b_data)[off];
 		start = (uint8_t *)&((ufs2_daddr_t *)bp->b_data)[off+1];
 	}
 	if (freework) {
 		/* Zero the truncated pointers. */
 		end = bp->b_data + bp->b_bcount;
 		bzero(start, end - start);
 		bdwrite(bp);
 	} else
 		bqrelse(bp);
 	if (level == 0)
 		return (0);
 	lbn++; /* adjust level */
 	lbn -= (off * lbnadd);
 	return setup_trunc_indir(freeblks, ip, lbn, lastlbn, blkno);
 }
 
 /*
  * Complete the partial truncation of an indirect block setup by
  * setup_trunc_indir().  This zeros the truncated pointers in the saved
  * copy and writes them to disk before the freeblks is allowed to complete.
  */
 static void
 complete_trunc_indir(freework)
 	struct freework *freework;
 {
 	struct freework *fwn;
 	struct indirdep *indirdep;
 	struct ufsmount *ump;
 	struct buf *bp;
 	uintptr_t start;
 	int count;
 
 	ump = VFSTOUFS(freework->fw_list.wk_mp);
 	LOCK_OWNED(ump);
 	indirdep = freework->fw_indir;
 	for (;;) {
 		bp = indirdep->ir_bp;
 		/* See if the block was discarded. */
 		if (bp == NULL)
 			break;
 		/* Inline part of getdirtybuf().  We dont want bremfree. */
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) == 0)
 			break;
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 		    LOCK_PTR(ump)) == 0)
 			BUF_UNLOCK(bp);
 		ACQUIRE_LOCK(ump);
 	}
 	freework->fw_state |= DEPCOMPLETE;
 	TAILQ_REMOVE(&indirdep->ir_trunc, freework, fw_next);
 	/*
 	 * Zero the pointers in the saved copy.
 	 */
 	if (indirdep->ir_state & UFS1FMT)
 		start = sizeof(ufs1_daddr_t);
 	else
 		start = sizeof(ufs2_daddr_t);
 	start *= freework->fw_start;
 	count = indirdep->ir_savebp->b_bcount - start;
 	start += (uintptr_t)indirdep->ir_savebp->b_data;
 	bzero((char *)start, count);
 	/*
 	 * We need to start the next truncation in the list if it has not
 	 * been started yet.
 	 */
 	fwn = TAILQ_FIRST(&indirdep->ir_trunc);
 	if (fwn != NULL) {
 		if (fwn->fw_freeblks == indirdep->ir_freeblks)
 			TAILQ_REMOVE(&indirdep->ir_trunc, fwn, fw_next);
 		if ((fwn->fw_state & ONWORKLIST) == 0)
 			freework_enqueue(fwn);
 	}
 	/*
 	 * If bp is NULL the block was fully truncated, restore
 	 * the saved block list otherwise free it if it is no
 	 * longer needed.
 	 */
 	if (TAILQ_EMPTY(&indirdep->ir_trunc)) {
 		if (bp == NULL)
 			bcopy(indirdep->ir_saveddata,
 			    indirdep->ir_savebp->b_data,
 			    indirdep->ir_savebp->b_bcount);
 		free(indirdep->ir_saveddata, M_INDIRDEP);
 		indirdep->ir_saveddata = NULL;
 	}
 	/*
 	 * When bp is NULL there is a full truncation pending.  We
 	 * must wait for this full truncation to be journaled before
 	 * we can release this freework because the disk pointers will
 	 * never be written as zero.
 	 */
 	if (bp == NULL)  {
 		if (LIST_EMPTY(&indirdep->ir_freeblks->fb_jblkdephd))
 			handle_written_freework(freework);
 		else
 			WORKLIST_INSERT(&indirdep->ir_freeblks->fb_freeworkhd,
 			   &freework->fw_list);
 	} else {
 		/* Complete when the real copy is written. */
 		WORKLIST_INSERT(&bp->b_dep, &freework->fw_list);
 		BUF_UNLOCK(bp);
 	}
 }
 
 /*
  * Calculate the number of blocks we are going to release where datablocks
  * is the current total and length is the new file size.
  */
 static ufs2_daddr_t
 blkcount(fs, datablocks, length)
 	struct fs *fs;
 	ufs2_daddr_t datablocks;
 	off_t length;
 {
 	off_t totblks, numblks;
 
 	totblks = 0;
 	numblks = howmany(length, fs->fs_bsize);
 	if (numblks <= NDADDR) {
 		totblks = howmany(length, fs->fs_fsize);
 		goto out;
 	}
         totblks = blkstofrags(fs, numblks);
 	numblks -= NDADDR;
 	/*
 	 * Count all single, then double, then triple indirects required.
 	 * Subtracting one indirects worth of blocks for each pass
 	 * acknowledges one of each pointed to by the inode.
 	 */
 	for (;;) {
 		totblks += blkstofrags(fs, howmany(numblks, NINDIR(fs)));
 		numblks -= NINDIR(fs);
 		if (numblks <= 0)
 			break;
 		numblks = howmany(numblks, NINDIR(fs));
 	}
 out:
 	totblks = fsbtodb(fs, totblks);
 	/*
 	 * Handle sparse files.  We can't reclaim more blocks than the inode
 	 * references.  We will correct it later in handle_complete_freeblks()
 	 * when we know the real count.
 	 */
 	if (totblks > datablocks)
 		return (0);
 	return (datablocks - totblks);
 }
 
 /*
  * Handle freeblocks for journaled softupdate filesystems.
  *
  * Contrary to normal softupdates, we must preserve the block pointers in
  * indirects until their subordinates are free.  This is to avoid journaling
  * every block that is freed which may consume more space than the journal
  * itself.  The recovery program will see the free block journals at the
  * base of the truncated area and traverse them to reclaim space.  The
  * pointers in the inode may be cleared immediately after the journal
  * records are written because each direct and indirect pointer in the
  * inode is recorded in a journal.  This permits full truncation to proceed
  * asynchronously.  The write order is journal -> inode -> cgs -> indirects.
  *
  * The algorithm is as follows:
  * 1) Traverse the in-memory state and create journal entries to release
  *    the relevant blocks and full indirect trees.
  * 2) Traverse the indirect block chain adding partial truncation freework
  *    records to indirects in the path to lastlbn.  The freework will
  *    prevent new allocation dependencies from being satisfied in this
  *    indirect until the truncation completes.
  * 3) Read and lock the inode block, performing an update with the new size
  *    and pointers.  This prevents truncated data from becoming valid on
  *    disk through step 4.
  * 4) Reap unsatisfied dependencies that are beyond the truncated area,
  *    eliminate journal work for those records that do not require it.
  * 5) Schedule the journal records to be written followed by the inode block.
  * 6) Allocate any necessary frags for the end of file.
  * 7) Zero any partially truncated blocks.
  *
  * From this truncation proceeds asynchronously using the freework and
  * indir_trunc machinery.  The file will not be extended again into a
  * partially truncated indirect block until all work is completed but
  * the normal dependency mechanism ensures that it is rolled back/forward
  * as appropriate.  Further truncation may occur without delay and is
  * serialized in indir_trunc().
  */
 void
 softdep_journal_freeblocks(ip, cred, length, flags)
 	struct inode *ip;	/* The inode whose length is to be reduced */
 	struct ucred *cred;
 	off_t length;		/* The new length for the file */
 	int flags;		/* IO_EXT and/or IO_NORMAL */
 {
 	struct freeblks *freeblks, *fbn;
 	struct worklist *wk, *wkn;
 	struct inodedep *inodedep;
 	struct jblkdep *jblkdep;
 	struct allocdirect *adp, *adpn;
 	struct ufsmount *ump;
 	struct fs *fs;
 	struct buf *bp;
 	struct vnode *vp;
 	struct mount *mp;
 	ufs2_daddr_t extblocks, datablocks;
 	ufs_lbn_t tmpval, lbn, lastlbn;
 	int frags, lastoff, iboff, allocblock, needj, error, i;
 
 	ump = ITOUMP(ip);
 	mp = UFSTOVFS(ump);
 	fs = ump->um_fs;
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_journal_freeblocks called on non-softdep filesystem"));
 	vp = ITOV(ip);
 	needj = 1;
 	iboff = -1;
 	allocblock = 0;
 	extblocks = 0;
 	datablocks = 0;
 	frags = 0;
 	freeblks = newfreeblks(mp, ip);
 	ACQUIRE_LOCK(ump);
 	/*
 	 * If we're truncating a removed file that will never be written
 	 * we don't need to journal the block frees.  The canceled journals
 	 * for the allocations will suffice.
 	 */
 	inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
 	if ((inodedep->id_state & (UNLINKED | DEPCOMPLETE)) == UNLINKED &&
 	    length == 0)
 		needj = 0;
 	CTR3(KTR_SUJ, "softdep_journal_freeblks: ip %d length %ld needj %d",
 	    ip->i_number, length, needj);
 	FREE_LOCK(ump);
 	/*
 	 * Calculate the lbn that we are truncating to.  This results in -1
 	 * if we're truncating the 0 bytes.  So it is the last lbn we want
 	 * to keep, not the first lbn we want to truncate.
 	 */
 	lastlbn = lblkno(fs, length + fs->fs_bsize - 1) - 1;
 	lastoff = blkoff(fs, length);
 	/*
 	 * Compute frags we are keeping in lastlbn.  0 means all.
 	 */
 	if (lastlbn >= 0 && lastlbn < NDADDR) {
 		frags = fragroundup(fs, lastoff);
 		/* adp offset of last valid allocdirect. */
 		iboff = lastlbn;
 	} else if (lastlbn > 0)
 		iboff = NDADDR;
 	if (fs->fs_magic == FS_UFS2_MAGIC)
 		extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
 	/*
 	 * Handle normal data blocks and indirects.  This section saves
 	 * values used after the inode update to complete frag and indirect
 	 * truncation.
 	 */
 	if ((flags & IO_NORMAL) != 0) {
 		/*
 		 * Handle truncation of whole direct and indirect blocks.
 		 */
 		for (i = iboff + 1; i < NDADDR; i++)
 			setup_freedirect(freeblks, ip, i, needj);
 		for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR;
 		    i++, lbn += tmpval, tmpval *= NINDIR(fs)) {
 			/* Release a whole indirect tree. */
 			if (lbn > lastlbn) {
 				setup_freeindir(freeblks, ip, i, -lbn -i,
 				    needj);
 				continue;
 			}
 			iboff = i + NDADDR;
 			/*
 			 * Traverse partially truncated indirect tree.
 			 */
 			if (lbn <= lastlbn && lbn + tmpval - 1 > lastlbn)
 				setup_trunc_indir(freeblks, ip, -lbn - i,
 				    lastlbn, DIP(ip, i_ib[i]));
 		}
 		/*
 		 * Handle partial truncation to a frag boundary.
 		 */
 		if (frags) {
 			ufs2_daddr_t blkno;
 			long oldfrags;
 
 			oldfrags = blksize(fs, ip, lastlbn);
 			blkno = DIP(ip, i_db[lastlbn]);
 			if (blkno && oldfrags != frags) {
 				oldfrags -= frags;
 				oldfrags = numfrags(fs, oldfrags);
 				blkno += numfrags(fs, frags);
 				newfreework(ump, freeblks, NULL, lastlbn,
 				    blkno, oldfrags, 0, needj);
 				if (needj)
 					adjust_newfreework(freeblks,
 					    numfrags(fs, frags));
 			} else if (blkno == 0)
 				allocblock = 1;
 		}
 		/*
 		 * Add a journal record for partial truncate if we are
 		 * handling indirect blocks.  Non-indirects need no extra
 		 * journaling.
 		 */
 		if (length != 0 && lastlbn >= NDADDR) {
 			ip->i_flag |= IN_TRUNCATED;
 			newjtrunc(freeblks, length, 0);
 		}
 		ip->i_size = length;
 		DIP_SET(ip, i_size, ip->i_size);
 		datablocks = DIP(ip, i_blocks) - extblocks;
 		if (length != 0)
 			datablocks = blkcount(fs, datablocks, length);
 		freeblks->fb_len = length;
 	}
 	if ((flags & IO_EXT) != 0) {
 		for (i = 0; i < NXADDR; i++)
 			setup_freeext(freeblks, ip, i, needj);
 		ip->i_din2->di_extsize = 0;
 		datablocks += extblocks;
 	}
 #ifdef QUOTA
 	/* Reference the quotas in case the block count is wrong in the end. */
 	quotaref(vp, freeblks->fb_quota);
 	(void) chkdq(ip, -datablocks, NOCRED, 0);
 #endif
 	freeblks->fb_chkcnt = -datablocks;
 	UFS_LOCK(ump);
 	fs->fs_pendingblocks += datablocks;
 	UFS_UNLOCK(ump);
 	DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks);
 	/*
 	 * Handle truncation of incomplete alloc direct dependencies.  We
 	 * hold the inode block locked to prevent incomplete dependencies
 	 * from reaching the disk while we are eliminating those that
 	 * have been truncated.  This is a partially inlined ffs_update().
 	 */
 	ufs_itimes(vp);
 	ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 	    (int)fs->fs_bsize, cred, &bp);
 	if (error) {
 		brelse(bp);
 		softdep_error("softdep_journal_freeblocks", error);
 		return;
 	}
 	if (bp->b_bufsize == fs->fs_bsize)
 		bp->b_flags |= B_CLUSTEROK;
 	softdep_update_inodeblock(ip, bp, 0);
 	if (ump->um_fstype == UFS1)
 		*((struct ufs1_dinode *)bp->b_data +
 		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
 	else
 		*((struct ufs2_dinode *)bp->b_data +
 		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
 	ACQUIRE_LOCK(ump);
 	(void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
 	if ((inodedep->id_state & IOSTARTED) != 0)
 		panic("softdep_setup_freeblocks: inode busy");
 	/*
 	 * Add the freeblks structure to the list of operations that
 	 * must await the zero'ed inode being written to disk. If we
 	 * still have a bitmap dependency (needj), then the inode
 	 * has never been written to disk, so we can process the
 	 * freeblks below once we have deleted the dependencies.
 	 */
 	if (needj)
 		WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list);
 	else
 		freeblks->fb_state |= COMPLETE;
 	if ((flags & IO_NORMAL) != 0) {
 		TAILQ_FOREACH_SAFE(adp, &inodedep->id_inoupdt, ad_next, adpn) {
 			if (adp->ad_offset > iboff)
 				cancel_allocdirect(&inodedep->id_inoupdt, adp,
 				    freeblks);
 			/*
 			 * Truncate the allocdirect.  We could eliminate
 			 * or modify journal records as well.
 			 */
 			else if (adp->ad_offset == iboff && frags)
 				adp->ad_newsize = frags;
 		}
 	}
 	if ((flags & IO_EXT) != 0)
 		while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL)
 			cancel_allocdirect(&inodedep->id_extupdt, adp,
 			    freeblks);
 	/*
 	 * Scan the bufwait list for newblock dependencies that will never
 	 * make it to disk.
 	 */
 	LIST_FOREACH_SAFE(wk, &inodedep->id_bufwait, wk_list, wkn) {
 		if (wk->wk_type != D_ALLOCDIRECT)
 			continue;
 		adp = WK_ALLOCDIRECT(wk);
 		if (((flags & IO_NORMAL) != 0 && (adp->ad_offset > iboff)) ||
 		    ((flags & IO_EXT) != 0 && (adp->ad_state & EXTDATA))) {
 			cancel_jfreeblk(freeblks, adp->ad_newblkno);
 			cancel_newblk(WK_NEWBLK(wk), NULL, &freeblks->fb_jwork);
 			WORKLIST_INSERT(&freeblks->fb_freeworkhd, wk);
 		}
 	}
 	/*
 	 * Add journal work.
 	 */
 	LIST_FOREACH(jblkdep, &freeblks->fb_jblkdephd, jb_deps)
 		add_to_journal(&jblkdep->jb_list);
 	FREE_LOCK(ump);
 	bdwrite(bp);
 	/*
 	 * Truncate dependency structures beyond length.
 	 */
 	trunc_dependencies(ip, freeblks, lastlbn, frags, flags);
 	/*
 	 * This is only set when we need to allocate a fragment because
 	 * none existed at the end of a frag-sized file.  It handles only
 	 * allocating a new, zero filled block.
 	 */
 	if (allocblock) {
 		ip->i_size = length - lastoff;
 		DIP_SET(ip, i_size, ip->i_size);
 		error = UFS_BALLOC(vp, length - 1, 1, cred, BA_CLRBUF, &bp);
 		if (error != 0) {
 			softdep_error("softdep_journal_freeblks", error);
 			return;
 		}
 		ip->i_size = length;
 		DIP_SET(ip, i_size, length);
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		allocbuf(bp, frags);
 		ffs_update(vp, 0);
 		bawrite(bp);
 	} else if (lastoff != 0 && vp->v_type != VDIR) {
 		int size;
 
 		/*
 		 * Zero the end of a truncated frag or block.
 		 */
 		size = sblksize(fs, length, lastlbn);
 		error = bread(vp, lastlbn, size, cred, &bp);
 		if (error) {
 			softdep_error("softdep_journal_freeblks", error);
 			return;
 		}
 		bzero((char *)bp->b_data + lastoff, size - lastoff);
 		bawrite(bp);
 
 	}
 	ACQUIRE_LOCK(ump);
 	inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
 	TAILQ_INSERT_TAIL(&inodedep->id_freeblklst, freeblks, fb_next);
 	freeblks->fb_state |= DEPCOMPLETE | ONDEPLIST;
 	/*
 	 * We zero earlier truncations so they don't erroneously
 	 * update i_blocks.
 	 */
 	if (freeblks->fb_len == 0 && (flags & IO_NORMAL) != 0)
 		TAILQ_FOREACH(fbn, &inodedep->id_freeblklst, fb_next)
 			fbn->fb_len = 0;
 	if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE &&
 	    LIST_EMPTY(&freeblks->fb_jblkdephd))
 		freeblks->fb_state |= INPROGRESS;
 	else
 		freeblks = NULL;
 	FREE_LOCK(ump);
 	if (freeblks)
 		handle_workitem_freeblocks(freeblks, 0);
 	trunc_pages(ip, length, extblocks, flags);
 
 }
 
 /*
  * Flush a JOP_SYNC to the journal.
  */
 void
 softdep_journal_fsync(ip)
 	struct inode *ip;
 {
 	struct jfsync *jfsync;
 	struct ufsmount *ump;
 
 	ump = ITOUMP(ip);
 	KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
 	    ("softdep_journal_fsync called on non-softdep filesystem"));
 	if ((ip->i_flag & IN_TRUNCATED) == 0)
 		return;
 	ip->i_flag &= ~IN_TRUNCATED;
 	jfsync = malloc(sizeof(*jfsync), M_JFSYNC, M_SOFTDEP_FLAGS | M_ZERO);
 	workitem_alloc(&jfsync->jfs_list, D_JFSYNC, UFSTOVFS(ump));
 	jfsync->jfs_size = ip->i_size;
 	jfsync->jfs_ino = ip->i_number;
 	ACQUIRE_LOCK(ump);
 	add_to_journal(&jfsync->jfs_list);
 	jwait(&jfsync->jfs_list, MNT_WAIT);
 	FREE_LOCK(ump);
 }
 
 /*
  * Block de-allocation dependencies.
  * 
  * When blocks are de-allocated, the on-disk pointers must be nullified before
  * the blocks are made available for use by other files.  (The true
  * requirement is that old pointers must be nullified before new on-disk
  * pointers are set.  We chose this slightly more stringent requirement to
  * reduce complexity.) Our implementation handles this dependency by updating
  * the inode (or indirect block) appropriately but delaying the actual block
  * de-allocation (i.e., freemap and free space count manipulation) until
  * after the updated versions reach stable storage.  After the disk is
  * updated, the blocks can be safely de-allocated whenever it is convenient.
  * This implementation handles only the common case of reducing a file's
  * length to zero. Other cases are handled by the conventional synchronous
  * write approach.
  *
  * The ffs implementation with which we worked double-checks
  * the state of the block pointers and file size as it reduces
  * a file's length.  Some of this code is replicated here in our
  * soft updates implementation.  The freeblks->fb_chkcnt field is
  * used to transfer a part of this information to the procedure
  * that eventually de-allocates the blocks.
  *
  * This routine should be called from the routine that shortens
  * a file's length, before the inode's size or block pointers
  * are modified. It will save the block pointer information for
  * later release and zero the inode so that the calling routine
  * can release it.
  */
 void
 softdep_setup_freeblocks(ip, length, flags)
 	struct inode *ip;	/* The inode whose length is to be reduced */
 	off_t length;		/* The new length for the file */
 	int flags;		/* IO_EXT and/or IO_NORMAL */
 {
 	struct ufs1_dinode *dp1;
 	struct ufs2_dinode *dp2;
 	struct freeblks *freeblks;
 	struct inodedep *inodedep;
 	struct allocdirect *adp;
 	struct ufsmount *ump;
 	struct buf *bp;
 	struct fs *fs;
 	ufs2_daddr_t extblocks, datablocks;
 	struct mount *mp;
 	int i, delay, error;
 	ufs_lbn_t tmpval;
 	ufs_lbn_t lbn;
 
 	ump = ITOUMP(ip);
 	mp = UFSTOVFS(ump);
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_setup_freeblocks called on non-softdep filesystem"));
 	CTR2(KTR_SUJ, "softdep_setup_freeblks: ip %d length %ld",
 	    ip->i_number, length);
 	KASSERT(length == 0, ("softdep_setup_freeblocks: non-zero length"));
 	fs = ump->um_fs;
 	if ((error = bread(ump->um_devvp,
 	    fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 	    (int)fs->fs_bsize, NOCRED, &bp)) != 0) {
 		brelse(bp);
 		softdep_error("softdep_setup_freeblocks", error);
 		return;
 	}
 	freeblks = newfreeblks(mp, ip);
 	extblocks = 0;
 	datablocks = 0;
 	if (fs->fs_magic == FS_UFS2_MAGIC)
 		extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
 	if ((flags & IO_NORMAL) != 0) {
 		for (i = 0; i < NDADDR; i++)
 			setup_freedirect(freeblks, ip, i, 0);
 		for (i = 0, tmpval = NINDIR(fs), lbn = NDADDR; i < NIADDR;
 		    i++, lbn += tmpval, tmpval *= NINDIR(fs))
 			setup_freeindir(freeblks, ip, i, -lbn -i, 0);
 		ip->i_size = 0;
 		DIP_SET(ip, i_size, 0);
 		datablocks = DIP(ip, i_blocks) - extblocks;
 	}
 	if ((flags & IO_EXT) != 0) {
 		for (i = 0; i < NXADDR; i++)
 			setup_freeext(freeblks, ip, i, 0);
 		ip->i_din2->di_extsize = 0;
 		datablocks += extblocks;
 	}
 #ifdef QUOTA
 	/* Reference the quotas in case the block count is wrong in the end. */
 	quotaref(ITOV(ip), freeblks->fb_quota);
 	(void) chkdq(ip, -datablocks, NOCRED, 0);
 #endif
 	freeblks->fb_chkcnt = -datablocks;
 	UFS_LOCK(ump);
 	fs->fs_pendingblocks += datablocks;
 	UFS_UNLOCK(ump);
 	DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - datablocks);
 	/*
 	 * Push the zero'ed inode to to its disk buffer so that we are free
 	 * to delete its dependencies below. Once the dependencies are gone
 	 * the buffer can be safely released.
 	 */
 	if (ump->um_fstype == UFS1) {
 		dp1 = ((struct ufs1_dinode *)bp->b_data +
 		    ino_to_fsbo(fs, ip->i_number));
 		ip->i_din1->di_freelink = dp1->di_freelink;
 		*dp1 = *ip->i_din1;
 	} else {
 		dp2 = ((struct ufs2_dinode *)bp->b_data +
 		    ino_to_fsbo(fs, ip->i_number));
 		ip->i_din2->di_freelink = dp2->di_freelink;
 		*dp2 = *ip->i_din2;
 	}
 	/*
 	 * Find and eliminate any inode dependencies.
 	 */
 	ACQUIRE_LOCK(ump);
 	(void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
 	if ((inodedep->id_state & IOSTARTED) != 0)
 		panic("softdep_setup_freeblocks: inode busy");
 	/*
 	 * Add the freeblks structure to the list of operations that
 	 * must await the zero'ed inode being written to disk. If we
 	 * still have a bitmap dependency (delay == 0), then the inode
 	 * has never been written to disk, so we can process the
 	 * freeblks below once we have deleted the dependencies.
 	 */
 	delay = (inodedep->id_state & DEPCOMPLETE);
 	if (delay)
 		WORKLIST_INSERT(&bp->b_dep, &freeblks->fb_list);
 	else
 		freeblks->fb_state |= COMPLETE;
 	/*
 	 * Because the file length has been truncated to zero, any
 	 * pending block allocation dependency structures associated
 	 * with this inode are obsolete and can simply be de-allocated.
 	 * We must first merge the two dependency lists to get rid of
 	 * any duplicate freefrag structures, then purge the merged list.
 	 * If we still have a bitmap dependency, then the inode has never
 	 * been written to disk, so we can free any fragments without delay.
 	 */
 	if (flags & IO_NORMAL) {
 		merge_inode_lists(&inodedep->id_newinoupdt,
 		    &inodedep->id_inoupdt);
 		while ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
 			cancel_allocdirect(&inodedep->id_inoupdt, adp,
 			    freeblks);
 	}
 	if (flags & IO_EXT) {
 		merge_inode_lists(&inodedep->id_newextupdt,
 		    &inodedep->id_extupdt);
 		while ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL)
 			cancel_allocdirect(&inodedep->id_extupdt, adp,
 			    freeblks);
 	}
 	FREE_LOCK(ump);
 	bdwrite(bp);
 	trunc_dependencies(ip, freeblks, -1, 0, flags);
 	ACQUIRE_LOCK(ump);
 	if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0)
 		(void) free_inodedep(inodedep);
 	freeblks->fb_state |= DEPCOMPLETE;
 	/*
 	 * If the inode with zeroed block pointers is now on disk
 	 * we can start freeing blocks.
 	 */  
 	if ((freeblks->fb_state & ALLCOMPLETE) == ALLCOMPLETE)
 		freeblks->fb_state |= INPROGRESS;
 	else
 		freeblks = NULL;
 	FREE_LOCK(ump);
 	if (freeblks)
 		handle_workitem_freeblocks(freeblks, 0);
 	trunc_pages(ip, length, extblocks, flags);
 }
 
 /*
  * Eliminate pages from the page cache that back parts of this inode and
  * adjust the vnode pager's idea of our size.  This prevents stale data
  * from hanging around in the page cache.
  */
 static void
 trunc_pages(ip, length, extblocks, flags)
 	struct inode *ip;
 	off_t length;
 	ufs2_daddr_t extblocks;
 	int flags;
 {
 	struct vnode *vp;
 	struct fs *fs;
 	ufs_lbn_t lbn;
 	off_t end, extend;
 
 	vp = ITOV(ip);
 	fs = ITOFS(ip);
 	extend = OFF_TO_IDX(lblktosize(fs, -extblocks));
 	if ((flags & IO_EXT) != 0)
 		vn_pages_remove(vp, extend, 0);
 	if ((flags & IO_NORMAL) == 0)
 		return;
 	BO_LOCK(&vp->v_bufobj);
 	drain_output(vp);
 	BO_UNLOCK(&vp->v_bufobj);
 	/*
 	 * The vnode pager eliminates file pages we eliminate indirects
 	 * below.
 	 */
 	vnode_pager_setsize(vp, length);
 	/*
 	 * Calculate the end based on the last indirect we want to keep.  If
 	 * the block extends into indirects we can just use the negative of
 	 * its lbn.  Doubles and triples exist at lower numbers so we must
 	 * be careful not to remove those, if they exist.  double and triple
 	 * indirect lbns do not overlap with others so it is not important
 	 * to verify how many levels are required.
 	 */
 	lbn = lblkno(fs, length);
 	if (lbn >= NDADDR) {
 		/* Calculate the virtual lbn of the triple indirect. */
 		lbn = -lbn - (NIADDR - 1);
 		end = OFF_TO_IDX(lblktosize(fs, lbn));
 	} else
 		end = extend;
 	vn_pages_remove(vp, OFF_TO_IDX(OFF_MAX), end);
 }
 
 /*
  * See if the buf bp is in the range eliminated by truncation.
  */
 static int
 trunc_check_buf(bp, blkoffp, lastlbn, lastoff, flags)
 	struct buf *bp;
 	int *blkoffp;
 	ufs_lbn_t lastlbn;
 	int lastoff;
 	int flags;
 {
 	ufs_lbn_t lbn;
 
 	*blkoffp = 0;
 	/* Only match ext/normal blocks as appropriate. */
 	if (((flags & IO_EXT) == 0 && (bp->b_xflags & BX_ALTDATA)) ||
 	    ((flags & IO_NORMAL) == 0 && (bp->b_xflags & BX_ALTDATA) == 0))
 		return (0);
 	/* ALTDATA is always a full truncation. */
 	if ((bp->b_xflags & BX_ALTDATA) != 0)
 		return (1);
 	/* -1 is full truncation. */
 	if (lastlbn == -1)
 		return (1);
 	/*
 	 * If this is a partial truncate we only want those
 	 * blocks and indirect blocks that cover the range
 	 * we're after.
 	 */
 	lbn = bp->b_lblkno;
 	if (lbn < 0)
 		lbn = -(lbn + lbn_level(lbn));
 	if (lbn < lastlbn)
 		return (0);
 	/* Here we only truncate lblkno if it's partial. */
 	if (lbn == lastlbn) {
 		if (lastoff == 0)
 			return (0);
 		*blkoffp = lastoff;
 	}
 	return (1);
 }
 
 /*
  * Eliminate any dependencies that exist in memory beyond lblkno:off
  */
 static void
 trunc_dependencies(ip, freeblks, lastlbn, lastoff, flags)
 	struct inode *ip;
 	struct freeblks *freeblks;
 	ufs_lbn_t lastlbn;
 	int lastoff;
 	int flags;
 {
 	struct bufobj *bo;
 	struct vnode *vp;
 	struct buf *bp;
 	int blkoff;
 
 	/*
 	 * We must wait for any I/O in progress to finish so that
 	 * all potential buffers on the dirty list will be visible.
 	 * Once they are all there, walk the list and get rid of
 	 * any dependencies.
 	 */
 	vp = ITOV(ip);
 	bo = &vp->v_bufobj;
 	BO_LOCK(bo);
 	drain_output(vp);
 	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs)
 		bp->b_vflags &= ~BV_SCANNED;
 restart:
 	TAILQ_FOREACH(bp, &bo->bo_dirty.bv_hd, b_bobufs) {
 		if (bp->b_vflags & BV_SCANNED)
 			continue;
 		if (!trunc_check_buf(bp, &blkoff, lastlbn, lastoff, flags)) {
 			bp->b_vflags |= BV_SCANNED;
 			continue;
 		}
 		KASSERT(bp->b_bufobj == bo, ("Wrong object in buffer"));
 		if ((bp = getdirtybuf(bp, BO_LOCKPTR(bo), MNT_WAIT)) == NULL)
 			goto restart;
 		BO_UNLOCK(bo);
 		if (deallocate_dependencies(bp, freeblks, blkoff))
 			bqrelse(bp);
 		else
 			brelse(bp);
 		BO_LOCK(bo);
 		goto restart;
 	}
 	/*
 	 * Now do the work of vtruncbuf while also matching indirect blocks.
 	 */
 	TAILQ_FOREACH(bp, &bo->bo_clean.bv_hd, b_bobufs)
 		bp->b_vflags &= ~BV_SCANNED;
 cleanrestart:
 	TAILQ_FOREACH(bp, &bo->bo_clean.bv_hd, b_bobufs) {
 		if (bp->b_vflags & BV_SCANNED)
 			continue;
 		if (!trunc_check_buf(bp, &blkoff, lastlbn, lastoff, flags)) {
 			bp->b_vflags |= BV_SCANNED;
 			continue;
 		}
 		if (BUF_LOCK(bp,
 		    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
 		    BO_LOCKPTR(bo)) == ENOLCK) {
 			BO_LOCK(bo);
 			goto cleanrestart;
 		}
 		bp->b_vflags |= BV_SCANNED;
 		bremfree(bp);
 		if (blkoff != 0) {
 			allocbuf(bp, blkoff);
 			bqrelse(bp);
 		} else {
 			bp->b_flags |= B_INVAL | B_NOCACHE | B_RELBUF;
 			brelse(bp);
 		}
 		BO_LOCK(bo);
 		goto cleanrestart;
 	}
 	drain_output(vp);
 	BO_UNLOCK(bo);
 }
 
 static int
 cancel_pagedep(pagedep, freeblks, blkoff)
 	struct pagedep *pagedep;
 	struct freeblks *freeblks;
 	int blkoff;
 {
 	struct jremref *jremref;
 	struct jmvref *jmvref;
 	struct dirrem *dirrem, *tmp;
 	int i;
 
 	/*
 	 * Copy any directory remove dependencies to the list
 	 * to be processed after the freeblks proceeds.  If
 	 * directory entry never made it to disk they
 	 * can be dumped directly onto the work list.
 	 */
 	LIST_FOREACH_SAFE(dirrem, &pagedep->pd_dirremhd, dm_next, tmp) {
 		/* Skip this directory removal if it is intended to remain. */
 		if (dirrem->dm_offset < blkoff)
 			continue;
 		/*
 		 * If there are any dirrems we wait for the journal write
 		 * to complete and then restart the buf scan as the lock
 		 * has been dropped.
 		 */
 		while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL) {
 			jwait(&jremref->jr_list, MNT_WAIT);
 			return (ERESTART);
 		}
 		LIST_REMOVE(dirrem, dm_next);
 		dirrem->dm_dirinum = pagedep->pd_ino;
 		WORKLIST_INSERT(&freeblks->fb_freeworkhd, &dirrem->dm_list);
 	}
 	while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL) {
 		jwait(&jmvref->jm_list, MNT_WAIT);
 		return (ERESTART);
 	}
 	/*
 	 * When we're partially truncating a pagedep we just want to flush
 	 * journal entries and return.  There can not be any adds in the
 	 * truncated portion of the directory and newblk must remain if
 	 * part of the block remains.
 	 */
 	if (blkoff != 0) {
 		struct diradd *dap;
 
 		LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist)
 			if (dap->da_offset > blkoff)
 				panic("cancel_pagedep: diradd %p off %d > %d",
 				    dap, dap->da_offset, blkoff);
 		for (i = 0; i < DAHASHSZ; i++)
 			LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist)
 				if (dap->da_offset > blkoff)
 					panic("cancel_pagedep: diradd %p off %d > %d",
 					    dap, dap->da_offset, blkoff);
 		return (0);
 	}
 	/*
 	 * There should be no directory add dependencies present
 	 * as the directory could not be truncated until all
 	 * children were removed.
 	 */
 	KASSERT(LIST_FIRST(&pagedep->pd_pendinghd) == NULL,
 	    ("deallocate_dependencies: pendinghd != NULL"));
 	for (i = 0; i < DAHASHSZ; i++)
 		KASSERT(LIST_FIRST(&pagedep->pd_diraddhd[i]) == NULL,
 		    ("deallocate_dependencies: diraddhd != NULL"));
 	if ((pagedep->pd_state & NEWBLOCK) != 0)
 		free_newdirblk(pagedep->pd_newdirblk);
 	if (free_pagedep(pagedep) == 0)
 		panic("Failed to free pagedep %p", pagedep);
 	return (0);
 }
 
 /*
  * Reclaim any dependency structures from a buffer that is about to
  * be reallocated to a new vnode. The buffer must be locked, thus,
  * no I/O completion operations can occur while we are manipulating
  * its associated dependencies. The mutex is held so that other I/O's
  * associated with related dependencies do not occur.
  */
 static int
 deallocate_dependencies(bp, freeblks, off)
 	struct buf *bp;
 	struct freeblks *freeblks;
 	int off;
 {
 	struct indirdep *indirdep;
 	struct pagedep *pagedep;
 	struct worklist *wk, *wkn;
 	struct ufsmount *ump;
 
 	if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
 		goto done;
 	ump = VFSTOUFS(wk->wk_mp);
 	ACQUIRE_LOCK(ump);
 	LIST_FOREACH_SAFE(wk, &bp->b_dep, wk_list, wkn) {
 		switch (wk->wk_type) {
 		case D_INDIRDEP:
 			indirdep = WK_INDIRDEP(wk);
 			if (bp->b_lblkno >= 0 ||
 			    bp->b_blkno != indirdep->ir_savebp->b_lblkno)
 				panic("deallocate_dependencies: not indir");
 			cancel_indirdep(indirdep, bp, freeblks);
 			continue;
 
 		case D_PAGEDEP:
 			pagedep = WK_PAGEDEP(wk);
 			if (cancel_pagedep(pagedep, freeblks, off)) {
 				FREE_LOCK(ump);
 				return (ERESTART);
 			}
 			continue;
 
 		case D_ALLOCINDIR:
 			/*
 			 * Simply remove the allocindir, we'll find it via
 			 * the indirdep where we can clear pointers if
 			 * needed.
 			 */
 			WORKLIST_REMOVE(wk);
 			continue;
 
 		case D_FREEWORK:
 			/*
 			 * A truncation is waiting for the zero'd pointers
 			 * to be written.  It can be freed when the freeblks
 			 * is journaled.
 			 */
 			WORKLIST_REMOVE(wk);
 			wk->wk_state |= ONDEPLIST;
 			WORKLIST_INSERT(&freeblks->fb_freeworkhd, wk);
 			break;
 
 		case D_ALLOCDIRECT:
 			if (off != 0)
 				continue;
 			/* FALLTHROUGH */
 		default:
 			panic("deallocate_dependencies: Unexpected type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 	FREE_LOCK(ump);
 done:
 	/*
 	 * Don't throw away this buf, we were partially truncating and
 	 * some deps may always remain.
 	 */
 	if (off) {
 		allocbuf(bp, off);
 		bp->b_vflags |= BV_SCANNED;
 		return (EBUSY);
 	}
 	bp->b_flags |= B_INVAL | B_NOCACHE;
 
 	return (0);
 }
 
 /*
  * An allocdirect is being canceled due to a truncate.  We must make sure
  * the journal entry is released in concert with the blkfree that releases
  * the storage.  Completed journal entries must not be released until the
  * space is no longer pointed to by the inode or in the bitmap.
  */
 static void
 cancel_allocdirect(adphead, adp, freeblks)
 	struct allocdirectlst *adphead;
 	struct allocdirect *adp;
 	struct freeblks *freeblks;
 {
 	struct freework *freework;
 	struct newblk *newblk;
 	struct worklist *wk;
 
 	TAILQ_REMOVE(adphead, adp, ad_next);
 	newblk = (struct newblk *)adp;
 	freework = NULL;
 	/*
 	 * Find the correct freework structure.
 	 */
 	LIST_FOREACH(wk, &freeblks->fb_freeworkhd, wk_list) {
 		if (wk->wk_type != D_FREEWORK)
 			continue;
 		freework = WK_FREEWORK(wk);
 		if (freework->fw_blkno == newblk->nb_newblkno)
 			break;
 	}
 	if (freework == NULL)
 		panic("cancel_allocdirect: Freework not found");
 	/*
 	 * If a newblk exists at all we still have the journal entry that
 	 * initiated the allocation so we do not need to journal the free.
 	 */
 	cancel_jfreeblk(freeblks, freework->fw_blkno);
 	/*
 	 * If the journal hasn't been written the jnewblk must be passed
 	 * to the call to ffs_blkfree that reclaims the space.  We accomplish
 	 * this by linking the journal dependency into the freework to be
 	 * freed when freework_freeblock() is called.  If the journal has
 	 * been written we can simply reclaim the journal space when the
 	 * freeblks work is complete.
 	 */
 	freework->fw_jnewblk = cancel_newblk(newblk, &freework->fw_list,
 	    &freeblks->fb_jwork);
 	WORKLIST_INSERT(&freeblks->fb_freeworkhd, &newblk->nb_list);
 }
 
 
 /*
  * Cancel a new block allocation.  May be an indirect or direct block.  We
  * remove it from various lists and return any journal record that needs to
  * be resolved by the caller.
  *
  * A special consideration is made for indirects which were never pointed
  * at on disk and will never be found once this block is released.
  */
 static struct jnewblk *
 cancel_newblk(newblk, wk, wkhd)
 	struct newblk *newblk;
 	struct worklist *wk;
 	struct workhead *wkhd;
 {
 	struct jnewblk *jnewblk;
 
 	CTR1(KTR_SUJ, "cancel_newblk: blkno %jd", newblk->nb_newblkno);
 	    
 	newblk->nb_state |= GOINGAWAY;
 	/*
 	 * Previously we traversed the completedhd on each indirdep
 	 * attached to this newblk to cancel them and gather journal
 	 * work.  Since we need only the oldest journal segment and
 	 * the lowest point on the tree will always have the oldest
 	 * journal segment we are free to release the segments
 	 * of any subordinates and may leave the indirdep list to
 	 * indirdep_complete() when this newblk is freed.
 	 */
 	if (newblk->nb_state & ONDEPLIST) {
 		newblk->nb_state &= ~ONDEPLIST;
 		LIST_REMOVE(newblk, nb_deps);
 	}
 	if (newblk->nb_state & ONWORKLIST)
 		WORKLIST_REMOVE(&newblk->nb_list);
 	/*
 	 * If the journal entry hasn't been written we save a pointer to
 	 * the dependency that frees it until it is written or the
 	 * superseding operation completes.
 	 */
 	jnewblk = newblk->nb_jnewblk;
 	if (jnewblk != NULL && wk != NULL) {
 		newblk->nb_jnewblk = NULL;
 		jnewblk->jn_dep = wk;
 	}
 	if (!LIST_EMPTY(&newblk->nb_jwork))
 		jwork_move(wkhd, &newblk->nb_jwork);
 	/*
 	 * When truncating we must free the newdirblk early to remove
 	 * the pagedep from the hash before returning.
 	 */
 	if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL)
 		free_newdirblk(WK_NEWDIRBLK(wk));
 	if (!LIST_EMPTY(&newblk->nb_newdirblk))
 		panic("cancel_newblk: extra newdirblk");
 
 	return (jnewblk);
 }
 
 /*
  * Schedule the freefrag associated with a newblk to be released once
  * the pointers are written and the previous block is no longer needed.
  */
 static void
 newblk_freefrag(newblk)
 	struct newblk *newblk;
 {
 	struct freefrag *freefrag;
 
 	if (newblk->nb_freefrag == NULL)
 		return;
 	freefrag = newblk->nb_freefrag;
 	newblk->nb_freefrag = NULL;
 	freefrag->ff_state |= COMPLETE;
 	if ((freefrag->ff_state & ALLCOMPLETE) == ALLCOMPLETE)
 		add_to_worklist(&freefrag->ff_list, 0);
 }
 
 /*
  * Free a newblk. Generate a new freefrag work request if appropriate.
  * This must be called after the inode pointer and any direct block pointers
  * are valid or fully removed via truncate or frag extension.
  */
 static void
 free_newblk(newblk)
 	struct newblk *newblk;
 {
 	struct indirdep *indirdep;
 	struct worklist *wk;
 
 	KASSERT(newblk->nb_jnewblk == NULL,
 	    ("free_newblk: jnewblk %p still attached", newblk->nb_jnewblk));
 	KASSERT(newblk->nb_list.wk_type != D_NEWBLK,
 	    ("free_newblk: unclaimed newblk"));
 	LOCK_OWNED(VFSTOUFS(newblk->nb_list.wk_mp));
 	newblk_freefrag(newblk);
 	if (newblk->nb_state & ONDEPLIST)
 		LIST_REMOVE(newblk, nb_deps);
 	if (newblk->nb_state & ONWORKLIST)
 		WORKLIST_REMOVE(&newblk->nb_list);
 	LIST_REMOVE(newblk, nb_hash);
 	if ((wk = LIST_FIRST(&newblk->nb_newdirblk)) != NULL)
 		free_newdirblk(WK_NEWDIRBLK(wk));
 	if (!LIST_EMPTY(&newblk->nb_newdirblk))
 		panic("free_newblk: extra newdirblk");
 	while ((indirdep = LIST_FIRST(&newblk->nb_indirdeps)) != NULL)
 		indirdep_complete(indirdep);
 	handle_jwork(&newblk->nb_jwork);
 	WORKITEM_FREE(newblk, D_NEWBLK);
 }
 
 /*
  * Free a newdirblk. Clear the NEWBLOCK flag on its associated pagedep.
  * This routine must be called with splbio interrupts blocked.
  */
 static void
 free_newdirblk(newdirblk)
 	struct newdirblk *newdirblk;
 {
 	struct pagedep *pagedep;
 	struct diradd *dap;
 	struct worklist *wk;
 
 	LOCK_OWNED(VFSTOUFS(newdirblk->db_list.wk_mp));
 	WORKLIST_REMOVE(&newdirblk->db_list);
 	/*
 	 * If the pagedep is still linked onto the directory buffer
 	 * dependency chain, then some of the entries on the
 	 * pd_pendinghd list may not be committed to disk yet. In
 	 * this case, we will simply clear the NEWBLOCK flag and
 	 * let the pd_pendinghd list be processed when the pagedep
 	 * is next written. If the pagedep is no longer on the buffer
 	 * dependency chain, then all the entries on the pd_pending
 	 * list are committed to disk and we can free them here.
 	 */
 	pagedep = newdirblk->db_pagedep;
 	pagedep->pd_state &= ~NEWBLOCK;
 	if ((pagedep->pd_state & ONWORKLIST) == 0) {
 		while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
 			free_diradd(dap, NULL);
 		/*
 		 * If no dependencies remain, the pagedep will be freed.
 		 */
 		free_pagedep(pagedep);
 	}
 	/* Should only ever be one item in the list. */
 	while ((wk = LIST_FIRST(&newdirblk->db_mkdir)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY);
 	}
 	WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
 }
 
 /*
  * Prepare an inode to be freed. The actual free operation is not
  * done until the zero'ed inode has been written to disk.
  */
 void
 softdep_freefile(pvp, ino, mode)
 	struct vnode *pvp;
 	ino_t ino;
 	int mode;
 {
 	struct inode *ip = VTOI(pvp);
 	struct inodedep *inodedep;
 	struct freefile *freefile;
 	struct freeblks *freeblks;
 	struct ufsmount *ump;
 
 	ump = ITOUMP(ip);
 	KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
 	    ("softdep_freefile called on non-softdep filesystem"));
 	/*
 	 * This sets up the inode de-allocation dependency.
 	 */
 	freefile = malloc(sizeof(struct freefile),
 		M_FREEFILE, M_SOFTDEP_FLAGS);
 	workitem_alloc(&freefile->fx_list, D_FREEFILE, pvp->v_mount);
 	freefile->fx_mode = mode;
 	freefile->fx_oldinum = ino;
 	freefile->fx_devvp = ump->um_devvp;
 	LIST_INIT(&freefile->fx_jwork);
 	UFS_LOCK(ump);
 	ump->um_fs->fs_pendinginodes += 1;
 	UFS_UNLOCK(ump);
 
 	/*
 	 * If the inodedep does not exist, then the zero'ed inode has
 	 * been written to disk. If the allocated inode has never been
 	 * written to disk, then the on-disk inode is zero'ed. In either
 	 * case we can free the file immediately.  If the journal was
 	 * canceled before being written the inode will never make it to
 	 * disk and we must send the canceled journal entrys to
 	 * ffs_freefile() to be cleared in conjunction with the bitmap.
 	 * Any blocks waiting on the inode to write can be safely freed
 	 * here as it will never been written.
 	 */
 	ACQUIRE_LOCK(ump);
 	inodedep_lookup(pvp->v_mount, ino, 0, &inodedep);
 	if (inodedep) {
 		/*
 		 * Clear out freeblks that no longer need to reference
 		 * this inode.
 		 */
 		while ((freeblks =
 		    TAILQ_FIRST(&inodedep->id_freeblklst)) != NULL) {
 			TAILQ_REMOVE(&inodedep->id_freeblklst, freeblks,
 			    fb_next);
 			freeblks->fb_state &= ~ONDEPLIST;
 		}
 		/*
 		 * Remove this inode from the unlinked list.
 		 */
 		if (inodedep->id_state & UNLINKED) {
 			/*
 			 * Save the journal work to be freed with the bitmap
 			 * before we clear UNLINKED.  Otherwise it can be lost
 			 * if the inode block is written.
 			 */
 			handle_bufwait(inodedep, &freefile->fx_jwork);
 			clear_unlinked_inodedep(inodedep);
 			/*
 			 * Re-acquire inodedep as we've dropped the
 			 * per-filesystem lock in clear_unlinked_inodedep().
 			 */
 			inodedep_lookup(pvp->v_mount, ino, 0, &inodedep);
 		}
 	}
 	if (inodedep == NULL || check_inode_unwritten(inodedep)) {
 		FREE_LOCK(ump);
 		handle_workitem_freefile(freefile);
 		return;
 	}
 	if ((inodedep->id_state & DEPCOMPLETE) == 0)
 		inodedep->id_state |= GOINGAWAY;
 	WORKLIST_INSERT(&inodedep->id_inowait, &freefile->fx_list);
 	FREE_LOCK(ump);
 	if (ip->i_number == ino)
 		ip->i_flag |= IN_MODIFIED;
 }
 
 /*
  * Check to see if an inode has never been written to disk. If
  * so free the inodedep and return success, otherwise return failure.
  * This routine must be called with splbio interrupts blocked.
  *
  * If we still have a bitmap dependency, then the inode has never
  * been written to disk. Drop the dependency as it is no longer
  * necessary since the inode is being deallocated. We set the
  * ALLCOMPLETE flags since the bitmap now properly shows that the
  * inode is not allocated. Even if the inode is actively being
  * written, it has been rolled back to its zero'ed state, so we
  * are ensured that a zero inode is what is on the disk. For short
  * lived files, this change will usually result in removing all the
  * dependencies from the inode so that it can be freed immediately.
  */
 static int
 check_inode_unwritten(inodedep)
 	struct inodedep *inodedep;
 {
 
 	LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
 
 	if ((inodedep->id_state & (DEPCOMPLETE | UNLINKED)) != 0 ||
 	    !LIST_EMPTY(&inodedep->id_dirremhd) ||
 	    !LIST_EMPTY(&inodedep->id_pendinghd) ||
 	    !LIST_EMPTY(&inodedep->id_bufwait) ||
 	    !LIST_EMPTY(&inodedep->id_inowait) ||
 	    !TAILQ_EMPTY(&inodedep->id_inoreflst) ||
 	    !TAILQ_EMPTY(&inodedep->id_inoupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_newinoupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_extupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_newextupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_freeblklst) ||
 	    inodedep->id_mkdiradd != NULL || 
 	    inodedep->id_nlinkdelta != 0)
 		return (0);
 	/*
 	 * Another process might be in initiate_write_inodeblock_ufs[12]
 	 * trying to allocate memory without holding "Softdep Lock".
 	 */
 	if ((inodedep->id_state & IOSTARTED) != 0 &&
 	    inodedep->id_savedino1 == NULL)
 		return (0);
 
 	if (inodedep->id_state & ONDEPLIST)
 		LIST_REMOVE(inodedep, id_deps);
 	inodedep->id_state &= ~ONDEPLIST;
 	inodedep->id_state |= ALLCOMPLETE;
 	inodedep->id_bmsafemap = NULL;
 	if (inodedep->id_state & ONWORKLIST)
 		WORKLIST_REMOVE(&inodedep->id_list);
 	if (inodedep->id_savedino1 != NULL) {
 		free(inodedep->id_savedino1, M_SAVEDINO);
 		inodedep->id_savedino1 = NULL;
 	}
 	if (free_inodedep(inodedep) == 0)
 		panic("check_inode_unwritten: busy inode");
 	return (1);
 }
 
 static int
 check_inodedep_free(inodedep)
 	struct inodedep *inodedep;
 {
 
 	LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
 	if ((inodedep->id_state & ALLCOMPLETE) != ALLCOMPLETE ||
 	    !LIST_EMPTY(&inodedep->id_dirremhd) ||
 	    !LIST_EMPTY(&inodedep->id_pendinghd) ||
 	    !LIST_EMPTY(&inodedep->id_bufwait) ||
 	    !LIST_EMPTY(&inodedep->id_inowait) ||
 	    !TAILQ_EMPTY(&inodedep->id_inoreflst) ||
 	    !TAILQ_EMPTY(&inodedep->id_inoupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_newinoupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_extupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_newextupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_freeblklst) ||
 	    inodedep->id_mkdiradd != NULL ||
 	    inodedep->id_nlinkdelta != 0 ||
 	    inodedep->id_savedino1 != NULL)
 		return (0);
 	return (1);
 }
 
 /*
  * Try to free an inodedep structure. Return 1 if it could be freed.
  */
 static int
 free_inodedep(inodedep)
 	struct inodedep *inodedep;
 {
 
 	LOCK_OWNED(VFSTOUFS(inodedep->id_list.wk_mp));
 	if ((inodedep->id_state & (ONWORKLIST | UNLINKED)) != 0 ||
 	    !check_inodedep_free(inodedep))
 		return (0);
 	if (inodedep->id_state & ONDEPLIST)
 		LIST_REMOVE(inodedep, id_deps);
 	LIST_REMOVE(inodedep, id_hash);
 	WORKITEM_FREE(inodedep, D_INODEDEP);
 	return (1);
 }
 
 /*
  * Free the block referenced by a freework structure.  The parent freeblks
  * structure is released and completed when the final cg bitmap reaches
  * the disk.  This routine may be freeing a jnewblk which never made it to
  * disk in which case we do not have to wait as the operation is undone
  * in memory immediately.
  */
 static void
 freework_freeblock(freework)
 	struct freework *freework;
 {
 	struct freeblks *freeblks;
 	struct jnewblk *jnewblk;
 	struct ufsmount *ump;
 	struct workhead wkhd;
 	struct fs *fs;
 	int bsize;
 	int needj;
 
 	ump = VFSTOUFS(freework->fw_list.wk_mp);
 	LOCK_OWNED(ump);
 	/*
 	 * Handle partial truncate separately.
 	 */
 	if (freework->fw_indir) {
 		complete_trunc_indir(freework);
 		return;
 	}
 	freeblks = freework->fw_freeblks;
 	fs = ump->um_fs;
 	needj = MOUNTEDSUJ(freeblks->fb_list.wk_mp) != 0;
 	bsize = lfragtosize(fs, freework->fw_frags);
 	LIST_INIT(&wkhd);
 	/*
 	 * DEPCOMPLETE is cleared in indirblk_insert() if the block lives
 	 * on the indirblk hashtable and prevents premature freeing.
 	 */
 	freework->fw_state |= DEPCOMPLETE;
 	/*
 	 * SUJ needs to wait for the segment referencing freed indirect
 	 * blocks to expire so that we know the checker will not confuse
 	 * a re-allocated indirect block with its old contents.
 	 */
 	if (needj && freework->fw_lbn <= -NDADDR)
 		indirblk_insert(freework);
 	/*
 	 * If we are canceling an existing jnewblk pass it to the free
 	 * routine, otherwise pass the freeblk which will ultimately
 	 * release the freeblks.  If we're not journaling, we can just
 	 * free the freeblks immediately.
 	 */
 	jnewblk = freework->fw_jnewblk;
 	if (jnewblk != NULL) {
 		cancel_jnewblk(jnewblk, &wkhd);
 		needj = 0;
 	} else if (needj) {
 		freework->fw_state |= DELAYEDFREE;
 		freeblks->fb_cgwait++;
 		WORKLIST_INSERT(&wkhd, &freework->fw_list);
 	}
 	FREE_LOCK(ump);
 	freeblks_free(ump, freeblks, btodb(bsize));
 	CTR4(KTR_SUJ,
 	    "freework_freeblock: ino %d blkno %jd lbn %jd size %ld",
 	    freeblks->fb_inum, freework->fw_blkno, freework->fw_lbn, bsize);
 	ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize,
 	    freeblks->fb_inum, freeblks->fb_vtype, &wkhd);
 	ACQUIRE_LOCK(ump);
 	/*
 	 * The jnewblk will be discarded and the bits in the map never
 	 * made it to disk.  We can immediately free the freeblk.
 	 */
 	if (needj == 0)
 		handle_written_freework(freework);
 }
 
 /*
  * We enqueue freework items that need processing back on the freeblks and
  * add the freeblks to the worklist.  This makes it easier to find all work
  * required to flush a truncation in process_truncates().
  */
 static void
 freework_enqueue(freework)
 	struct freework *freework;
 {
 	struct freeblks *freeblks;
 
 	freeblks = freework->fw_freeblks;
 	if ((freework->fw_state & INPROGRESS) == 0)
 		WORKLIST_INSERT(&freeblks->fb_freeworkhd, &freework->fw_list);
 	if ((freeblks->fb_state &
 	    (ONWORKLIST | INPROGRESS | ALLCOMPLETE)) == ALLCOMPLETE &&
 	    LIST_EMPTY(&freeblks->fb_jblkdephd))
 		add_to_worklist(&freeblks->fb_list, WK_NODELAY);
 }
 
 /*
  * Start, continue, or finish the process of freeing an indirect block tree.
  * The free operation may be paused at any point with fw_off containing the
  * offset to restart from.  This enables us to implement some flow control
  * for large truncates which may fan out and generate a huge number of
  * dependencies.
  */
 static void
 handle_workitem_indirblk(freework)
 	struct freework *freework;
 {
 	struct freeblks *freeblks;
 	struct ufsmount *ump;
 	struct fs *fs;
 
 	freeblks = freework->fw_freeblks;
 	ump = VFSTOUFS(freeblks->fb_list.wk_mp);
 	fs = ump->um_fs;
 	if (freework->fw_state & DEPCOMPLETE) {
 		handle_written_freework(freework);
 		return;
 	}
 	if (freework->fw_off == NINDIR(fs)) {
 		freework_freeblock(freework);
 		return;
 	}
 	freework->fw_state |= INPROGRESS;
 	FREE_LOCK(ump);
 	indir_trunc(freework, fsbtodb(fs, freework->fw_blkno),
 	    freework->fw_lbn);
 	ACQUIRE_LOCK(ump);
 }
 
 /*
  * Called when a freework structure attached to a cg buf is written.  The
  * ref on either the parent or the freeblks structure is released and
  * the freeblks is added back to the worklist if there is more work to do.
  */
 static void
 handle_written_freework(freework)
 	struct freework *freework;
 {
 	struct freeblks *freeblks;
 	struct freework *parent;
 
 	freeblks = freework->fw_freeblks;
 	parent = freework->fw_parent;
 	if (freework->fw_state & DELAYEDFREE)
 		freeblks->fb_cgwait--;
 	freework->fw_state |= COMPLETE;
 	if ((freework->fw_state & ALLCOMPLETE) == ALLCOMPLETE)
 		WORKITEM_FREE(freework, D_FREEWORK);
 	if (parent) {
 		if (--parent->fw_ref == 0)
 			freework_enqueue(parent);
 		return;
 	}
 	if (--freeblks->fb_ref != 0)
 		return;
 	if ((freeblks->fb_state & (ALLCOMPLETE | ONWORKLIST | INPROGRESS)) ==
 	    ALLCOMPLETE && LIST_EMPTY(&freeblks->fb_jblkdephd)) 
 		add_to_worklist(&freeblks->fb_list, WK_NODELAY);
 }
 
 /*
  * This workitem routine performs the block de-allocation.
  * The workitem is added to the pending list after the updated
  * inode block has been written to disk.  As mentioned above,
  * checks regarding the number of blocks de-allocated (compared
  * to the number of blocks allocated for the file) are also
  * performed in this function.
  */
 static int
 handle_workitem_freeblocks(freeblks, flags)
 	struct freeblks *freeblks;
 	int flags;
 {
 	struct freework *freework;
 	struct newblk *newblk;
 	struct allocindir *aip;
 	struct ufsmount *ump;
 	struct worklist *wk;
 
 	KASSERT(LIST_EMPTY(&freeblks->fb_jblkdephd),
 	    ("handle_workitem_freeblocks: Journal entries not written."));
 	ump = VFSTOUFS(freeblks->fb_list.wk_mp);
 	ACQUIRE_LOCK(ump);
 	while ((wk = LIST_FIRST(&freeblks->fb_freeworkhd)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		switch (wk->wk_type) {
 		case D_DIRREM:
 			wk->wk_state |= COMPLETE;
 			add_to_worklist(wk, 0);
 			continue;
 
 		case D_ALLOCDIRECT:
 			free_newblk(WK_NEWBLK(wk));
 			continue;
 
 		case D_ALLOCINDIR:
 			aip = WK_ALLOCINDIR(wk);
 			freework = NULL;
 			if (aip->ai_state & DELAYEDFREE) {
 				FREE_LOCK(ump);
 				freework = newfreework(ump, freeblks, NULL,
 				    aip->ai_lbn, aip->ai_newblkno,
 				    ump->um_fs->fs_frag, 0, 0);
 				ACQUIRE_LOCK(ump);
 			}
 			newblk = WK_NEWBLK(wk);
 			if (newblk->nb_jnewblk) {
 				freework->fw_jnewblk = newblk->nb_jnewblk;
 				newblk->nb_jnewblk->jn_dep = &freework->fw_list;
 				newblk->nb_jnewblk = NULL;
 			}
 			free_newblk(newblk);
 			continue;
 
 		case D_FREEWORK:
 			freework = WK_FREEWORK(wk);
 			if (freework->fw_lbn <= -NDADDR)
 				handle_workitem_indirblk(freework);
 			else
 				freework_freeblock(freework);
 			continue;
 		default:
 			panic("handle_workitem_freeblocks: Unknown type %s",
 			    TYPENAME(wk->wk_type));
 		}
 	}
 	if (freeblks->fb_ref != 0) {
 		freeblks->fb_state &= ~INPROGRESS;
 		wake_worklist(&freeblks->fb_list);
 		freeblks = NULL;
 	}
 	FREE_LOCK(ump);
 	if (freeblks)
 		return handle_complete_freeblocks(freeblks, flags);
 	return (0);
 }
 
 /*
  * Handle completion of block free via truncate.  This allows fs_pending
  * to track the actual free block count more closely than if we only updated
  * it at the end.  We must be careful to handle cases where the block count
  * on free was incorrect.
  */
 static void
 freeblks_free(ump, freeblks, blocks)
 	struct ufsmount *ump;
 	struct freeblks *freeblks;
 	int blocks;
 {
 	struct fs *fs;
 	ufs2_daddr_t remain;
 
 	UFS_LOCK(ump);
 	remain = -freeblks->fb_chkcnt;
 	freeblks->fb_chkcnt += blocks;
 	if (remain > 0) {
 		if (remain < blocks)
 			blocks = remain;
 		fs = ump->um_fs;
 		fs->fs_pendingblocks -= blocks;
 	}
 	UFS_UNLOCK(ump);
 }
 
 /*
  * Once all of the freework workitems are complete we can retire the
  * freeblocks dependency and any journal work awaiting completion.  This
  * can not be called until all other dependencies are stable on disk.
  */
 static int
 handle_complete_freeblocks(freeblks, flags)
 	struct freeblks *freeblks;
 	int flags;
 {
 	struct inodedep *inodedep;
 	struct inode *ip;
 	struct vnode *vp;
 	struct fs *fs;
 	struct ufsmount *ump;
 	ufs2_daddr_t spare;
 
 	ump = VFSTOUFS(freeblks->fb_list.wk_mp);
 	fs = ump->um_fs;
 	flags = LK_EXCLUSIVE | flags;
 	spare = freeblks->fb_chkcnt;
 
 	/*
 	 * If we did not release the expected number of blocks we may have
 	 * to adjust the inode block count here.  Only do so if it wasn't
 	 * a truncation to zero and the modrev still matches.
 	 */
 	if (spare && freeblks->fb_len != 0) {
 		if (ffs_vgetf(freeblks->fb_list.wk_mp, freeblks->fb_inum,
 		    flags, &vp, FFSV_FORCEINSMQ) != 0)
 			return (EBUSY);
 		ip = VTOI(vp);
 		if (DIP(ip, i_modrev) == freeblks->fb_modrev) {
 			DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - spare);
 			ip->i_flag |= IN_CHANGE;
 			/*
 			 * We must wait so this happens before the
 			 * journal is reclaimed.
 			 */
 			ffs_update(vp, 1);
 		}
 		vput(vp);
 	}
 	if (spare < 0) {
 		UFS_LOCK(ump);
 		fs->fs_pendingblocks += spare;
 		UFS_UNLOCK(ump);
 	}
 #ifdef QUOTA
 	/* Handle spare. */
 	if (spare)
 		quotaadj(freeblks->fb_quota, ump, -spare);
 	quotarele(freeblks->fb_quota);
 #endif
 	ACQUIRE_LOCK(ump);
 	if (freeblks->fb_state & ONDEPLIST) {
 		inodedep_lookup(freeblks->fb_list.wk_mp, freeblks->fb_inum,
 		    0, &inodedep);
 		TAILQ_REMOVE(&inodedep->id_freeblklst, freeblks, fb_next);
 		freeblks->fb_state &= ~ONDEPLIST;
 		if (TAILQ_EMPTY(&inodedep->id_freeblklst))
 			free_inodedep(inodedep);
 	}
 	/*
 	 * All of the freeblock deps must be complete prior to this call
 	 * so it's now safe to complete earlier outstanding journal entries.
 	 */
 	handle_jwork(&freeblks->fb_jwork);
 	WORKITEM_FREE(freeblks, D_FREEBLKS);
 	FREE_LOCK(ump);
 	return (0);
 }
 
 /*
  * Release blocks associated with the freeblks and stored in the indirect
  * block dbn. If level is greater than SINGLE, the block is an indirect block
  * and recursive calls to indirtrunc must be used to cleanse other indirect
  * blocks.
  *
  * This handles partial and complete truncation of blocks.  Partial is noted
  * with goingaway == 0.  In this case the freework is completed after the
  * zero'd indirects are written to disk.  For full truncation the freework
  * is completed after the block is freed.
  */
 static void
 indir_trunc(freework, dbn, lbn)
 	struct freework *freework;
 	ufs2_daddr_t dbn;
 	ufs_lbn_t lbn;
 {
 	struct freework *nfreework;
 	struct workhead wkhd;
 	struct freeblks *freeblks;
 	struct buf *bp;
 	struct fs *fs;
 	struct indirdep *indirdep;
 	struct ufsmount *ump;
 	ufs1_daddr_t *bap1;
 	ufs2_daddr_t nb, nnb, *bap2;
 	ufs_lbn_t lbnadd, nlbn;
 	int i, nblocks, ufs1fmt;
 	int freedblocks;
 	int goingaway;
 	int freedeps;
 	int needj;
 	int level;
 	int cnt;
 
 	freeblks = freework->fw_freeblks;
 	ump = VFSTOUFS(freeblks->fb_list.wk_mp);
 	fs = ump->um_fs;
 	/*
 	 * Get buffer of block pointers to be freed.  There are three cases:
 	 * 
 	 * 1) Partial truncate caches the indirdep pointer in the freework
 	 *    which provides us a back copy to the save bp which holds the
 	 *    pointers we want to clear.  When this completes the zero
 	 *    pointers are written to the real copy.
 	 * 2) The indirect is being completely truncated, cancel_indirdep()
 	 *    eliminated the real copy and placed the indirdep on the saved
 	 *    copy.  The indirdep and buf are discarded when this completes.
 	 * 3) The indirect was not in memory, we read a copy off of the disk
 	 *    using the devvp and drop and invalidate the buffer when we're
 	 *    done.
 	 */
 	goingaway = 1;
 	indirdep = NULL;
 	if (freework->fw_indir != NULL) {
 		goingaway = 0;
 		indirdep = freework->fw_indir;
 		bp = indirdep->ir_savebp;
 		if (bp == NULL || bp->b_blkno != dbn)
 			panic("indir_trunc: Bad saved buf %p blkno %jd",
 			    bp, (intmax_t)dbn);
 	} else if ((bp = incore(&freeblks->fb_devvp->v_bufobj, dbn)) != NULL) {
 		/*
 		 * The lock prevents the buf dep list from changing and
 	 	 * indirects on devvp should only ever have one dependency.
 		 */
 		indirdep = WK_INDIRDEP(LIST_FIRST(&bp->b_dep));
 		if (indirdep == NULL || (indirdep->ir_state & GOINGAWAY) == 0)
 			panic("indir_trunc: Bad indirdep %p from buf %p",
 			    indirdep, bp);
 	} else if (bread(freeblks->fb_devvp, dbn, (int)fs->fs_bsize,
 	    NOCRED, &bp) != 0) {
 		brelse(bp);
 		return;
 	}
 	ACQUIRE_LOCK(ump);
 	/* Protects against a race with complete_trunc_indir(). */
 	freework->fw_state &= ~INPROGRESS;
 	/*
 	 * If we have an indirdep we need to enforce the truncation order
 	 * and discard it when it is complete.
 	 */
 	if (indirdep) {
 		if (freework != TAILQ_FIRST(&indirdep->ir_trunc) &&
 		    !TAILQ_EMPTY(&indirdep->ir_trunc)) {
 			/*
 			 * Add the complete truncate to the list on the
 			 * indirdep to enforce in-order processing.
 			 */
 			if (freework->fw_indir == NULL)
 				TAILQ_INSERT_TAIL(&indirdep->ir_trunc,
 				    freework, fw_next);
 			FREE_LOCK(ump);
 			return;
 		}
 		/*
 		 * If we're goingaway, free the indirdep.  Otherwise it will
 		 * linger until the write completes.
 		 */
 		if (goingaway)
 			free_indirdep(indirdep);
 	}
 	FREE_LOCK(ump);
 	/* Initialize pointers depending on block size. */
 	if (ump->um_fstype == UFS1) {
 		bap1 = (ufs1_daddr_t *)bp->b_data;
 		nb = bap1[freework->fw_off];
 		ufs1fmt = 1;
 		bap2 = NULL;
 	} else {
 		bap2 = (ufs2_daddr_t *)bp->b_data;
 		nb = bap2[freework->fw_off];
 		ufs1fmt = 0;
 		bap1 = NULL;
 	}
 	level = lbn_level(lbn);
 	needj = MOUNTEDSUJ(UFSTOVFS(ump)) != 0;
 	lbnadd = lbn_offset(fs, level);
 	nblocks = btodb(fs->fs_bsize);
 	nfreework = freework;
 	freedeps = 0;
 	cnt = 0;
 	/*
 	 * Reclaim blocks.  Traverses into nested indirect levels and
 	 * arranges for the current level to be freed when subordinates
 	 * are free when journaling.
 	 */
 	for (i = freework->fw_off; i < NINDIR(fs); i++, nb = nnb) {
 		if (i != NINDIR(fs) - 1) {
 			if (ufs1fmt)
 				nnb = bap1[i+1];
 			else
 				nnb = bap2[i+1];
 		} else
 			nnb = 0;
 		if (nb == 0)
 			continue;
 		cnt++;
 		if (level != 0) {
 			nlbn = (lbn + 1) - (i * lbnadd);
 			if (needj != 0) {
 				nfreework = newfreework(ump, freeblks, freework,
 				    nlbn, nb, fs->fs_frag, 0, 0);
 				freedeps++;
 			}
 			indir_trunc(nfreework, fsbtodb(fs, nb), nlbn);
 		} else {
 			struct freedep *freedep;
 
 			/*
 			 * Attempt to aggregate freedep dependencies for
 			 * all blocks being released to the same CG.
 			 */
 			LIST_INIT(&wkhd);
 			if (needj != 0 &&
 			    (nnb == 0 || (dtog(fs, nb) != dtog(fs, nnb)))) {
 				freedep = newfreedep(freework);
 				WORKLIST_INSERT_UNLOCKED(&wkhd,
 				    &freedep->fd_list);
 				freedeps++;
 			}
 			CTR3(KTR_SUJ,
 			    "indir_trunc: ino %d blkno %jd size %ld",
 			    freeblks->fb_inum, nb, fs->fs_bsize);
 			ffs_blkfree(ump, fs, freeblks->fb_devvp, nb,
 			    fs->fs_bsize, freeblks->fb_inum,
 			    freeblks->fb_vtype, &wkhd);
 		}
 	}
 	if (goingaway) {
 		bp->b_flags |= B_INVAL | B_NOCACHE;
 		brelse(bp);
 	}
 	freedblocks = 0;
 	if (level == 0)
 		freedblocks = (nblocks * cnt);
 	if (needj == 0)
 		freedblocks += nblocks;
 	freeblks_free(ump, freeblks, freedblocks);
 	/*
 	 * If we are journaling set up the ref counts and offset so this
 	 * indirect can be completed when its children are free.
 	 */
 	if (needj) {
 		ACQUIRE_LOCK(ump);
 		freework->fw_off = i;
 		freework->fw_ref += freedeps;
 		freework->fw_ref -= NINDIR(fs) + 1;
 		if (level == 0)
 			freeblks->fb_cgwait += freedeps;
 		if (freework->fw_ref == 0)
 			freework_freeblock(freework);
 		FREE_LOCK(ump);
 		return;
 	}
 	/*
 	 * If we're not journaling we can free the indirect now.
 	 */
 	dbn = dbtofsb(fs, dbn);
 	CTR3(KTR_SUJ,
 	    "indir_trunc 2: ino %d blkno %jd size %ld",
 	    freeblks->fb_inum, dbn, fs->fs_bsize);
 	ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize,
 	    freeblks->fb_inum, freeblks->fb_vtype, NULL);
 	/* Non SUJ softdep does single-threaded truncations. */
 	if (freework->fw_blkno == dbn) {
 		freework->fw_state |= ALLCOMPLETE;
 		ACQUIRE_LOCK(ump);
 		handle_written_freework(freework);
 		FREE_LOCK(ump);
 	}
 	return;
 }
 
 /*
  * Cancel an allocindir when it is removed via truncation.  When bp is not
  * NULL the indirect never appeared on disk and is scheduled to be freed
  * independently of the indir so we can more easily track journal work.
  */
 static void
 cancel_allocindir(aip, bp, freeblks, trunc)
 	struct allocindir *aip;
 	struct buf *bp;
 	struct freeblks *freeblks;
 	int trunc;
 {
 	struct indirdep *indirdep;
 	struct freefrag *freefrag;
 	struct newblk *newblk;
 
 	newblk = (struct newblk *)aip;
 	LIST_REMOVE(aip, ai_next);
 	/*
 	 * We must eliminate the pointer in bp if it must be freed on its
 	 * own due to partial truncate or pending journal work.
 	 */
 	if (bp && (trunc || newblk->nb_jnewblk)) {
 		/*
 		 * Clear the pointer and mark the aip to be freed
 		 * directly if it never existed on disk.
 		 */
 		aip->ai_state |= DELAYEDFREE;
 		indirdep = aip->ai_indirdep;
 		if (indirdep->ir_state & UFS1FMT)
 			((ufs1_daddr_t *)bp->b_data)[aip->ai_offset] = 0;
 		else
 			((ufs2_daddr_t *)bp->b_data)[aip->ai_offset] = 0;
 	}
 	/*
 	 * When truncating the previous pointer will be freed via
 	 * savedbp.  Eliminate the freefrag which would dup free.
 	 */
 	if (trunc && (freefrag = newblk->nb_freefrag) != NULL) {
 		newblk->nb_freefrag = NULL;
 		if (freefrag->ff_jdep)
 			cancel_jfreefrag(
 			    WK_JFREEFRAG(freefrag->ff_jdep));
 		jwork_move(&freeblks->fb_jwork, &freefrag->ff_jwork);
 		WORKITEM_FREE(freefrag, D_FREEFRAG);
 	}
 	/*
 	 * If the journal hasn't been written the jnewblk must be passed
 	 * to the call to ffs_blkfree that reclaims the space.  We accomplish
 	 * this by leaving the journal dependency on the newblk to be freed
 	 * when a freework is created in handle_workitem_freeblocks().
 	 */
 	cancel_newblk(newblk, NULL, &freeblks->fb_jwork);
 	WORKLIST_INSERT(&freeblks->fb_freeworkhd, &newblk->nb_list);
 }
 
 /*
  * Create the mkdir dependencies for . and .. in a new directory.  Link them
  * in to a newdirblk so any subsequent additions are tracked properly.  The
  * caller is responsible for adding the mkdir1 dependency to the journal
  * and updating id_mkdiradd.  This function returns with the per-filesystem
  * lock held.
  */
 static struct mkdir *
 setup_newdir(dap, newinum, dinum, newdirbp, mkdirp)
 	struct diradd *dap;
 	ino_t newinum;
 	ino_t dinum;
 	struct buf *newdirbp;
 	struct mkdir **mkdirp;
 {
 	struct newblk *newblk;
 	struct pagedep *pagedep;
 	struct inodedep *inodedep;
 	struct newdirblk *newdirblk;
 	struct mkdir *mkdir1, *mkdir2;
 	struct worklist *wk;
 	struct jaddref *jaddref;
 	struct ufsmount *ump;
 	struct mount *mp;
 
 	mp = dap->da_list.wk_mp;
 	ump = VFSTOUFS(mp);
 	newdirblk = malloc(sizeof(struct newdirblk), M_NEWDIRBLK,
 	    M_SOFTDEP_FLAGS);
 	workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp);
 	LIST_INIT(&newdirblk->db_mkdir);
 	mkdir1 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS);
 	workitem_alloc(&mkdir1->md_list, D_MKDIR, mp);
 	mkdir1->md_state = ATTACHED | MKDIR_BODY;
 	mkdir1->md_diradd = dap;
 	mkdir1->md_jaddref = NULL;
 	mkdir2 = malloc(sizeof(struct mkdir), M_MKDIR, M_SOFTDEP_FLAGS);
 	workitem_alloc(&mkdir2->md_list, D_MKDIR, mp);
 	mkdir2->md_state = ATTACHED | MKDIR_PARENT;
 	mkdir2->md_diradd = dap;
 	mkdir2->md_jaddref = NULL;
 	if (MOUNTEDSUJ(mp) == 0) {
 		mkdir1->md_state |= DEPCOMPLETE;
 		mkdir2->md_state |= DEPCOMPLETE;
 	}
 	/*
 	 * Dependency on "." and ".." being written to disk.
 	 */
 	mkdir1->md_buf = newdirbp;
 	ACQUIRE_LOCK(VFSTOUFS(mp));
 	LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir1, md_mkdirs);
 	/*
 	 * We must link the pagedep, allocdirect, and newdirblk for
 	 * the initial file page so the pointer to the new directory
 	 * is not written until the directory contents are live and
 	 * any subsequent additions are not marked live until the
 	 * block is reachable via the inode.
 	 */
 	if (pagedep_lookup(mp, newdirbp, newinum, 0, 0, &pagedep) == 0)
 		panic("setup_newdir: lost pagedep");
 	LIST_FOREACH(wk, &newdirbp->b_dep, wk_list)
 		if (wk->wk_type == D_ALLOCDIRECT)
 			break;
 	if (wk == NULL)
 		panic("setup_newdir: lost allocdirect");
 	if (pagedep->pd_state & NEWBLOCK)
 		panic("setup_newdir: NEWBLOCK already set");
 	newblk = WK_NEWBLK(wk);
 	pagedep->pd_state |= NEWBLOCK;
 	pagedep->pd_newdirblk = newdirblk;
 	newdirblk->db_pagedep = pagedep;
 	WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list);
 	WORKLIST_INSERT(&newdirblk->db_mkdir, &mkdir1->md_list);
 	/*
 	 * Look up the inodedep for the parent directory so that we
 	 * can link mkdir2 into the pending dotdot jaddref or
 	 * the inode write if there is none.  If the inode is
 	 * ALLCOMPLETE and no jaddref is present all dependencies have
 	 * been satisfied and mkdir2 can be freed.
 	 */
 	inodedep_lookup(mp, dinum, 0, &inodedep);
 	if (MOUNTEDSUJ(mp)) {
 		if (inodedep == NULL)
 			panic("setup_newdir: Lost parent.");
 		jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
 		    inoreflst);
 		KASSERT(jaddref != NULL && jaddref->ja_parent == newinum &&
 		    (jaddref->ja_state & MKDIR_PARENT),
 		    ("setup_newdir: bad dotdot jaddref %p", jaddref));
 		LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir2, md_mkdirs);
 		mkdir2->md_jaddref = jaddref;
 		jaddref->ja_mkdir = mkdir2;
 	} else if (inodedep == NULL ||
 	    (inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
 		dap->da_state &= ~MKDIR_PARENT;
 		WORKITEM_FREE(mkdir2, D_MKDIR);
 		mkdir2 = NULL;
 	} else {
 		LIST_INSERT_HEAD(&ump->softdep_mkdirlisthd, mkdir2, md_mkdirs);
 		WORKLIST_INSERT(&inodedep->id_bufwait, &mkdir2->md_list);
 	}
 	*mkdirp = mkdir2;
 
 	return (mkdir1);
 }
 
 /*
  * Directory entry addition dependencies.
  * 
  * When adding a new directory entry, the inode (with its incremented link
  * count) must be written to disk before the directory entry's pointer to it.
  * Also, if the inode is newly allocated, the corresponding freemap must be
  * updated (on disk) before the directory entry's pointer. These requirements
  * are met via undo/redo on the directory entry's pointer, which consists
  * simply of the inode number.
  * 
  * As directory entries are added and deleted, the free space within a
  * directory block can become fragmented.  The ufs filesystem will compact
  * a fragmented directory block to make space for a new entry. When this
  * occurs, the offsets of previously added entries change. Any "diradd"
  * dependency structures corresponding to these entries must be updated with
  * the new offsets.
  */
 
 /*
  * This routine is called after the in-memory inode's link
  * count has been incremented, but before the directory entry's
  * pointer to the inode has been set.
  */
 int
 softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp, isnewblk)
 	struct buf *bp;		/* buffer containing directory block */
 	struct inode *dp;	/* inode for directory */
 	off_t diroffset;	/* offset of new entry in directory */
 	ino_t newinum;		/* inode referenced by new directory entry */
 	struct buf *newdirbp;	/* non-NULL => contents of new mkdir */
 	int isnewblk;		/* entry is in a newly allocated block */
 {
 	int offset;		/* offset of new entry within directory block */
 	ufs_lbn_t lbn;		/* block in directory containing new entry */
 	struct fs *fs;
 	struct diradd *dap;
 	struct newblk *newblk;
 	struct pagedep *pagedep;
 	struct inodedep *inodedep;
 	struct newdirblk *newdirblk;
 	struct mkdir *mkdir1, *mkdir2;
 	struct jaddref *jaddref;
 	struct ufsmount *ump;
 	struct mount *mp;
 	int isindir;
 
 	mp = ITOVFS(dp);
 	ump = VFSTOUFS(mp);
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_setup_directory_add called on non-softdep filesystem"));
 	/*
 	 * Whiteouts have no dependencies.
 	 */
 	if (newinum == WINO) {
 		if (newdirbp != NULL)
 			bdwrite(newdirbp);
 		return (0);
 	}
 	jaddref = NULL;
 	mkdir1 = mkdir2 = NULL;
 	fs = ump->um_fs;
 	lbn = lblkno(fs, diroffset);
 	offset = blkoff(fs, diroffset);
 	dap = malloc(sizeof(struct diradd), M_DIRADD,
 		M_SOFTDEP_FLAGS|M_ZERO);
 	workitem_alloc(&dap->da_list, D_DIRADD, mp);
 	dap->da_offset = offset;
 	dap->da_newinum = newinum;
 	dap->da_state = ATTACHED;
 	LIST_INIT(&dap->da_jwork);
 	isindir = bp->b_lblkno >= NDADDR;
 	newdirblk = NULL;
 	if (isnewblk &&
 	    (isindir ? blkoff(fs, diroffset) : fragoff(fs, diroffset)) == 0) {
 		newdirblk = malloc(sizeof(struct newdirblk),
 		    M_NEWDIRBLK, M_SOFTDEP_FLAGS);
 		workitem_alloc(&newdirblk->db_list, D_NEWDIRBLK, mp);
 		LIST_INIT(&newdirblk->db_mkdir);
 	}
 	/*
 	 * If we're creating a new directory setup the dependencies and set
 	 * the dap state to wait for them.  Otherwise it's COMPLETE and
 	 * we can move on.
 	 */
 	if (newdirbp == NULL) {
 		dap->da_state |= DEPCOMPLETE;
 		ACQUIRE_LOCK(ump);
 	} else {
 		dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
 		mkdir1 = setup_newdir(dap, newinum, dp->i_number, newdirbp,
 		    &mkdir2);
 	}
 	/*
 	 * Link into parent directory pagedep to await its being written.
 	 */
 	pagedep_lookup(mp, bp, dp->i_number, lbn, DEPALLOC, &pagedep);
 #ifdef DEBUG
 	if (diradd_lookup(pagedep, offset) != NULL)
 		panic("softdep_setup_directory_add: %p already at off %d\n",
 		    diradd_lookup(pagedep, offset), offset);
 #endif
 	dap->da_pagedep = pagedep;
 	LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)], dap,
 	    da_pdlist);
 	inodedep_lookup(mp, newinum, DEPALLOC, &inodedep);
 	/*
 	 * If we're journaling, link the diradd into the jaddref so it
 	 * may be completed after the journal entry is written.  Otherwise,
 	 * link the diradd into its inodedep.  If the inode is not yet
 	 * written place it on the bufwait list, otherwise do the post-inode
 	 * write processing to put it on the id_pendinghd list.
 	 */
 	if (MOUNTEDSUJ(mp)) {
 		jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
 		    inoreflst);
 		KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number,
 		    ("softdep_setup_directory_add: bad jaddref %p", jaddref));
 		jaddref->ja_diroff = diroffset;
 		jaddref->ja_diradd = dap;
 		add_to_journal(&jaddref->ja_list);
 	} else if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE)
 		diradd_inode_written(dap, inodedep);
 	else
 		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
 	/*
 	 * Add the journal entries for . and .. links now that the primary
 	 * link is written.
 	 */
 	if (mkdir1 != NULL && MOUNTEDSUJ(mp)) {
 		jaddref = (struct jaddref *)TAILQ_PREV(&jaddref->ja_ref,
 		    inoreflst, if_deps);
 		KASSERT(jaddref != NULL &&
 		    jaddref->ja_ino == jaddref->ja_parent &&
 		    (jaddref->ja_state & MKDIR_BODY),
 		    ("softdep_setup_directory_add: bad dot jaddref %p",
 		    jaddref));
 		mkdir1->md_jaddref = jaddref;
 		jaddref->ja_mkdir = mkdir1;
 		/*
 		 * It is important that the dotdot journal entry
 		 * is added prior to the dot entry since dot writes
 		 * both the dot and dotdot links.  These both must
 		 * be added after the primary link for the journal
 		 * to remain consistent.
 		 */
 		add_to_journal(&mkdir2->md_jaddref->ja_list);
 		add_to_journal(&jaddref->ja_list);
 	}
 	/*
 	 * If we are adding a new directory remember this diradd so that if
 	 * we rename it we can keep the dot and dotdot dependencies.  If
 	 * we are adding a new name for an inode that has a mkdiradd we
 	 * must be in rename and we have to move the dot and dotdot
 	 * dependencies to this new name.  The old name is being orphaned
 	 * soon.
 	 */
 	if (mkdir1 != NULL) {
 		if (inodedep->id_mkdiradd != NULL)
 			panic("softdep_setup_directory_add: Existing mkdir");
 		inodedep->id_mkdiradd = dap;
 	} else if (inodedep->id_mkdiradd)
 		merge_diradd(inodedep, dap);
 	if (newdirblk != NULL) {
 		/*
 		 * There is nothing to do if we are already tracking
 		 * this block.
 		 */
 		if ((pagedep->pd_state & NEWBLOCK) != 0) {
 			WORKITEM_FREE(newdirblk, D_NEWDIRBLK);
 			FREE_LOCK(ump);
 			return (0);
 		}
 		if (newblk_lookup(mp, dbtofsb(fs, bp->b_blkno), 0, &newblk)
 		    == 0)
 			panic("softdep_setup_directory_add: lost entry");
 		WORKLIST_INSERT(&newblk->nb_newdirblk, &newdirblk->db_list);
 		pagedep->pd_state |= NEWBLOCK;
 		pagedep->pd_newdirblk = newdirblk;
 		newdirblk->db_pagedep = pagedep;
 		FREE_LOCK(ump);
 		/*
 		 * If we extended into an indirect signal direnter to sync.
 		 */
 		if (isindir)
 			return (1);
 		return (0);
 	}
 	FREE_LOCK(ump);
 	return (0);
 }
 
 /*
  * This procedure is called to change the offset of a directory
  * entry when compacting a directory block which must be owned
  * exclusively by the caller. Note that the actual entry movement
  * must be done in this procedure to ensure that no I/O completions
  * occur while the move is in progress.
  */
 void 
 softdep_change_directoryentry_offset(bp, dp, base, oldloc, newloc, entrysize)
 	struct buf *bp;		/* Buffer holding directory block. */
 	struct inode *dp;	/* inode for directory */
 	caddr_t base;		/* address of dp->i_offset */
 	caddr_t oldloc;		/* address of old directory location */
 	caddr_t newloc;		/* address of new directory location */
 	int entrysize;		/* size of directory entry */
 {
 	int offset, oldoffset, newoffset;
 	struct pagedep *pagedep;
 	struct jmvref *jmvref;
 	struct diradd *dap;
 	struct direct *de;
 	struct mount *mp;
 	struct ufsmount *ump;
 	ufs_lbn_t lbn;
 	int flags;
 
 	mp = ITOVFS(dp);
 	ump = VFSTOUFS(mp);
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_change_directoryentry_offset called on "
 	     "non-softdep filesystem"));
 	de = (struct direct *)oldloc;
 	jmvref = NULL;
 	flags = 0;
 	/*
 	 * Moves are always journaled as it would be too complex to
 	 * determine if any affected adds or removes are present in the
 	 * journal.
 	 */
 	if (MOUNTEDSUJ(mp)) {
 		flags = DEPALLOC;
 		jmvref = newjmvref(dp, de->d_ino,
 		    dp->i_offset + (oldloc - base),
 		    dp->i_offset + (newloc - base));
 	}
 	lbn = lblkno(ump->um_fs, dp->i_offset);
 	offset = blkoff(ump->um_fs, dp->i_offset);
 	oldoffset = offset + (oldloc - base);
 	newoffset = offset + (newloc - base);
 	ACQUIRE_LOCK(ump);
 	if (pagedep_lookup(mp, bp, dp->i_number, lbn, flags, &pagedep) == 0)
 		goto done;
 	dap = diradd_lookup(pagedep, oldoffset);
 	if (dap) {
 		dap->da_offset = newoffset;
 		newoffset = DIRADDHASH(newoffset);
 		oldoffset = DIRADDHASH(oldoffset);
 		if ((dap->da_state & ALLCOMPLETE) != ALLCOMPLETE &&
 		    newoffset != oldoffset) {
 			LIST_REMOVE(dap, da_pdlist);
 			LIST_INSERT_HEAD(&pagedep->pd_diraddhd[newoffset],
 			    dap, da_pdlist);
 		}
 	}
 done:
 	if (jmvref) {
 		jmvref->jm_pagedep = pagedep;
 		LIST_INSERT_HEAD(&pagedep->pd_jmvrefhd, jmvref, jm_deps);
 		add_to_journal(&jmvref->jm_list);
 	}
 	bcopy(oldloc, newloc, entrysize);
 	FREE_LOCK(ump);
 }
 
 /*
  * Move the mkdir dependencies and journal work from one diradd to another
  * when renaming a directory.  The new name must depend on the mkdir deps
  * completing as the old name did.  Directories can only have one valid link
  * at a time so one must be canonical.
  */
 static void
 merge_diradd(inodedep, newdap)
 	struct inodedep *inodedep;
 	struct diradd *newdap;
 {
 	struct diradd *olddap;
 	struct mkdir *mkdir, *nextmd;
 	struct ufsmount *ump;
 	short state;
 
 	olddap = inodedep->id_mkdiradd;
 	inodedep->id_mkdiradd = newdap;
 	if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
 		newdap->da_state &= ~DEPCOMPLETE;
 		ump = VFSTOUFS(inodedep->id_list.wk_mp);
 		for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir;
 		     mkdir = nextmd) {
 			nextmd = LIST_NEXT(mkdir, md_mkdirs);
 			if (mkdir->md_diradd != olddap)
 				continue;
 			mkdir->md_diradd = newdap;
 			state = mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY);
 			newdap->da_state |= state;
 			olddap->da_state &= ~state;
 			if ((olddap->da_state &
 			    (MKDIR_PARENT | MKDIR_BODY)) == 0)
 				break;
 		}
 		if ((olddap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0)
 			panic("merge_diradd: unfound ref");
 	}
 	/*
 	 * Any mkdir related journal items are not safe to be freed until
 	 * the new name is stable.
 	 */
 	jwork_move(&newdap->da_jwork, &olddap->da_jwork);
 	olddap->da_state |= DEPCOMPLETE;
 	complete_diradd(olddap);
 }
 
 /*
  * Move the diradd to the pending list when all diradd dependencies are
  * complete.
  */
 static void
 complete_diradd(dap)
 	struct diradd *dap;
 {
 	struct pagedep *pagedep;
 
 	if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
 		if (dap->da_state & DIRCHG)
 			pagedep = dap->da_previous->dm_pagedep;
 		else
 			pagedep = dap->da_pagedep;
 		LIST_REMOVE(dap, da_pdlist);
 		LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
 	}
 }
 
 /*
  * Cancel a diradd when a dirrem overlaps with it.  We must cancel the journal
  * add entries and conditonally journal the remove.
  */
 static void
 cancel_diradd(dap, dirrem, jremref, dotremref, dotdotremref)
 	struct diradd *dap;
 	struct dirrem *dirrem;
 	struct jremref *jremref;
 	struct jremref *dotremref;
 	struct jremref *dotdotremref;
 {
 	struct inodedep *inodedep;
 	struct jaddref *jaddref;
 	struct inoref *inoref;
 	struct ufsmount *ump;
 	struct mkdir *mkdir;
 
 	/*
 	 * If no remove references were allocated we're on a non-journaled
 	 * filesystem and can skip the cancel step.
 	 */
 	if (jremref == NULL) {
 		free_diradd(dap, NULL);
 		return;
 	}
 	/*
 	 * Cancel the primary name an free it if it does not require
 	 * journaling.
 	 */
 	if (inodedep_lookup(dap->da_list.wk_mp, dap->da_newinum,
 	    0, &inodedep) != 0) {
 		/* Abort the addref that reference this diradd.  */
 		TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
 			if (inoref->if_list.wk_type != D_JADDREF)
 				continue;
 			jaddref = (struct jaddref *)inoref;
 			if (jaddref->ja_diradd != dap)
 				continue;
 			if (cancel_jaddref(jaddref, inodedep,
 			    &dirrem->dm_jwork) == 0) {
 				free_jremref(jremref);
 				jremref = NULL;
 			}
 			break;
 		}
 	}
 	/*
 	 * Cancel subordinate names and free them if they do not require
 	 * journaling.
 	 */
 	if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
 		ump = VFSTOUFS(dap->da_list.wk_mp);
 		LIST_FOREACH(mkdir, &ump->softdep_mkdirlisthd, md_mkdirs) {
 			if (mkdir->md_diradd != dap)
 				continue;
 			if ((jaddref = mkdir->md_jaddref) == NULL)
 				continue;
 			mkdir->md_jaddref = NULL;
 			if (mkdir->md_state & MKDIR_PARENT) {
 				if (cancel_jaddref(jaddref, NULL,
 				    &dirrem->dm_jwork) == 0) {
 					free_jremref(dotdotremref);
 					dotdotremref = NULL;
 				}
 			} else {
 				if (cancel_jaddref(jaddref, inodedep,
 				    &dirrem->dm_jwork) == 0) {
 					free_jremref(dotremref);
 					dotremref = NULL;
 				}
 			}
 		}
 	}
 
 	if (jremref)
 		journal_jremref(dirrem, jremref, inodedep);
 	if (dotremref)
 		journal_jremref(dirrem, dotremref, inodedep);
 	if (dotdotremref)
 		journal_jremref(dirrem, dotdotremref, NULL);
 	jwork_move(&dirrem->dm_jwork, &dap->da_jwork);
 	free_diradd(dap, &dirrem->dm_jwork);
 }
 
 /*
  * Free a diradd dependency structure. This routine must be called
  * with splbio interrupts blocked.
  */
 static void
 free_diradd(dap, wkhd)
 	struct diradd *dap;
 	struct workhead *wkhd;
 {
 	struct dirrem *dirrem;
 	struct pagedep *pagedep;
 	struct inodedep *inodedep;
 	struct mkdir *mkdir, *nextmd;
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(dap->da_list.wk_mp);
 	LOCK_OWNED(ump);
 	LIST_REMOVE(dap, da_pdlist);
 	if (dap->da_state & ONWORKLIST)
 		WORKLIST_REMOVE(&dap->da_list);
 	if ((dap->da_state & DIRCHG) == 0) {
 		pagedep = dap->da_pagedep;
 	} else {
 		dirrem = dap->da_previous;
 		pagedep = dirrem->dm_pagedep;
 		dirrem->dm_dirinum = pagedep->pd_ino;
 		dirrem->dm_state |= COMPLETE;
 		if (LIST_EMPTY(&dirrem->dm_jremrefhd))
 			add_to_worklist(&dirrem->dm_list, 0);
 	}
 	if (inodedep_lookup(pagedep->pd_list.wk_mp, dap->da_newinum,
 	    0, &inodedep) != 0)
 		if (inodedep->id_mkdiradd == dap)
 			inodedep->id_mkdiradd = NULL;
 	if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0) {
 		for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir;
 		     mkdir = nextmd) {
 			nextmd = LIST_NEXT(mkdir, md_mkdirs);
 			if (mkdir->md_diradd != dap)
 				continue;
 			dap->da_state &=
 			    ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY));
 			LIST_REMOVE(mkdir, md_mkdirs);
 			if (mkdir->md_state & ONWORKLIST)
 				WORKLIST_REMOVE(&mkdir->md_list);
 			if (mkdir->md_jaddref != NULL)
 				panic("free_diradd: Unexpected jaddref");
 			WORKITEM_FREE(mkdir, D_MKDIR);
 			if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0)
 				break;
 		}
 		if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) != 0)
 			panic("free_diradd: unfound ref");
 	}
 	if (inodedep)
 		free_inodedep(inodedep);
 	/*
 	 * Free any journal segments waiting for the directory write.
 	 */
 	handle_jwork(&dap->da_jwork);
 	WORKITEM_FREE(dap, D_DIRADD);
 }
 
 /*
  * Directory entry removal dependencies.
  * 
  * When removing a directory entry, the entry's inode pointer must be
  * zero'ed on disk before the corresponding inode's link count is decremented
  * (possibly freeing the inode for re-use). This dependency is handled by
  * updating the directory entry but delaying the inode count reduction until
  * after the directory block has been written to disk. After this point, the
  * inode count can be decremented whenever it is convenient.
  */
 
 /*
  * This routine should be called immediately after removing
  * a directory entry.  The inode's link count should not be
  * decremented by the calling procedure -- the soft updates
  * code will do this task when it is safe.
  */
 void 
 softdep_setup_remove(bp, dp, ip, isrmdir)
 	struct buf *bp;		/* buffer containing directory block */
 	struct inode *dp;	/* inode for the directory being modified */
 	struct inode *ip;	/* inode for directory entry being removed */
 	int isrmdir;		/* indicates if doing RMDIR */
 {
 	struct dirrem *dirrem, *prevdirrem;
 	struct inodedep *inodedep;
 	struct ufsmount *ump;
 	int direct;
 
 	ump = ITOUMP(ip);
 	KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
 	    ("softdep_setup_remove called on non-softdep filesystem"));
 	/*
 	 * Allocate a new dirrem if appropriate and ACQUIRE_LOCK.  We want
 	 * newdirrem() to setup the full directory remove which requires
 	 * isrmdir > 1.
 	 */
 	dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
 	/*
 	 * Add the dirrem to the inodedep's pending remove list for quick
 	 * discovery later.
 	 */
 	if (inodedep_lookup(UFSTOVFS(ump), ip->i_number, 0, &inodedep) == 0)
 		panic("softdep_setup_remove: Lost inodedep.");
 	KASSERT((inodedep->id_state & UNLINKED) == 0, ("inode unlinked"));
 	dirrem->dm_state |= ONDEPLIST;
 	LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext);
 
 	/*
 	 * If the COMPLETE flag is clear, then there were no active
 	 * entries and we want to roll back to a zeroed entry until
 	 * the new inode is committed to disk. If the COMPLETE flag is
 	 * set then we have deleted an entry that never made it to
 	 * disk. If the entry we deleted resulted from a name change,
 	 * then the old name still resides on disk. We cannot delete
 	 * its inode (returned to us in prevdirrem) until the zeroed
 	 * directory entry gets to disk. The new inode has never been
 	 * referenced on the disk, so can be deleted immediately.
 	 */
 	if ((dirrem->dm_state & COMPLETE) == 0) {
 		LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd, dirrem,
 		    dm_next);
 		FREE_LOCK(ump);
 	} else {
 		if (prevdirrem != NULL)
 			LIST_INSERT_HEAD(&dirrem->dm_pagedep->pd_dirremhd,
 			    prevdirrem, dm_next);
 		dirrem->dm_dirinum = dirrem->dm_pagedep->pd_ino;
 		direct = LIST_EMPTY(&dirrem->dm_jremrefhd);
 		FREE_LOCK(ump);
 		if (direct)
 			handle_workitem_remove(dirrem, 0);
 	}
 }
 
 /*
  * Check for an entry matching 'offset' on both the pd_dirraddhd list and the
  * pd_pendinghd list of a pagedep.
  */
 static struct diradd *
 diradd_lookup(pagedep, offset)
 	struct pagedep *pagedep;
 	int offset;
 {
 	struct diradd *dap;
 
 	LIST_FOREACH(dap, &pagedep->pd_diraddhd[DIRADDHASH(offset)], da_pdlist)
 		if (dap->da_offset == offset)
 			return (dap);
 	LIST_FOREACH(dap, &pagedep->pd_pendinghd, da_pdlist)
 		if (dap->da_offset == offset)
 			return (dap);
 	return (NULL);
 }
 
 /*
  * Search for a .. diradd dependency in a directory that is being removed.
  * If the directory was renamed to a new parent we have a diradd rather
  * than a mkdir for the .. entry.  We need to cancel it now before
  * it is found in truncate().
  */
 static struct jremref *
 cancel_diradd_dotdot(ip, dirrem, jremref)
 	struct inode *ip;
 	struct dirrem *dirrem;
 	struct jremref *jremref;
 {
 	struct pagedep *pagedep;
 	struct diradd *dap;
 	struct worklist *wk;
 
 	if (pagedep_lookup(ITOVFS(ip), NULL, ip->i_number, 0, 0, &pagedep) == 0)
 		return (jremref);
 	dap = diradd_lookup(pagedep, DOTDOT_OFFSET);
 	if (dap == NULL)
 		return (jremref);
 	cancel_diradd(dap, dirrem, jremref, NULL, NULL);
 	/*
 	 * Mark any journal work as belonging to the parent so it is freed
 	 * with the .. reference.
 	 */
 	LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list)
 		wk->wk_state |= MKDIR_PARENT;
 	return (NULL);
 }
 
 /*
  * Cancel the MKDIR_PARENT mkdir component of a diradd when we're going to
  * replace it with a dirrem/diradd pair as a result of re-parenting a
  * directory.  This ensures that we don't simultaneously have a mkdir and
  * a diradd for the same .. entry.
  */
 static struct jremref *
 cancel_mkdir_dotdot(ip, dirrem, jremref)
 	struct inode *ip;
 	struct dirrem *dirrem;
 	struct jremref *jremref;
 {
 	struct inodedep *inodedep;
 	struct jaddref *jaddref;
 	struct ufsmount *ump;
 	struct mkdir *mkdir;
 	struct diradd *dap;
 	struct mount *mp;
 
 	mp = ITOVFS(ip);
 	if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0)
 		return (jremref);
 	dap = inodedep->id_mkdiradd;
 	if (dap == NULL || (dap->da_state & MKDIR_PARENT) == 0)
 		return (jremref);
 	ump = VFSTOUFS(inodedep->id_list.wk_mp);
 	for (mkdir = LIST_FIRST(&ump->softdep_mkdirlisthd); mkdir;
 	    mkdir = LIST_NEXT(mkdir, md_mkdirs))
 		if (mkdir->md_diradd == dap && mkdir->md_state & MKDIR_PARENT)
 			break;
 	if (mkdir == NULL)
 		panic("cancel_mkdir_dotdot: Unable to find mkdir\n");
 	if ((jaddref = mkdir->md_jaddref) != NULL) {
 		mkdir->md_jaddref = NULL;
 		jaddref->ja_state &= ~MKDIR_PARENT;
 		if (inodedep_lookup(mp, jaddref->ja_ino, 0, &inodedep) == 0)
 			panic("cancel_mkdir_dotdot: Lost parent inodedep");
 		if (cancel_jaddref(jaddref, inodedep, &dirrem->dm_jwork)) {
 			journal_jremref(dirrem, jremref, inodedep);
 			jremref = NULL;
 		}
 	}
 	if (mkdir->md_state & ONWORKLIST)
 		WORKLIST_REMOVE(&mkdir->md_list);
 	mkdir->md_state |= ALLCOMPLETE;
 	complete_mkdir(mkdir);
 	return (jremref);
 }
 
 static void
 journal_jremref(dirrem, jremref, inodedep)
 	struct dirrem *dirrem;
 	struct jremref *jremref;
 	struct inodedep *inodedep;
 {
 
 	if (inodedep == NULL)
 		if (inodedep_lookup(jremref->jr_list.wk_mp,
 		    jremref->jr_ref.if_ino, 0, &inodedep) == 0)
 			panic("journal_jremref: Lost inodedep");
 	LIST_INSERT_HEAD(&dirrem->dm_jremrefhd, jremref, jr_deps);
 	TAILQ_INSERT_TAIL(&inodedep->id_inoreflst, &jremref->jr_ref, if_deps);
 	add_to_journal(&jremref->jr_list);
 }
 
 static void
 dirrem_journal(dirrem, jremref, dotremref, dotdotremref)
 	struct dirrem *dirrem;
 	struct jremref *jremref;
 	struct jremref *dotremref;
 	struct jremref *dotdotremref;
 {
 	struct inodedep *inodedep;
 
 
 	if (inodedep_lookup(jremref->jr_list.wk_mp, jremref->jr_ref.if_ino, 0,
 	    &inodedep) == 0)
 		panic("dirrem_journal: Lost inodedep");
 	journal_jremref(dirrem, jremref, inodedep);
 	if (dotremref)
 		journal_jremref(dirrem, dotremref, inodedep);
 	if (dotdotremref)
 		journal_jremref(dirrem, dotdotremref, NULL);
 }
 
 /*
  * Allocate a new dirrem if appropriate and return it along with
  * its associated pagedep. Called without a lock, returns with lock.
  */
 static struct dirrem *
 newdirrem(bp, dp, ip, isrmdir, prevdirremp)
 	struct buf *bp;		/* buffer containing directory block */
 	struct inode *dp;	/* inode for the directory being modified */
 	struct inode *ip;	/* inode for directory entry being removed */
 	int isrmdir;		/* indicates if doing RMDIR */
 	struct dirrem **prevdirremp; /* previously referenced inode, if any */
 {
 	int offset;
 	ufs_lbn_t lbn;
 	struct diradd *dap;
 	struct dirrem *dirrem;
 	struct pagedep *pagedep;
 	struct jremref *jremref;
 	struct jremref *dotremref;
 	struct jremref *dotdotremref;
 	struct vnode *dvp;
 	struct ufsmount *ump;
 
 	/*
 	 * Whiteouts have no deletion dependencies.
 	 */
 	if (ip == NULL)
 		panic("newdirrem: whiteout");
 	dvp = ITOV(dp);
 	ump = ITOUMP(dp);
 
 	/*
 	 * If the system is over its limit and our filesystem is
 	 * responsible for more than our share of that usage and
 	 * we are not a snapshot, request some inodedep cleanup.
 	 * Limiting the number of dirrem structures will also limit
 	 * the number of freefile and freeblks structures.
 	 */
 	ACQUIRE_LOCK(ump);
 	if (!IS_SNAPSHOT(ip) && softdep_excess_items(ump, D_DIRREM))
 		schedule_cleanup(UFSTOVFS(ump));
 	else
 		FREE_LOCK(ump);
 	dirrem = malloc(sizeof(struct dirrem), M_DIRREM, M_SOFTDEP_FLAGS |
 	    M_ZERO);
 	workitem_alloc(&dirrem->dm_list, D_DIRREM, dvp->v_mount);
 	LIST_INIT(&dirrem->dm_jremrefhd);
 	LIST_INIT(&dirrem->dm_jwork);
 	dirrem->dm_state = isrmdir ? RMDIR : 0;
 	dirrem->dm_oldinum = ip->i_number;
 	*prevdirremp = NULL;
 	/*
 	 * Allocate remove reference structures to track journal write
 	 * dependencies.  We will always have one for the link and
 	 * when doing directories we will always have one more for dot.
 	 * When renaming a directory we skip the dotdot link change so
 	 * this is not needed.
 	 */
 	jremref = dotremref = dotdotremref = NULL;
 	if (DOINGSUJ(dvp)) {
 		if (isrmdir) {
 			jremref = newjremref(dirrem, dp, ip, dp->i_offset,
 			    ip->i_effnlink + 2);
 			dotremref = newjremref(dirrem, ip, ip, DOT_OFFSET,
 			    ip->i_effnlink + 1);
 			dotdotremref = newjremref(dirrem, ip, dp, DOTDOT_OFFSET,
 			    dp->i_effnlink + 1);
 			dotdotremref->jr_state |= MKDIR_PARENT;
 		} else
 			jremref = newjremref(dirrem, dp, ip, dp->i_offset,
 			    ip->i_effnlink + 1);
 	}
 	ACQUIRE_LOCK(ump);
 	lbn = lblkno(ump->um_fs, dp->i_offset);
 	offset = blkoff(ump->um_fs, dp->i_offset);
 	pagedep_lookup(UFSTOVFS(ump), bp, dp->i_number, lbn, DEPALLOC,
 	    &pagedep);
 	dirrem->dm_pagedep = pagedep;
 	dirrem->dm_offset = offset;
 	/*
 	 * If we're renaming a .. link to a new directory, cancel any
 	 * existing MKDIR_PARENT mkdir.  If it has already been canceled
 	 * the jremref is preserved for any potential diradd in this
 	 * location.  This can not coincide with a rmdir.
 	 */
 	if (dp->i_offset == DOTDOT_OFFSET) {
 		if (isrmdir)
 			panic("newdirrem: .. directory change during remove?");
 		jremref = cancel_mkdir_dotdot(dp, dirrem, jremref);
 	}
 	/*
 	 * If we're removing a directory search for the .. dependency now and
 	 * cancel it.  Any pending journal work will be added to the dirrem
 	 * to be completed when the workitem remove completes.
 	 */
 	if (isrmdir)
 		dotdotremref = cancel_diradd_dotdot(ip, dirrem, dotdotremref);
 	/*
 	 * Check for a diradd dependency for the same directory entry.
 	 * If present, then both dependencies become obsolete and can
 	 * be de-allocated.
 	 */
 	dap = diradd_lookup(pagedep, offset);
 	if (dap == NULL) {
 		/*
 		 * Link the jremref structures into the dirrem so they are
 		 * written prior to the pagedep.
 		 */
 		if (jremref)
 			dirrem_journal(dirrem, jremref, dotremref,
 			    dotdotremref);
 		return (dirrem);
 	}
 	/*
 	 * Must be ATTACHED at this point.
 	 */
 	if ((dap->da_state & ATTACHED) == 0)
 		panic("newdirrem: not ATTACHED");
 	if (dap->da_newinum != ip->i_number)
 		panic("newdirrem: inum %ju should be %ju",
 		    (uintmax_t)ip->i_number, (uintmax_t)dap->da_newinum);
 	/*
 	 * If we are deleting a changed name that never made it to disk,
 	 * then return the dirrem describing the previous inode (which
 	 * represents the inode currently referenced from this entry on disk).
 	 */
 	if ((dap->da_state & DIRCHG) != 0) {
 		*prevdirremp = dap->da_previous;
 		dap->da_state &= ~DIRCHG;
 		dap->da_pagedep = pagedep;
 	}
 	/*
 	 * We are deleting an entry that never made it to disk.
 	 * Mark it COMPLETE so we can delete its inode immediately.
 	 */
 	dirrem->dm_state |= COMPLETE;
 	cancel_diradd(dap, dirrem, jremref, dotremref, dotdotremref);
 #ifdef SUJ_DEBUG
 	if (isrmdir == 0) {
 		struct worklist *wk;
 
 		LIST_FOREACH(wk, &dirrem->dm_jwork, wk_list)
 			if (wk->wk_state & (MKDIR_BODY | MKDIR_PARENT))
 				panic("bad wk %p (0x%X)\n", wk, wk->wk_state);
 	}
 #endif
 
 	return (dirrem);
 }
 
 /*
  * Directory entry change dependencies.
  * 
  * Changing an existing directory entry requires that an add operation
  * be completed first followed by a deletion. The semantics for the addition
  * are identical to the description of adding a new entry above except
  * that the rollback is to the old inode number rather than zero. Once
  * the addition dependency is completed, the removal is done as described
  * in the removal routine above.
  */
 
 /*
  * This routine should be called immediately after changing
  * a directory entry.  The inode's link count should not be
  * decremented by the calling procedure -- the soft updates
  * code will perform this task when it is safe.
  */
 void 
 softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
 	struct buf *bp;		/* buffer containing directory block */
 	struct inode *dp;	/* inode for the directory being modified */
 	struct inode *ip;	/* inode for directory entry being removed */
 	ino_t newinum;		/* new inode number for changed entry */
 	int isrmdir;		/* indicates if doing RMDIR */
 {
 	int offset;
 	struct diradd *dap = NULL;
 	struct dirrem *dirrem, *prevdirrem;
 	struct pagedep *pagedep;
 	struct inodedep *inodedep;
 	struct jaddref *jaddref;
 	struct mount *mp;
 	struct ufsmount *ump;
 
 	mp = ITOVFS(dp);
 	ump = VFSTOUFS(mp);
 	offset = blkoff(ump->um_fs, dp->i_offset);
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	   ("softdep_setup_directory_change called on non-softdep filesystem"));
 
 	/*
 	 * Whiteouts do not need diradd dependencies.
 	 */
 	if (newinum != WINO) {
 		dap = malloc(sizeof(struct diradd),
 		    M_DIRADD, M_SOFTDEP_FLAGS|M_ZERO);
 		workitem_alloc(&dap->da_list, D_DIRADD, mp);
 		dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE;
 		dap->da_offset = offset;
 		dap->da_newinum = newinum;
 		LIST_INIT(&dap->da_jwork);
 	}
 
 	/*
 	 * Allocate a new dirrem and ACQUIRE_LOCK.
 	 */
 	dirrem = newdirrem(bp, dp, ip, isrmdir, &prevdirrem);
 	pagedep = dirrem->dm_pagedep;
 	/*
 	 * The possible values for isrmdir:
 	 *	0 - non-directory file rename
 	 *	1 - directory rename within same directory
 	 *   inum - directory rename to new directory of given inode number
 	 * When renaming to a new directory, we are both deleting and
 	 * creating a new directory entry, so the link count on the new
 	 * directory should not change. Thus we do not need the followup
 	 * dirrem which is usually done in handle_workitem_remove. We set
 	 * the DIRCHG flag to tell handle_workitem_remove to skip the 
 	 * followup dirrem.
 	 */
 	if (isrmdir > 1)
 		dirrem->dm_state |= DIRCHG;
 
 	/*
 	 * Whiteouts have no additional dependencies,
 	 * so just put the dirrem on the correct list.
 	 */
 	if (newinum == WINO) {
 		if ((dirrem->dm_state & COMPLETE) == 0) {
 			LIST_INSERT_HEAD(&pagedep->pd_dirremhd, dirrem,
 			    dm_next);
 		} else {
 			dirrem->dm_dirinum = pagedep->pd_ino;
 			if (LIST_EMPTY(&dirrem->dm_jremrefhd))
 				add_to_worklist(&dirrem->dm_list, 0);
 		}
 		FREE_LOCK(ump);
 		return;
 	}
 	/*
 	 * Add the dirrem to the inodedep's pending remove list for quick
 	 * discovery later.  A valid nlinkdelta ensures that this lookup
 	 * will not fail.
 	 */
 	if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0)
 		panic("softdep_setup_directory_change: Lost inodedep.");
 	dirrem->dm_state |= ONDEPLIST;
 	LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext);
 
 	/*
 	 * If the COMPLETE flag is clear, then there were no active
 	 * entries and we want to roll back to the previous inode until
 	 * the new inode is committed to disk. If the COMPLETE flag is
 	 * set, then we have deleted an entry that never made it to disk.
 	 * If the entry we deleted resulted from a name change, then the old
 	 * inode reference still resides on disk. Any rollback that we do
 	 * needs to be to that old inode (returned to us in prevdirrem). If
 	 * the entry we deleted resulted from a create, then there is
 	 * no entry on the disk, so we want to roll back to zero rather
 	 * than the uncommitted inode. In either of the COMPLETE cases we
 	 * want to immediately free the unwritten and unreferenced inode.
 	 */
 	if ((dirrem->dm_state & COMPLETE) == 0) {
 		dap->da_previous = dirrem;
 	} else {
 		if (prevdirrem != NULL) {
 			dap->da_previous = prevdirrem;
 		} else {
 			dap->da_state &= ~DIRCHG;
 			dap->da_pagedep = pagedep;
 		}
 		dirrem->dm_dirinum = pagedep->pd_ino;
 		if (LIST_EMPTY(&dirrem->dm_jremrefhd))
 			add_to_worklist(&dirrem->dm_list, 0);
 	}
 	/*
 	 * Lookup the jaddref for this journal entry.  We must finish
 	 * initializing it and make the diradd write dependent on it.
 	 * If we're not journaling, put it on the id_bufwait list if the
 	 * inode is not yet written. If it is written, do the post-inode
 	 * write processing to put it on the id_pendinghd list.
 	 */
 	inodedep_lookup(mp, newinum, DEPALLOC, &inodedep);
 	if (MOUNTEDSUJ(mp)) {
 		jaddref = (struct jaddref *)TAILQ_LAST(&inodedep->id_inoreflst,
 		    inoreflst);
 		KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number,
 		    ("softdep_setup_directory_change: bad jaddref %p",
 		    jaddref));
 		jaddref->ja_diroff = dp->i_offset;
 		jaddref->ja_diradd = dap;
 		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)],
 		    dap, da_pdlist);
 		add_to_journal(&jaddref->ja_list);
 	} else if ((inodedep->id_state & ALLCOMPLETE) == ALLCOMPLETE) {
 		dap->da_state |= COMPLETE;
 		LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap, da_pdlist);
 		WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
 	} else {
 		LIST_INSERT_HEAD(&pagedep->pd_diraddhd[DIRADDHASH(offset)],
 		    dap, da_pdlist);
 		WORKLIST_INSERT(&inodedep->id_bufwait, &dap->da_list);
 	}
 	/*
 	 * If we're making a new name for a directory that has not been
 	 * committed when need to move the dot and dotdot references to
 	 * this new name.
 	 */
 	if (inodedep->id_mkdiradd && dp->i_offset != DOTDOT_OFFSET)
 		merge_diradd(inodedep, dap);
 	FREE_LOCK(ump);
 }
 
 /*
  * Called whenever the link count on an inode is changed.
  * It creates an inode dependency so that the new reference(s)
  * to the inode cannot be committed to disk until the updated
  * inode has been written.
  */
 void
 softdep_change_linkcnt(ip)
 	struct inode *ip;	/* the inode with the increased link count */
 {
 	struct inodedep *inodedep;
 	struct ufsmount *ump;
 
 	ump = ITOUMP(ip);
 	KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
 	    ("softdep_change_linkcnt called on non-softdep filesystem"));
 	ACQUIRE_LOCK(ump);
 	inodedep_lookup(UFSTOVFS(ump), ip->i_number, DEPALLOC, &inodedep);
 	if (ip->i_nlink < ip->i_effnlink)
 		panic("softdep_change_linkcnt: bad delta");
 	inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
 	FREE_LOCK(ump);
 }
 
 /*
  * Attach a sbdep dependency to the superblock buf so that we can keep
  * track of the head of the linked list of referenced but unlinked inodes.
  */
 void
 softdep_setup_sbupdate(ump, fs, bp)
 	struct ufsmount *ump;
 	struct fs *fs;
 	struct buf *bp;
 {
 	struct sbdep *sbdep;
 	struct worklist *wk;
 
 	KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
 	    ("softdep_setup_sbupdate called on non-softdep filesystem"));
 	LIST_FOREACH(wk, &bp->b_dep, wk_list)
 		if (wk->wk_type == D_SBDEP)
 			break;
 	if (wk != NULL)
 		return;
 	sbdep = malloc(sizeof(struct sbdep), M_SBDEP, M_SOFTDEP_FLAGS);
 	workitem_alloc(&sbdep->sb_list, D_SBDEP, UFSTOVFS(ump));
 	sbdep->sb_fs = fs;
 	sbdep->sb_ump = ump;
 	ACQUIRE_LOCK(ump);
 	WORKLIST_INSERT(&bp->b_dep, &sbdep->sb_list);
 	FREE_LOCK(ump);
 }
 
 /*
  * Return the first unlinked inodedep which is ready to be the head of the
  * list.  The inodedep and all those after it must have valid next pointers.
  */
 static struct inodedep *
 first_unlinked_inodedep(ump)
 	struct ufsmount *ump;
 {
 	struct inodedep *inodedep;
 	struct inodedep *idp;
 
 	LOCK_OWNED(ump);
 	for (inodedep = TAILQ_LAST(&ump->softdep_unlinked, inodedeplst);
 	    inodedep; inodedep = idp) {
 		if ((inodedep->id_state & UNLINKNEXT) == 0)
 			return (NULL);
 		idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked);
 		if (idp == NULL || (idp->id_state & UNLINKNEXT) == 0)
 			break;
 		if ((inodedep->id_state & UNLINKPREV) == 0)
 			break;
 	}
 	return (inodedep);
 }
 
 /*
  * Set the sujfree unlinked head pointer prior to writing a superblock.
  */
 static void
 initiate_write_sbdep(sbdep)
 	struct sbdep *sbdep;
 {
 	struct inodedep *inodedep;
 	struct fs *bpfs;
 	struct fs *fs;
 
 	bpfs = sbdep->sb_fs;
 	fs = sbdep->sb_ump->um_fs;
 	inodedep = first_unlinked_inodedep(sbdep->sb_ump);
 	if (inodedep) {
 		fs->fs_sujfree = inodedep->id_ino;
 		inodedep->id_state |= UNLINKPREV;
 	} else
 		fs->fs_sujfree = 0;
 	bpfs->fs_sujfree = fs->fs_sujfree;
 }
 
 /*
  * After a superblock is written determine whether it must be written again
  * due to a changing unlinked list head.
  */
 static int
 handle_written_sbdep(sbdep, bp)
 	struct sbdep *sbdep;
 	struct buf *bp;
 {
 	struct inodedep *inodedep;
 	struct fs *fs;
 
 	LOCK_OWNED(sbdep->sb_ump);
 	fs = sbdep->sb_fs;
 	/*
 	 * If the superblock doesn't match the in-memory list start over.
 	 */
 	inodedep = first_unlinked_inodedep(sbdep->sb_ump);
 	if ((inodedep && fs->fs_sujfree != inodedep->id_ino) ||
 	    (inodedep == NULL && fs->fs_sujfree != 0)) {
 		bdirty(bp);
 		return (1);
 	}
 	WORKITEM_FREE(sbdep, D_SBDEP);
 	if (fs->fs_sujfree == 0)
 		return (0);
 	/*
 	 * Now that we have a record of this inode in stable store allow it
 	 * to be written to free up pending work.  Inodes may see a lot of
 	 * write activity after they are unlinked which we must not hold up.
 	 */
 	for (; inodedep != NULL; inodedep = TAILQ_NEXT(inodedep, id_unlinked)) {
 		if ((inodedep->id_state & UNLINKLINKS) != UNLINKLINKS)
 			panic("handle_written_sbdep: Bad inodedep %p (0x%X)",
 			    inodedep, inodedep->id_state);
 		if (inodedep->id_state & UNLINKONLIST)
 			break;
 		inodedep->id_state |= DEPCOMPLETE | UNLINKONLIST;
 	}
 
 	return (0);
 }
 
 /*
  * Mark an inodedep as unlinked and insert it into the in-memory unlinked list.
  */
 static void
 unlinked_inodedep(mp, inodedep)
 	struct mount *mp;
 	struct inodedep *inodedep;
 {
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	if (MOUNTEDSUJ(mp) == 0)
 		return;
 	ump->um_fs->fs_fmod = 1;
 	if (inodedep->id_state & UNLINKED)
 		panic("unlinked_inodedep: %p already unlinked\n", inodedep);
 	inodedep->id_state |= UNLINKED;
 	TAILQ_INSERT_HEAD(&ump->softdep_unlinked, inodedep, id_unlinked);
 }
 
 /*
  * Remove an inodedep from the unlinked inodedep list.  This may require
  * disk writes if the inode has made it that far.
  */
 static void
 clear_unlinked_inodedep(inodedep)
 	struct inodedep *inodedep;
 {
 	struct ufsmount *ump;
 	struct inodedep *idp;
 	struct inodedep *idn;
 	struct fs *fs;
 	struct buf *bp;
 	ino_t ino;
 	ino_t nino;
 	ino_t pino;
 	int error;
 
 	ump = VFSTOUFS(inodedep->id_list.wk_mp);
 	fs = ump->um_fs;
 	ino = inodedep->id_ino;
 	error = 0;
 	for (;;) {
 		LOCK_OWNED(ump);
 		KASSERT((inodedep->id_state & UNLINKED) != 0,
 		    ("clear_unlinked_inodedep: inodedep %p not unlinked",
 		    inodedep));
 		/*
 		 * If nothing has yet been written simply remove us from
 		 * the in memory list and return.  This is the most common
 		 * case where handle_workitem_remove() loses the final
 		 * reference.
 		 */
 		if ((inodedep->id_state & UNLINKLINKS) == 0)
 			break;
 		/*
 		 * If we have a NEXT pointer and no PREV pointer we can simply
 		 * clear NEXT's PREV and remove ourselves from the list.  Be
 		 * careful not to clear PREV if the superblock points at
 		 * next as well.
 		 */
 		idn = TAILQ_NEXT(inodedep, id_unlinked);
 		if ((inodedep->id_state & UNLINKLINKS) == UNLINKNEXT) {
 			if (idn && fs->fs_sujfree != idn->id_ino)
 				idn->id_state &= ~UNLINKPREV;
 			break;
 		}
 		/*
 		 * Here we have an inodedep which is actually linked into
 		 * the list.  We must remove it by forcing a write to the
 		 * link before us, whether it be the superblock or an inode.
 		 * Unfortunately the list may change while we're waiting
 		 * on the buf lock for either resource so we must loop until
 		 * we lock the right one.  If both the superblock and an
 		 * inode point to this inode we must clear the inode first
 		 * followed by the superblock.
 		 */
 		idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked);
 		pino = 0;
 		if (idp && (idp->id_state & UNLINKNEXT))
 			pino = idp->id_ino;
 		FREE_LOCK(ump);
 		if (pino == 0) {
 			bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
 			    (int)fs->fs_sbsize, 0, 0, 0);
 		} else {
 			error = bread(ump->um_devvp,
 			    fsbtodb(fs, ino_to_fsba(fs, pino)),
 			    (int)fs->fs_bsize, NOCRED, &bp);
 			if (error)
 				brelse(bp);
 		}
 		ACQUIRE_LOCK(ump);
 		if (error)
 			break;
 		/* If the list has changed restart the loop. */
 		idp = TAILQ_PREV(inodedep, inodedeplst, id_unlinked);
 		nino = 0;
 		if (idp && (idp->id_state & UNLINKNEXT))
 			nino = idp->id_ino;
 		if (nino != pino ||
 		    (inodedep->id_state & UNLINKPREV) != UNLINKPREV) {
 			FREE_LOCK(ump);
 			brelse(bp);
 			ACQUIRE_LOCK(ump);
 			continue;
 		}
 		nino = 0;
 		idn = TAILQ_NEXT(inodedep, id_unlinked);
 		if (idn)
 			nino = idn->id_ino;
 		/*
 		 * Remove us from the in memory list.  After this we cannot
 		 * access the inodedep.
 		 */
 		KASSERT((inodedep->id_state & UNLINKED) != 0,
 		    ("clear_unlinked_inodedep: inodedep %p not unlinked",
 		    inodedep));
 		inodedep->id_state &= ~(UNLINKED | UNLINKLINKS | UNLINKONLIST);
 		TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked);
 		FREE_LOCK(ump);
 		/*
 		 * The predecessor's next pointer is manually updated here
 		 * so that the NEXT flag is never cleared for an element
 		 * that is in the list.
 		 */
 		if (pino == 0) {
 			bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
 			ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
 			softdep_setup_sbupdate(ump, (struct fs *)bp->b_data,
 			    bp);
 		} else if (fs->fs_magic == FS_UFS1_MAGIC)
 			((struct ufs1_dinode *)bp->b_data +
 			    ino_to_fsbo(fs, pino))->di_freelink = nino;
 		else
 			((struct ufs2_dinode *)bp->b_data +
 			    ino_to_fsbo(fs, pino))->di_freelink = nino;
 		/*
 		 * If the bwrite fails we have no recourse to recover.  The
 		 * filesystem is corrupted already.
 		 */
 		bwrite(bp);
 		ACQUIRE_LOCK(ump);
 		/*
 		 * If the superblock pointer still needs to be cleared force
 		 * a write here.
 		 */
 		if (fs->fs_sujfree == ino) {
 			FREE_LOCK(ump);
 			bp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
 			    (int)fs->fs_sbsize, 0, 0, 0);
 			bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
 			ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
 			softdep_setup_sbupdate(ump, (struct fs *)bp->b_data,
 			    bp);
 			bwrite(bp);
 			ACQUIRE_LOCK(ump);
 		}
 
 		if (fs->fs_sujfree != ino)
 			return;
 		panic("clear_unlinked_inodedep: Failed to clear free head");
 	}
 	if (inodedep->id_ino == fs->fs_sujfree)
 		panic("clear_unlinked_inodedep: Freeing head of free list");
 	inodedep->id_state &= ~(UNLINKED | UNLINKLINKS | UNLINKONLIST);
 	TAILQ_REMOVE(&ump->softdep_unlinked, inodedep, id_unlinked);
 	return;
 }
 
 /*
  * This workitem decrements the inode's link count.
  * If the link count reaches zero, the file is removed.
  */
 static int
 handle_workitem_remove(dirrem, flags)
 	struct dirrem *dirrem;
 	int flags;
 {
 	struct inodedep *inodedep;
 	struct workhead dotdotwk;
 	struct worklist *wk;
 	struct ufsmount *ump;
 	struct mount *mp;
 	struct vnode *vp;
 	struct inode *ip;
 	ino_t oldinum;
 
 	if (dirrem->dm_state & ONWORKLIST)
 		panic("handle_workitem_remove: dirrem %p still on worklist",
 		    dirrem);
 	oldinum = dirrem->dm_oldinum;
 	mp = dirrem->dm_list.wk_mp;
 	ump = VFSTOUFS(mp);
 	flags |= LK_EXCLUSIVE;
 	if (ffs_vgetf(mp, oldinum, flags, &vp, FFSV_FORCEINSMQ) != 0)
 		return (EBUSY);
 	ip = VTOI(vp);
 	ACQUIRE_LOCK(ump);
 	if ((inodedep_lookup(mp, oldinum, 0, &inodedep)) == 0)
 		panic("handle_workitem_remove: lost inodedep");
 	if (dirrem->dm_state & ONDEPLIST)
 		LIST_REMOVE(dirrem, dm_inonext);
 	KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd),
 	    ("handle_workitem_remove:  Journal entries not written."));
 
 	/*
 	 * Move all dependencies waiting on the remove to complete
 	 * from the dirrem to the inode inowait list to be completed
 	 * after the inode has been updated and written to disk.  Any
 	 * marked MKDIR_PARENT are saved to be completed when the .. ref
 	 * is removed.
 	 */
 	LIST_INIT(&dotdotwk);
 	while ((wk = LIST_FIRST(&dirrem->dm_jwork)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		if (wk->wk_state & MKDIR_PARENT) {
 			wk->wk_state &= ~MKDIR_PARENT;
 			WORKLIST_INSERT(&dotdotwk, wk);
 			continue;
 		}
 		WORKLIST_INSERT(&inodedep->id_inowait, wk);
 	}
 	LIST_SWAP(&dirrem->dm_jwork, &dotdotwk, worklist, wk_list);
 	/*
 	 * Normal file deletion.
 	 */
 	if ((dirrem->dm_state & RMDIR) == 0) {
 		ip->i_nlink--;
 		DIP_SET(ip, i_nlink, ip->i_nlink);
 		ip->i_flag |= IN_CHANGE;
 		if (ip->i_nlink < ip->i_effnlink)
 			panic("handle_workitem_remove: bad file delta");
 		if (ip->i_nlink == 0) 
 			unlinked_inodedep(mp, inodedep);
 		inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
 		KASSERT(LIST_EMPTY(&dirrem->dm_jwork),
 		    ("handle_workitem_remove: worklist not empty. %s",
 		    TYPENAME(LIST_FIRST(&dirrem->dm_jwork)->wk_type)));
 		WORKITEM_FREE(dirrem, D_DIRREM);
 		FREE_LOCK(ump);
 		goto out;
 	}
 	/*
 	 * Directory deletion. Decrement reference count for both the
 	 * just deleted parent directory entry and the reference for ".".
 	 * Arrange to have the reference count on the parent decremented
 	 * to account for the loss of "..".
 	 */
 	ip->i_nlink -= 2;
 	DIP_SET(ip, i_nlink, ip->i_nlink);
 	ip->i_flag |= IN_CHANGE;
 	if (ip->i_nlink < ip->i_effnlink)
 		panic("handle_workitem_remove: bad dir delta");
 	if (ip->i_nlink == 0)
 		unlinked_inodedep(mp, inodedep);
 	inodedep->id_nlinkdelta = ip->i_nlink - ip->i_effnlink;
 	/*
 	 * Rename a directory to a new parent. Since, we are both deleting
 	 * and creating a new directory entry, the link count on the new
 	 * directory should not change. Thus we skip the followup dirrem.
 	 */
 	if (dirrem->dm_state & DIRCHG) {
 		KASSERT(LIST_EMPTY(&dirrem->dm_jwork),
 		    ("handle_workitem_remove: DIRCHG and worklist not empty."));
 		WORKITEM_FREE(dirrem, D_DIRREM);
 		FREE_LOCK(ump);
 		goto out;
 	}
 	dirrem->dm_state = ONDEPLIST;
 	dirrem->dm_oldinum = dirrem->dm_dirinum;
 	/*
 	 * Place the dirrem on the parent's diremhd list.
 	 */
 	if (inodedep_lookup(mp, dirrem->dm_oldinum, 0, &inodedep) == 0)
 		panic("handle_workitem_remove: lost dir inodedep");
 	LIST_INSERT_HEAD(&inodedep->id_dirremhd, dirrem, dm_inonext);
 	/*
 	 * If the allocated inode has never been written to disk, then
 	 * the on-disk inode is zero'ed and we can remove the file
 	 * immediately.  When journaling if the inode has been marked
 	 * unlinked and not DEPCOMPLETE we know it can never be written.
 	 */
 	inodedep_lookup(mp, oldinum, 0, &inodedep);
 	if (inodedep == NULL ||
 	    (inodedep->id_state & (DEPCOMPLETE | UNLINKED)) == UNLINKED ||
 	    check_inode_unwritten(inodedep)) {
 		FREE_LOCK(ump);
 		vput(vp);
 		return handle_workitem_remove(dirrem, flags);
 	}
 	WORKLIST_INSERT(&inodedep->id_inowait, &dirrem->dm_list);
 	FREE_LOCK(ump);
 	ip->i_flag |= IN_CHANGE;
 out:
 	ffs_update(vp, 0);
 	vput(vp);
 	return (0);
 }
 
 /*
  * Inode de-allocation dependencies.
  * 
  * When an inode's link count is reduced to zero, it can be de-allocated. We
  * found it convenient to postpone de-allocation until after the inode is
  * written to disk with its new link count (zero).  At this point, all of the
  * on-disk inode's block pointers are nullified and, with careful dependency
  * list ordering, all dependencies related to the inode will be satisfied and
  * the corresponding dependency structures de-allocated.  So, if/when the
  * inode is reused, there will be no mixing of old dependencies with new
  * ones.  This artificial dependency is set up by the block de-allocation
  * procedure above (softdep_setup_freeblocks) and completed by the
  * following procedure.
  */
 static void 
 handle_workitem_freefile(freefile)
 	struct freefile *freefile;
 {
 	struct workhead wkhd;
 	struct fs *fs;
 	struct inodedep *idp;
 	struct ufsmount *ump;
 	int error;
 
 	ump = VFSTOUFS(freefile->fx_list.wk_mp);
 	fs = ump->um_fs;
 #ifdef DEBUG
 	ACQUIRE_LOCK(ump);
 	error = inodedep_lookup(UFSTOVFS(ump), freefile->fx_oldinum, 0, &idp);
 	FREE_LOCK(ump);
 	if (error)
 		panic("handle_workitem_freefile: inodedep %p survived", idp);
 #endif
 	UFS_LOCK(ump);
 	fs->fs_pendinginodes -= 1;
 	UFS_UNLOCK(ump);
 	LIST_INIT(&wkhd);
 	LIST_SWAP(&freefile->fx_jwork, &wkhd, worklist, wk_list);
 	if ((error = ffs_freefile(ump, fs, freefile->fx_devvp,
 	    freefile->fx_oldinum, freefile->fx_mode, &wkhd)) != 0)
 		softdep_error("handle_workitem_freefile", error);
 	ACQUIRE_LOCK(ump);
 	WORKITEM_FREE(freefile, D_FREEFILE);
 	FREE_LOCK(ump);
 }
 
 
 /*
  * Helper function which unlinks marker element from work list and returns
  * the next element on the list.
  */
 static __inline struct worklist *
 markernext(struct worklist *marker)
 {
 	struct worklist *next;
 	
 	next = LIST_NEXT(marker, wk_list);
 	LIST_REMOVE(marker, wk_list);
 	return next;
 }
 
 /*
  * Disk writes.
  * 
  * The dependency structures constructed above are most actively used when file
  * system blocks are written to disk.  No constraints are placed on when a
  * block can be written, but unsatisfied update dependencies are made safe by
  * modifying (or replacing) the source memory for the duration of the disk
  * write.  When the disk write completes, the memory block is again brought
  * up-to-date.
  *
  * In-core inode structure reclamation.
  * 
  * Because there are a finite number of "in-core" inode structures, they are
  * reused regularly.  By transferring all inode-related dependencies to the
  * in-memory inode block and indexing them separately (via "inodedep"s), we
  * can allow "in-core" inode structures to be reused at any time and avoid
  * any increase in contention.
  *
  * Called just before entering the device driver to initiate a new disk I/O.
  * The buffer must be locked, thus, no I/O completion operations can occur
  * while we are manipulating its associated dependencies.
  */
 static void 
 softdep_disk_io_initiation(bp)
 	struct buf *bp;		/* structure describing disk write to occur */
 {
 	struct worklist *wk;
 	struct worklist marker;
 	struct inodedep *inodedep;
 	struct freeblks *freeblks;
 	struct jblkdep *jblkdep;
 	struct newblk *newblk;
 	struct ufsmount *ump;
 
 	/*
 	 * We only care about write operations. There should never
 	 * be dependencies for reads.
 	 */
 	if (bp->b_iocmd != BIO_WRITE)
 		panic("softdep_disk_io_initiation: not write");
 
 	if (bp->b_vflags & BV_BKGRDINPROG)
 		panic("softdep_disk_io_initiation: Writing buffer with "
 		    "background write in progress: %p", bp);
 
 	if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
 		return;
 	ump = VFSTOUFS(wk->wk_mp);
 
 	marker.wk_type = D_LAST + 1;	/* Not a normal workitem */
 	PHOLD(curproc);			/* Don't swap out kernel stack */
 	ACQUIRE_LOCK(ump);
 	/*
 	 * Do any necessary pre-I/O processing.
 	 */
 	for (wk = LIST_FIRST(&bp->b_dep); wk != NULL;
 	     wk = markernext(&marker)) {
 		LIST_INSERT_AFTER(wk, &marker, wk_list);
 		switch (wk->wk_type) {
 
 		case D_PAGEDEP:
 			initiate_write_filepage(WK_PAGEDEP(wk), bp);
 			continue;
 
 		case D_INODEDEP:
 			inodedep = WK_INODEDEP(wk);
 			if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC)
 				initiate_write_inodeblock_ufs1(inodedep, bp);
 			else
 				initiate_write_inodeblock_ufs2(inodedep, bp);
 			continue;
 
 		case D_INDIRDEP:
 			initiate_write_indirdep(WK_INDIRDEP(wk), bp);
 			continue;
 
 		case D_BMSAFEMAP:
 			initiate_write_bmsafemap(WK_BMSAFEMAP(wk), bp);
 			continue;
 
 		case D_JSEG:
 			WK_JSEG(wk)->js_buf = NULL;
 			continue;
 
 		case D_FREEBLKS:
 			freeblks = WK_FREEBLKS(wk);
 			jblkdep = LIST_FIRST(&freeblks->fb_jblkdephd);
 			/*
 			 * We have to wait for the freeblks to be journaled
 			 * before we can write an inodeblock with updated
 			 * pointers.  Be careful to arrange the marker so
 			 * we revisit the freeblks if it's not removed by
 			 * the first jwait().
 			 */
 			if (jblkdep != NULL) {
 				LIST_REMOVE(&marker, wk_list);
 				LIST_INSERT_BEFORE(wk, &marker, wk_list);
 				jwait(&jblkdep->jb_list, MNT_WAIT);
 			}
 			continue;
 		case D_ALLOCDIRECT:
 		case D_ALLOCINDIR:
 			/*
 			 * We have to wait for the jnewblk to be journaled
 			 * before we can write to a block if the contents
 			 * may be confused with an earlier file's indirect
 			 * at recovery time.  Handle the marker as described
 			 * above.
 			 */
 			newblk = WK_NEWBLK(wk);
 			if (newblk->nb_jnewblk != NULL &&
 			    indirblk_lookup(newblk->nb_list.wk_mp,
 			    newblk->nb_newblkno)) {
 				LIST_REMOVE(&marker, wk_list);
 				LIST_INSERT_BEFORE(wk, &marker, wk_list);
 				jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT);
 			}
 			continue;
 
 		case D_SBDEP:
 			initiate_write_sbdep(WK_SBDEP(wk));
 			continue;
 
 		case D_MKDIR:
 		case D_FREEWORK:
 		case D_FREEDEP:
 		case D_JSEGDEP:
 			continue;
 
 		default:
 			panic("handle_disk_io_initiation: Unexpected type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 	FREE_LOCK(ump);
 	PRELE(curproc);			/* Allow swapout of kernel stack */
 }
 
 /*
  * Called from within the procedure above to deal with unsatisfied
  * allocation dependencies in a directory. The buffer must be locked,
  * thus, no I/O completion operations can occur while we are
  * manipulating its associated dependencies.
  */
 static void
 initiate_write_filepage(pagedep, bp)
 	struct pagedep *pagedep;
 	struct buf *bp;
 {
 	struct jremref *jremref;
 	struct jmvref *jmvref;
 	struct dirrem *dirrem;
 	struct diradd *dap;
 	struct direct *ep;
 	int i;
 
 	if (pagedep->pd_state & IOSTARTED) {
 		/*
 		 * This can only happen if there is a driver that does not
 		 * understand chaining. Here biodone will reissue the call
 		 * to strategy for the incomplete buffers.
 		 */
 		printf("initiate_write_filepage: already started\n");
 		return;
 	}
 	pagedep->pd_state |= IOSTARTED;
 	/*
 	 * Wait for all journal remove dependencies to hit the disk.
 	 * We can not allow any potentially conflicting directory adds
 	 * to be visible before removes and rollback is too difficult.
 	 * The per-filesystem lock may be dropped and re-acquired, however 
 	 * we hold the buf locked so the dependency can not go away.
 	 */
 	LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next)
 		while ((jremref = LIST_FIRST(&dirrem->dm_jremrefhd)) != NULL)
 			jwait(&jremref->jr_list, MNT_WAIT);
 	while ((jmvref = LIST_FIRST(&pagedep->pd_jmvrefhd)) != NULL)
 		jwait(&jmvref->jm_list, MNT_WAIT);
 	for (i = 0; i < DAHASHSZ; i++) {
 		LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
 			ep = (struct direct *)
 			    ((char *)bp->b_data + dap->da_offset);
 			if (ep->d_ino != dap->da_newinum)
 				panic("%s: dir inum %ju != new %ju",
 				    "initiate_write_filepage",
 				    (uintmax_t)ep->d_ino,
 				    (uintmax_t)dap->da_newinum);
 			if (dap->da_state & DIRCHG)
 				ep->d_ino = dap->da_previous->dm_oldinum;
 			else
 				ep->d_ino = 0;
 			dap->da_state &= ~ATTACHED;
 			dap->da_state |= UNDONE;
 		}
 	}
 }
 
 /*
  * Version of initiate_write_inodeblock that handles UFS1 dinodes.
  * Note that any bug fixes made to this routine must be done in the
  * version found below.
  *
  * Called from within the procedure above to deal with unsatisfied
  * allocation dependencies in an inodeblock. The buffer must be
  * locked, thus, no I/O completion operations can occur while we
  * are manipulating its associated dependencies.
  */
 static void 
 initiate_write_inodeblock_ufs1(inodedep, bp)
 	struct inodedep *inodedep;
 	struct buf *bp;			/* The inode block */
 {
 	struct allocdirect *adp, *lastadp;
 	struct ufs1_dinode *dp;
 	struct ufs1_dinode *sip;
 	struct inoref *inoref;
 	struct ufsmount *ump;
 	struct fs *fs;
 	ufs_lbn_t i;
 #ifdef INVARIANTS
 	ufs_lbn_t prevlbn = 0;
 #endif
 	int deplist;
 
 	if (inodedep->id_state & IOSTARTED)
 		panic("initiate_write_inodeblock_ufs1: already started");
 	inodedep->id_state |= IOSTARTED;
 	fs = inodedep->id_fs;
 	ump = VFSTOUFS(inodedep->id_list.wk_mp);
 	LOCK_OWNED(ump);
 	dp = (struct ufs1_dinode *)bp->b_data +
 	    ino_to_fsbo(fs, inodedep->id_ino);
 
 	/*
 	 * If we're on the unlinked list but have not yet written our
 	 * next pointer initialize it here.
 	 */
 	if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) {
 		struct inodedep *inon;
 
 		inon = TAILQ_NEXT(inodedep, id_unlinked);
 		dp->di_freelink = inon ? inon->id_ino : 0;
 	}
 	/*
 	 * If the bitmap is not yet written, then the allocated
 	 * inode cannot be written to disk.
 	 */
 	if ((inodedep->id_state & DEPCOMPLETE) == 0) {
 		if (inodedep->id_savedino1 != NULL)
 			panic("initiate_write_inodeblock_ufs1: I/O underway");
 		FREE_LOCK(ump);
 		sip = malloc(sizeof(struct ufs1_dinode),
 		    M_SAVEDINO, M_SOFTDEP_FLAGS);
 		ACQUIRE_LOCK(ump);
 		inodedep->id_savedino1 = sip;
 		*inodedep->id_savedino1 = *dp;
 		bzero((caddr_t)dp, sizeof(struct ufs1_dinode));
 		dp->di_gen = inodedep->id_savedino1->di_gen;
 		dp->di_freelink = inodedep->id_savedino1->di_freelink;
 		return;
 	}
 	/*
 	 * If no dependencies, then there is nothing to roll back.
 	 */
 	inodedep->id_savedsize = dp->di_size;
 	inodedep->id_savedextsize = 0;
 	inodedep->id_savednlink = dp->di_nlink;
 	if (TAILQ_EMPTY(&inodedep->id_inoupdt) &&
 	    TAILQ_EMPTY(&inodedep->id_inoreflst))
 		return;
 	/*
 	 * Revert the link count to that of the first unwritten journal entry.
 	 */
 	inoref = TAILQ_FIRST(&inodedep->id_inoreflst);
 	if (inoref)
 		dp->di_nlink = inoref->if_nlink;
 	/*
 	 * Set the dependencies to busy.
 	 */
 	for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
 	     adp = TAILQ_NEXT(adp, ad_next)) {
 #ifdef INVARIANTS
 		if (deplist != 0 && prevlbn >= adp->ad_offset)
 			panic("softdep_write_inodeblock: lbn order");
 		prevlbn = adp->ad_offset;
 		if (adp->ad_offset < NDADDR &&
 		    dp->di_db[adp->ad_offset] != adp->ad_newblkno)
 			panic("%s: direct pointer #%jd mismatch %d != %jd",
 			    "softdep_write_inodeblock",
 			    (intmax_t)adp->ad_offset,
 			    dp->di_db[adp->ad_offset],
 			    (intmax_t)adp->ad_newblkno);
 		if (adp->ad_offset >= NDADDR &&
 		    dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno)
 			panic("%s: indirect pointer #%jd mismatch %d != %jd",
 			    "softdep_write_inodeblock",
 			    (intmax_t)adp->ad_offset - NDADDR,
 			    dp->di_ib[adp->ad_offset - NDADDR],
 			    (intmax_t)adp->ad_newblkno);
 		deplist |= 1 << adp->ad_offset;
 		if ((adp->ad_state & ATTACHED) == 0)
 			panic("softdep_write_inodeblock: Unknown state 0x%x",
 			    adp->ad_state);
 #endif /* INVARIANTS */
 		adp->ad_state &= ~ATTACHED;
 		adp->ad_state |= UNDONE;
 	}
 	/*
 	 * The on-disk inode cannot claim to be any larger than the last
 	 * fragment that has been written. Otherwise, the on-disk inode
 	 * might have fragments that were not the last block in the file
 	 * which would corrupt the filesystem.
 	 */
 	for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
 	     lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
 		if (adp->ad_offset >= NDADDR)
 			break;
 		dp->di_db[adp->ad_offset] = adp->ad_oldblkno;
 		/* keep going until hitting a rollback to a frag */
 		if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
 			continue;
 		dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize;
 		for (i = adp->ad_offset + 1; i < NDADDR; i++) {
 #ifdef INVARIANTS
 			if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0)
 				panic("softdep_write_inodeblock: lost dep1");
 #endif /* INVARIANTS */
 			dp->di_db[i] = 0;
 		}
 		for (i = 0; i < NIADDR; i++) {
 #ifdef INVARIANTS
 			if (dp->di_ib[i] != 0 &&
 			    (deplist & ((1 << NDADDR) << i)) == 0)
 				panic("softdep_write_inodeblock: lost dep2");
 #endif /* INVARIANTS */
 			dp->di_ib[i] = 0;
 		}
 		return;
 	}
 	/*
 	 * If we have zero'ed out the last allocated block of the file,
 	 * roll back the size to the last currently allocated block.
 	 * We know that this last allocated block is a full-sized as
 	 * we already checked for fragments in the loop above.
 	 */
 	if (lastadp != NULL &&
 	    dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) {
 		for (i = lastadp->ad_offset; i >= 0; i--)
 			if (dp->di_db[i] != 0)
 				break;
 		dp->di_size = (i + 1) * fs->fs_bsize;
 	}
 	/*
 	 * The only dependencies are for indirect blocks.
 	 *
 	 * The file size for indirect block additions is not guaranteed.
 	 * Such a guarantee would be non-trivial to achieve. The conventional
 	 * synchronous write implementation also does not make this guarantee.
 	 * Fsck should catch and fix discrepancies. Arguably, the file size
 	 * can be over-estimated without destroying integrity when the file
 	 * moves into the indirect blocks (i.e., is large). If we want to
 	 * postpone fsck, we are stuck with this argument.
 	 */
 	for (; adp; adp = TAILQ_NEXT(adp, ad_next))
 		dp->di_ib[adp->ad_offset - NDADDR] = 0;
 }
 		
 /*
  * Version of initiate_write_inodeblock that handles UFS2 dinodes.
  * Note that any bug fixes made to this routine must be done in the
  * version found above.
  *
  * Called from within the procedure above to deal with unsatisfied
  * allocation dependencies in an inodeblock. The buffer must be
  * locked, thus, no I/O completion operations can occur while we
  * are manipulating its associated dependencies.
  */
 static void 
 initiate_write_inodeblock_ufs2(inodedep, bp)
 	struct inodedep *inodedep;
 	struct buf *bp;			/* The inode block */
 {
 	struct allocdirect *adp, *lastadp;
 	struct ufs2_dinode *dp;
 	struct ufs2_dinode *sip;
 	struct inoref *inoref;
 	struct ufsmount *ump;
 	struct fs *fs;
 	ufs_lbn_t i;
 #ifdef INVARIANTS
 	ufs_lbn_t prevlbn = 0;
 #endif
 	int deplist;
 
 	if (inodedep->id_state & IOSTARTED)
 		panic("initiate_write_inodeblock_ufs2: already started");
 	inodedep->id_state |= IOSTARTED;
 	fs = inodedep->id_fs;
 	ump = VFSTOUFS(inodedep->id_list.wk_mp);
 	LOCK_OWNED(ump);
 	dp = (struct ufs2_dinode *)bp->b_data +
 	    ino_to_fsbo(fs, inodedep->id_ino);
 
 	/*
 	 * If we're on the unlinked list but have not yet written our
 	 * next pointer initialize it here.
 	 */
 	if ((inodedep->id_state & (UNLINKED | UNLINKNEXT)) == UNLINKED) {
 		struct inodedep *inon;
 
 		inon = TAILQ_NEXT(inodedep, id_unlinked);
 		dp->di_freelink = inon ? inon->id_ino : 0;
 	}
 	/*
 	 * If the bitmap is not yet written, then the allocated
 	 * inode cannot be written to disk.
 	 */
 	if ((inodedep->id_state & DEPCOMPLETE) == 0) {
 		if (inodedep->id_savedino2 != NULL)
 			panic("initiate_write_inodeblock_ufs2: I/O underway");
 		FREE_LOCK(ump);
 		sip = malloc(sizeof(struct ufs2_dinode),
 		    M_SAVEDINO, M_SOFTDEP_FLAGS);
 		ACQUIRE_LOCK(ump);
 		inodedep->id_savedino2 = sip;
 		*inodedep->id_savedino2 = *dp;
 		bzero((caddr_t)dp, sizeof(struct ufs2_dinode));
 		dp->di_gen = inodedep->id_savedino2->di_gen;
 		dp->di_freelink = inodedep->id_savedino2->di_freelink;
 		return;
 	}
 	/*
 	 * If no dependencies, then there is nothing to roll back.
 	 */
 	inodedep->id_savedsize = dp->di_size;
 	inodedep->id_savedextsize = dp->di_extsize;
 	inodedep->id_savednlink = dp->di_nlink;
 	if (TAILQ_EMPTY(&inodedep->id_inoupdt) &&
 	    TAILQ_EMPTY(&inodedep->id_extupdt) &&
 	    TAILQ_EMPTY(&inodedep->id_inoreflst))
 		return;
 	/*
 	 * Revert the link count to that of the first unwritten journal entry.
 	 */
 	inoref = TAILQ_FIRST(&inodedep->id_inoreflst);
 	if (inoref)
 		dp->di_nlink = inoref->if_nlink;
 
 	/*
 	 * Set the ext data dependencies to busy.
 	 */
 	for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
 	     adp = TAILQ_NEXT(adp, ad_next)) {
 #ifdef INVARIANTS
 		if (deplist != 0 && prevlbn >= adp->ad_offset)
 			panic("softdep_write_inodeblock: lbn order");
 		prevlbn = adp->ad_offset;
 		if (dp->di_extb[adp->ad_offset] != adp->ad_newblkno)
 			panic("%s: direct pointer #%jd mismatch %jd != %jd",
 			    "softdep_write_inodeblock",
 			    (intmax_t)adp->ad_offset,
 			    (intmax_t)dp->di_extb[adp->ad_offset],
 			    (intmax_t)adp->ad_newblkno);
 		deplist |= 1 << adp->ad_offset;
 		if ((adp->ad_state & ATTACHED) == 0)
 			panic("softdep_write_inodeblock: Unknown state 0x%x",
 			    adp->ad_state);
 #endif /* INVARIANTS */
 		adp->ad_state &= ~ATTACHED;
 		adp->ad_state |= UNDONE;
 	}
 	/*
 	 * The on-disk inode cannot claim to be any larger than the last
 	 * fragment that has been written. Otherwise, the on-disk inode
 	 * might have fragments that were not the last block in the ext
 	 * data which would corrupt the filesystem.
 	 */
 	for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_extupdt); adp;
 	     lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
 		dp->di_extb[adp->ad_offset] = adp->ad_oldblkno;
 		/* keep going until hitting a rollback to a frag */
 		if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
 			continue;
 		dp->di_extsize = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize;
 		for (i = adp->ad_offset + 1; i < NXADDR; i++) {
 #ifdef INVARIANTS
 			if (dp->di_extb[i] != 0 && (deplist & (1 << i)) == 0)
 				panic("softdep_write_inodeblock: lost dep1");
 #endif /* INVARIANTS */
 			dp->di_extb[i] = 0;
 		}
 		lastadp = NULL;
 		break;
 	}
 	/*
 	 * If we have zero'ed out the last allocated block of the ext
 	 * data, roll back the size to the last currently allocated block.
 	 * We know that this last allocated block is a full-sized as
 	 * we already checked for fragments in the loop above.
 	 */
 	if (lastadp != NULL &&
 	    dp->di_extsize <= (lastadp->ad_offset + 1) * fs->fs_bsize) {
 		for (i = lastadp->ad_offset; i >= 0; i--)
 			if (dp->di_extb[i] != 0)
 				break;
 		dp->di_extsize = (i + 1) * fs->fs_bsize;
 	}
 	/*
 	 * Set the file data dependencies to busy.
 	 */
 	for (deplist = 0, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
 	     adp = TAILQ_NEXT(adp, ad_next)) {
 #ifdef INVARIANTS
 		if (deplist != 0 && prevlbn >= adp->ad_offset)
 			panic("softdep_write_inodeblock: lbn order");
 		if ((adp->ad_state & ATTACHED) == 0)
 			panic("inodedep %p and adp %p not attached", inodedep, adp);
 		prevlbn = adp->ad_offset;
 		if (adp->ad_offset < NDADDR &&
 		    dp->di_db[adp->ad_offset] != adp->ad_newblkno)
 			panic("%s: direct pointer #%jd mismatch %jd != %jd",
 			    "softdep_write_inodeblock",
 			    (intmax_t)adp->ad_offset,
 			    (intmax_t)dp->di_db[adp->ad_offset],
 			    (intmax_t)adp->ad_newblkno);
 		if (adp->ad_offset >= NDADDR &&
 		    dp->di_ib[adp->ad_offset - NDADDR] != adp->ad_newblkno)
 			panic("%s indirect pointer #%jd mismatch %jd != %jd",
 			    "softdep_write_inodeblock:",
 			    (intmax_t)adp->ad_offset - NDADDR,
 			    (intmax_t)dp->di_ib[adp->ad_offset - NDADDR],
 			    (intmax_t)adp->ad_newblkno);
 		deplist |= 1 << adp->ad_offset;
 		if ((adp->ad_state & ATTACHED) == 0)
 			panic("softdep_write_inodeblock: Unknown state 0x%x",
 			    adp->ad_state);
 #endif /* INVARIANTS */
 		adp->ad_state &= ~ATTACHED;
 		adp->ad_state |= UNDONE;
 	}
 	/*
 	 * The on-disk inode cannot claim to be any larger than the last
 	 * fragment that has been written. Otherwise, the on-disk inode
 	 * might have fragments that were not the last block in the file
 	 * which would corrupt the filesystem.
 	 */
 	for (lastadp = NULL, adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp;
 	     lastadp = adp, adp = TAILQ_NEXT(adp, ad_next)) {
 		if (adp->ad_offset >= NDADDR)
 			break;
 		dp->di_db[adp->ad_offset] = adp->ad_oldblkno;
 		/* keep going until hitting a rollback to a frag */
 		if (adp->ad_oldsize == 0 || adp->ad_oldsize == fs->fs_bsize)
 			continue;
 		dp->di_size = fs->fs_bsize * adp->ad_offset + adp->ad_oldsize;
 		for (i = adp->ad_offset + 1; i < NDADDR; i++) {
 #ifdef INVARIANTS
 			if (dp->di_db[i] != 0 && (deplist & (1 << i)) == 0)
 				panic("softdep_write_inodeblock: lost dep2");
 #endif /* INVARIANTS */
 			dp->di_db[i] = 0;
 		}
 		for (i = 0; i < NIADDR; i++) {
 #ifdef INVARIANTS
 			if (dp->di_ib[i] != 0 &&
 			    (deplist & ((1 << NDADDR) << i)) == 0)
 				panic("softdep_write_inodeblock: lost dep3");
 #endif /* INVARIANTS */
 			dp->di_ib[i] = 0;
 		}
 		return;
 	}
 	/*
 	 * If we have zero'ed out the last allocated block of the file,
 	 * roll back the size to the last currently allocated block.
 	 * We know that this last allocated block is a full-sized as
 	 * we already checked for fragments in the loop above.
 	 */
 	if (lastadp != NULL &&
 	    dp->di_size <= (lastadp->ad_offset + 1) * fs->fs_bsize) {
 		for (i = lastadp->ad_offset; i >= 0; i--)
 			if (dp->di_db[i] != 0)
 				break;
 		dp->di_size = (i + 1) * fs->fs_bsize;
 	}
 	/*
 	 * The only dependencies are for indirect blocks.
 	 *
 	 * The file size for indirect block additions is not guaranteed.
 	 * Such a guarantee would be non-trivial to achieve. The conventional
 	 * synchronous write implementation also does not make this guarantee.
 	 * Fsck should catch and fix discrepancies. Arguably, the file size
 	 * can be over-estimated without destroying integrity when the file
 	 * moves into the indirect blocks (i.e., is large). If we want to
 	 * postpone fsck, we are stuck with this argument.
 	 */
 	for (; adp; adp = TAILQ_NEXT(adp, ad_next))
 		dp->di_ib[adp->ad_offset - NDADDR] = 0;
 }
 
 /*
  * Cancel an indirdep as a result of truncation.  Release all of the
  * children allocindirs and place their journal work on the appropriate
  * list.
  */
 static void
 cancel_indirdep(indirdep, bp, freeblks)
 	struct indirdep *indirdep;
 	struct buf *bp;
 	struct freeblks *freeblks;
 {
 	struct allocindir *aip;
 
 	/*
 	 * None of the indirect pointers will ever be visible,
 	 * so they can simply be tossed. GOINGAWAY ensures
 	 * that allocated pointers will be saved in the buffer
 	 * cache until they are freed. Note that they will
 	 * only be able to be found by their physical address
 	 * since the inode mapping the logical address will
 	 * be gone. The save buffer used for the safe copy
 	 * was allocated in setup_allocindir_phase2 using
 	 * the physical address so it could be used for this
 	 * purpose. Hence we swap the safe copy with the real
 	 * copy, allowing the safe copy to be freed and holding
 	 * on to the real copy for later use in indir_trunc.
 	 */
 	if (indirdep->ir_state & GOINGAWAY)
 		panic("cancel_indirdep: already gone");
 	if ((indirdep->ir_state & DEPCOMPLETE) == 0) {
 		indirdep->ir_state |= DEPCOMPLETE;
 		LIST_REMOVE(indirdep, ir_next);
 	}
 	indirdep->ir_state |= GOINGAWAY;
 	/*
 	 * Pass in bp for blocks still have journal writes
 	 * pending so we can cancel them on their own.
 	 */
 	while ((aip = LIST_FIRST(&indirdep->ir_deplisthd)) != NULL)
 		cancel_allocindir(aip, bp, freeblks, 0);
 	while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != NULL)
 		cancel_allocindir(aip, NULL, freeblks, 0);
 	while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != NULL)
 		cancel_allocindir(aip, NULL, freeblks, 0);
 	while ((aip = LIST_FIRST(&indirdep->ir_completehd)) != NULL)
 		cancel_allocindir(aip, NULL, freeblks, 0);
 	/*
 	 * If there are pending partial truncations we need to keep the
 	 * old block copy around until they complete.  This is because
 	 * the current b_data is not a perfect superset of the available
 	 * blocks.
 	 */
 	if (TAILQ_EMPTY(&indirdep->ir_trunc))
 		bcopy(bp->b_data, indirdep->ir_savebp->b_data, bp->b_bcount);
 	else
 		bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
 	WORKLIST_REMOVE(&indirdep->ir_list);
 	WORKLIST_INSERT(&indirdep->ir_savebp->b_dep, &indirdep->ir_list);
 	indirdep->ir_bp = NULL;
 	indirdep->ir_freeblks = freeblks;
 }
 
 /*
  * Free an indirdep once it no longer has new pointers to track.
  */
 static void
 free_indirdep(indirdep)
 	struct indirdep *indirdep;
 {
 
 	KASSERT(TAILQ_EMPTY(&indirdep->ir_trunc),
 	    ("free_indirdep: Indir trunc list not empty."));
 	KASSERT(LIST_EMPTY(&indirdep->ir_completehd),
 	    ("free_indirdep: Complete head not empty."));
 	KASSERT(LIST_EMPTY(&indirdep->ir_writehd),
 	    ("free_indirdep: write head not empty."));
 	KASSERT(LIST_EMPTY(&indirdep->ir_donehd),
 	    ("free_indirdep: done head not empty."));
 	KASSERT(LIST_EMPTY(&indirdep->ir_deplisthd),
 	    ("free_indirdep: deplist head not empty."));
 	KASSERT((indirdep->ir_state & DEPCOMPLETE),
 	    ("free_indirdep: %p still on newblk list.", indirdep));
 	KASSERT(indirdep->ir_saveddata == NULL,
 	    ("free_indirdep: %p still has saved data.", indirdep));
 	if (indirdep->ir_state & ONWORKLIST)
 		WORKLIST_REMOVE(&indirdep->ir_list);
 	WORKITEM_FREE(indirdep, D_INDIRDEP);
 }
 
 /*
  * Called before a write to an indirdep.  This routine is responsible for
  * rolling back pointers to a safe state which includes only those
  * allocindirs which have been completed.
  */
 static void
 initiate_write_indirdep(indirdep, bp)
 	struct indirdep *indirdep;
 	struct buf *bp;
 {
 	struct ufsmount *ump;
 
 	indirdep->ir_state |= IOSTARTED;
 	if (indirdep->ir_state & GOINGAWAY)
 		panic("disk_io_initiation: indirdep gone");
 	/*
 	 * If there are no remaining dependencies, this will be writing
 	 * the real pointers.
 	 */
 	if (LIST_EMPTY(&indirdep->ir_deplisthd) &&
 	    TAILQ_EMPTY(&indirdep->ir_trunc))
 		return;
 	/*
 	 * Replace up-to-date version with safe version.
 	 */
 	if (indirdep->ir_saveddata == NULL) {
 		ump = VFSTOUFS(indirdep->ir_list.wk_mp);
 		LOCK_OWNED(ump);
 		FREE_LOCK(ump);
 		indirdep->ir_saveddata = malloc(bp->b_bcount, M_INDIRDEP,
 		    M_SOFTDEP_FLAGS);
 		ACQUIRE_LOCK(ump);
 	}
 	indirdep->ir_state &= ~ATTACHED;
 	indirdep->ir_state |= UNDONE;
 	bcopy(bp->b_data, indirdep->ir_saveddata, bp->b_bcount);
 	bcopy(indirdep->ir_savebp->b_data, bp->b_data,
 	    bp->b_bcount);
 }
 
 /*
  * Called when an inode has been cleared in a cg bitmap.  This finally
  * eliminates any canceled jaddrefs
  */
 void
 softdep_setup_inofree(mp, bp, ino, wkhd)
 	struct mount *mp;
 	struct buf *bp;
 	ino_t ino;
 	struct workhead *wkhd;
 {
 	struct worklist *wk, *wkn;
 	struct inodedep *inodedep;
 	struct ufsmount *ump;
 	uint8_t *inosused;
 	struct cg *cgp;
 	struct fs *fs;
 
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_setup_inofree called on non-softdep filesystem"));
 	ump = VFSTOUFS(mp);
 	ACQUIRE_LOCK(ump);
 	fs = ump->um_fs;
 	cgp = (struct cg *)bp->b_data;
 	inosused = cg_inosused(cgp);
 	if (isset(inosused, ino % fs->fs_ipg))
 		panic("softdep_setup_inofree: inode %ju not freed.",
 		    (uintmax_t)ino);
 	if (inodedep_lookup(mp, ino, 0, &inodedep))
 		panic("softdep_setup_inofree: ino %ju has existing inodedep %p",
 		    (uintmax_t)ino, inodedep);
 	if (wkhd) {
 		LIST_FOREACH_SAFE(wk, wkhd, wk_list, wkn) {
 			if (wk->wk_type != D_JADDREF)
 				continue;
 			WORKLIST_REMOVE(wk);
 			/*
 			 * We can free immediately even if the jaddref
 			 * isn't attached in a background write as now
 			 * the bitmaps are reconciled.
 			 */
 			wk->wk_state |= COMPLETE | ATTACHED;
 			free_jaddref(WK_JADDREF(wk));
 		}
 		jwork_move(&bp->b_dep, wkhd);
 	}
 	FREE_LOCK(ump);
 }
 
 
 /*
  * Called via ffs_blkfree() after a set of frags has been cleared from a cg
  * map.  Any dependencies waiting for the write to clear are added to the
  * buf's list and any jnewblks that are being canceled are discarded
  * immediately.
  */
 void
 softdep_setup_blkfree(mp, bp, blkno, frags, wkhd)
 	struct mount *mp;
 	struct buf *bp;
 	ufs2_daddr_t blkno;
 	int frags;
 	struct workhead *wkhd;
 {
 	struct bmsafemap *bmsafemap;
 	struct jnewblk *jnewblk;
 	struct ufsmount *ump;
 	struct worklist *wk;
 	struct fs *fs;
 #ifdef SUJ_DEBUG
 	uint8_t *blksfree;
 	struct cg *cgp;
 	ufs2_daddr_t jstart;
 	ufs2_daddr_t jend;
 	ufs2_daddr_t end;
 	long bno;
 	int i;
 #endif
 
 	CTR3(KTR_SUJ,
 	    "softdep_setup_blkfree: blkno %jd frags %d wk head %p",
 	    blkno, frags, wkhd);
 
 	ump = VFSTOUFS(mp);
 	KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
 	    ("softdep_setup_blkfree called on non-softdep filesystem"));
 	ACQUIRE_LOCK(ump);
 	/* Lookup the bmsafemap so we track when it is dirty. */
 	fs = ump->um_fs;
 	bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno), NULL);
 	/*
 	 * Detach any jnewblks which have been canceled.  They must linger
 	 * until the bitmap is cleared again by ffs_blkfree() to prevent
 	 * an unjournaled allocation from hitting the disk.
 	 */
 	if (wkhd) {
 		while ((wk = LIST_FIRST(wkhd)) != NULL) {
 			CTR2(KTR_SUJ,
 			    "softdep_setup_blkfree: blkno %jd wk type %d",
 			    blkno, wk->wk_type);
 			WORKLIST_REMOVE(wk);
 			if (wk->wk_type != D_JNEWBLK) {
 				WORKLIST_INSERT(&bmsafemap->sm_freehd, wk);
 				continue;
 			}
 			jnewblk = WK_JNEWBLK(wk);
 			KASSERT(jnewblk->jn_state & GOINGAWAY,
 			    ("softdep_setup_blkfree: jnewblk not canceled."));
 #ifdef SUJ_DEBUG
 			/*
 			 * Assert that this block is free in the bitmap
 			 * before we discard the jnewblk.
 			 */
 			cgp = (struct cg *)bp->b_data;
 			blksfree = cg_blksfree(cgp);
 			bno = dtogd(fs, jnewblk->jn_blkno);
 			for (i = jnewblk->jn_oldfrags;
 			    i < jnewblk->jn_frags; i++) {
 				if (isset(blksfree, bno + i))
 					continue;
 				panic("softdep_setup_blkfree: not free");
 			}
 #endif
 			/*
 			 * Even if it's not attached we can free immediately
 			 * as the new bitmap is correct.
 			 */
 			wk->wk_state |= COMPLETE | ATTACHED;
 			free_jnewblk(jnewblk);
 		}
 	}
 
 #ifdef SUJ_DEBUG
 	/*
 	 * Assert that we are not freeing a block which has an outstanding
 	 * allocation dependency.
 	 */
 	fs = VFSTOUFS(mp)->um_fs;
 	bmsafemap = bmsafemap_lookup(mp, bp, dtog(fs, blkno), NULL);
 	end = blkno + frags;
 	LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) {
 		/*
 		 * Don't match against blocks that will be freed when the
 		 * background write is done.
 		 */
 		if ((jnewblk->jn_state & (ATTACHED | COMPLETE | DEPCOMPLETE)) ==
 		    (COMPLETE | DEPCOMPLETE))
 			continue;
 		jstart = jnewblk->jn_blkno + jnewblk->jn_oldfrags;
 		jend = jnewblk->jn_blkno + jnewblk->jn_frags;
 		if ((blkno >= jstart && blkno < jend) ||
 		    (end > jstart && end <= jend)) {
 			printf("state 0x%X %jd - %d %d dep %p\n",
 			    jnewblk->jn_state, jnewblk->jn_blkno,
 			    jnewblk->jn_oldfrags, jnewblk->jn_frags,
 			    jnewblk->jn_dep);
 			panic("softdep_setup_blkfree: "
 			    "%jd-%jd(%d) overlaps with %jd-%jd",
 			    blkno, end, frags, jstart, jend);
 		}
 	}
 #endif
 	FREE_LOCK(ump);
 }
 
 /*
  * Revert a block allocation when the journal record that describes it
  * is not yet written.
  */
 static int
 jnewblk_rollback(jnewblk, fs, cgp, blksfree)
 	struct jnewblk *jnewblk;
 	struct fs *fs;
 	struct cg *cgp;
 	uint8_t *blksfree;
 {
 	ufs1_daddr_t fragno;
 	long cgbno, bbase;
 	int frags, blk;
 	int i;
 
 	frags = 0;
 	cgbno = dtogd(fs, jnewblk->jn_blkno);
 	/*
 	 * We have to test which frags need to be rolled back.  We may
 	 * be operating on a stale copy when doing background writes.
 	 */
 	for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++)
 		if (isclr(blksfree, cgbno + i))
 			frags++;
 	if (frags == 0)
 		return (0);
 	/*
 	 * This is mostly ffs_blkfree() sans some validation and
 	 * superblock updates.
 	 */
 	if (frags == fs->fs_frag) {
 		fragno = fragstoblks(fs, cgbno);
 		ffs_setblock(fs, blksfree, fragno);
 		ffs_clusteracct(fs, cgp, fragno, 1);
 		cgp->cg_cs.cs_nbfree++;
 	} else {
 		cgbno += jnewblk->jn_oldfrags;
 		bbase = cgbno - fragnum(fs, cgbno);
 		/* Decrement the old frags.  */
 		blk = blkmap(fs, blksfree, bbase);
 		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
 		/* Deallocate the fragment */
 		for (i = 0; i < frags; i++)
 			setbit(blksfree, cgbno + i);
 		cgp->cg_cs.cs_nffree += frags;
 		/* Add back in counts associated with the new frags */
 		blk = blkmap(fs, blksfree, bbase);
 		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
 		/* If a complete block has been reassembled, account for it. */
 		fragno = fragstoblks(fs, bbase);
 		if (ffs_isblock(fs, blksfree, fragno)) {
 			cgp->cg_cs.cs_nffree -= fs->fs_frag;
 			ffs_clusteracct(fs, cgp, fragno, 1);
 			cgp->cg_cs.cs_nbfree++;
 		}
 	}
 	stat_jnewblk++;
 	jnewblk->jn_state &= ~ATTACHED;
 	jnewblk->jn_state |= UNDONE;
 
 	return (frags);
 }
 
 static void
 initiate_write_bmsafemap(bmsafemap, bp)
 	struct bmsafemap *bmsafemap;
 	struct buf *bp;			/* The cg block. */
 {
 	struct jaddref *jaddref;
 	struct jnewblk *jnewblk;
 	uint8_t *inosused;
 	uint8_t *blksfree;
 	struct cg *cgp;
 	struct fs *fs;
 	ino_t ino;
 
 	/*
 	 * If this is a background write, we did this at the time that
 	 * the copy was made, so do not need to do it again.
 	 */
 	if (bmsafemap->sm_state & IOSTARTED)
 		return;
 	bmsafemap->sm_state |= IOSTARTED;
 	/*
 	 * Clear any inode allocations which are pending journal writes.
 	 */
 	if (LIST_FIRST(&bmsafemap->sm_jaddrefhd) != NULL) {
 		cgp = (struct cg *)bp->b_data;
 		fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs;
 		inosused = cg_inosused(cgp);
 		LIST_FOREACH(jaddref, &bmsafemap->sm_jaddrefhd, ja_bmdeps) {
 			ino = jaddref->ja_ino % fs->fs_ipg;
 			if (isset(inosused, ino)) {
 				if ((jaddref->ja_mode & IFMT) == IFDIR)
 					cgp->cg_cs.cs_ndir--;
 				cgp->cg_cs.cs_nifree++;
 				clrbit(inosused, ino);
 				jaddref->ja_state &= ~ATTACHED;
 				jaddref->ja_state |= UNDONE;
 				stat_jaddref++;
 			} else
 				panic("initiate_write_bmsafemap: inode %ju "
 				    "marked free", (uintmax_t)jaddref->ja_ino);
 		}
 	}
 	/*
 	 * Clear any block allocations which are pending journal writes.
 	 */
 	if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) {
 		cgp = (struct cg *)bp->b_data;
 		fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs;
 		blksfree = cg_blksfree(cgp);
 		LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) {
 			if (jnewblk_rollback(jnewblk, fs, cgp, blksfree))
 				continue;
 			panic("initiate_write_bmsafemap: block %jd "
 			    "marked free", jnewblk->jn_blkno);
 		}
 	}
 	/*
 	 * Move allocation lists to the written lists so they can be
 	 * cleared once the block write is complete.
 	 */
 	LIST_SWAP(&bmsafemap->sm_inodedephd, &bmsafemap->sm_inodedepwr,
 	    inodedep, id_deps);
 	LIST_SWAP(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr,
 	    newblk, nb_deps);
 	LIST_SWAP(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr, worklist,
 	    wk_list);
 }
 
 /*
  * This routine is called during the completion interrupt
  * service routine for a disk write (from the procedure called
  * by the device driver to inform the filesystem caches of
  * a request completion).  It should be called early in this
  * procedure, before the block is made available to other
  * processes or other routines are called.
  *
  */
 static void 
 softdep_disk_write_complete(bp)
 	struct buf *bp;		/* describes the completed disk write */
 {
 	struct worklist *wk;
 	struct worklist *owk;
 	struct ufsmount *ump;
 	struct workhead reattach;
 	struct freeblks *freeblks;
 	struct buf *sbp;
 
 	/*
 	 * If an error occurred while doing the write, then the data
 	 * has not hit the disk and the dependencies cannot be processed.
 	 * But we do have to go through and roll forward any dependencies
 	 * that were rolled back before the disk write.
 	 */
 	if ((bp->b_ioflags & BIO_ERROR) != 0 && (bp->b_flags & B_INVAL) == 0) {
 		LIST_FOREACH(wk, &bp->b_dep, wk_list) {
 			switch (wk->wk_type) {
 
 			case D_PAGEDEP:
 				handle_written_filepage(WK_PAGEDEP(wk), bp, 0);
 				continue;
 
 			case D_INODEDEP:
 				handle_written_inodeblock(WK_INODEDEP(wk),
 				    bp, 0);
 				continue;
 
 			case D_BMSAFEMAP:
 				handle_written_bmsafemap(WK_BMSAFEMAP(wk),
 				    bp, 0);
 				continue;
 
 			case D_INDIRDEP:
 				handle_written_indirdep(WK_INDIRDEP(wk),
 				    bp, &sbp, 0);
 				continue;
 			default:
 				/* nothing to roll forward */
 				continue;
 			}
 		}
 		return;
 	}
 	if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
 		return;
 	ump = VFSTOUFS(wk->wk_mp);
 	LIST_INIT(&reattach);
 	/*
 	 * This lock must not be released anywhere in this code segment.
 	 */
 	sbp = NULL;
 	owk = NULL;
 	ACQUIRE_LOCK(ump);
 	while ((wk = LIST_FIRST(&bp->b_dep)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		atomic_add_long(&dep_write[wk->wk_type], 1);
 		if (wk == owk)
 			panic("duplicate worklist: %p\n", wk);
 		owk = wk;
 		switch (wk->wk_type) {
 
 		case D_PAGEDEP:
 			if (handle_written_filepage(WK_PAGEDEP(wk), bp,
 			    WRITESUCCEEDED))
 				WORKLIST_INSERT(&reattach, wk);
 			continue;
 
 		case D_INODEDEP:
 			if (handle_written_inodeblock(WK_INODEDEP(wk), bp,
 			    WRITESUCCEEDED))
 				WORKLIST_INSERT(&reattach, wk);
 			continue;
 
 		case D_BMSAFEMAP:
 			if (handle_written_bmsafemap(WK_BMSAFEMAP(wk), bp,
 			    WRITESUCCEEDED))
 				WORKLIST_INSERT(&reattach, wk);
 			continue;
 
 		case D_MKDIR:
 			handle_written_mkdir(WK_MKDIR(wk), MKDIR_BODY);
 			continue;
 
 		case D_ALLOCDIRECT:
 			wk->wk_state |= COMPLETE;
 			handle_allocdirect_partdone(WK_ALLOCDIRECT(wk), NULL);
 			continue;
 
 		case D_ALLOCINDIR:
 			wk->wk_state |= COMPLETE;
 			handle_allocindir_partdone(WK_ALLOCINDIR(wk));
 			continue;
 
 		case D_INDIRDEP:
 			if (handle_written_indirdep(WK_INDIRDEP(wk), bp, &sbp,
 			    WRITESUCCEEDED))
 				WORKLIST_INSERT(&reattach, wk);
 			continue;
 
 		case D_FREEBLKS:
 			wk->wk_state |= COMPLETE;
 			freeblks = WK_FREEBLKS(wk);
 			if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE &&
 			    LIST_EMPTY(&freeblks->fb_jblkdephd))
 				add_to_worklist(wk, WK_NODELAY);
 			continue;
 
 		case D_FREEWORK:
 			handle_written_freework(WK_FREEWORK(wk));
 			break;
 
 		case D_JSEGDEP:
 			free_jsegdep(WK_JSEGDEP(wk));
 			continue;
 
 		case D_JSEG:
 			handle_written_jseg(WK_JSEG(wk), bp);
 			continue;
 
 		case D_SBDEP:
 			if (handle_written_sbdep(WK_SBDEP(wk), bp))
 				WORKLIST_INSERT(&reattach, wk);
 			continue;
 
 		case D_FREEDEP:
 			free_freedep(WK_FREEDEP(wk));
 			continue;
 
 		default:
 			panic("handle_disk_write_complete: Unknown type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 	/*
 	 * Reattach any requests that must be redone.
 	 */
 	while ((wk = LIST_FIRST(&reattach)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		WORKLIST_INSERT(&bp->b_dep, wk);
 	}
 	FREE_LOCK(ump);
 	if (sbp)
 		brelse(sbp);
 }
 
 /*
  * Called from within softdep_disk_write_complete above. Note that
  * this routine is always called from interrupt level with further
  * splbio interrupts blocked.
  */
 static void 
 handle_allocdirect_partdone(adp, wkhd)
 	struct allocdirect *adp;	/* the completed allocdirect */
 	struct workhead *wkhd;		/* Work to do when inode is writtne. */
 {
 	struct allocdirectlst *listhead;
 	struct allocdirect *listadp;
 	struct inodedep *inodedep;
 	long bsize;
 
 	if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
 		return;
 	/*
 	 * The on-disk inode cannot claim to be any larger than the last
 	 * fragment that has been written. Otherwise, the on-disk inode
 	 * might have fragments that were not the last block in the file
 	 * which would corrupt the filesystem. Thus, we cannot free any
 	 * allocdirects after one whose ad_oldblkno claims a fragment as
 	 * these blocks must be rolled back to zero before writing the inode.
 	 * We check the currently active set of allocdirects in id_inoupdt
 	 * or id_extupdt as appropriate.
 	 */
 	inodedep = adp->ad_inodedep;
 	bsize = inodedep->id_fs->fs_bsize;
 	if (adp->ad_state & EXTDATA)
 		listhead = &inodedep->id_extupdt;
 	else
 		listhead = &inodedep->id_inoupdt;
 	TAILQ_FOREACH(listadp, listhead, ad_next) {
 		/* found our block */
 		if (listadp == adp)
 			break;
 		/* continue if ad_oldlbn is not a fragment */
 		if (listadp->ad_oldsize == 0 ||
 		    listadp->ad_oldsize == bsize)
 			continue;
 		/* hit a fragment */
 		return;
 	}
 	/*
 	 * If we have reached the end of the current list without
 	 * finding the just finished dependency, then it must be
 	 * on the future dependency list. Future dependencies cannot
 	 * be freed until they are moved to the current list.
 	 */
 	if (listadp == NULL) {
 #ifdef DEBUG
 		if (adp->ad_state & EXTDATA)
 			listhead = &inodedep->id_newextupdt;
 		else
 			listhead = &inodedep->id_newinoupdt;
 		TAILQ_FOREACH(listadp, listhead, ad_next)
 			/* found our block */
 			if (listadp == adp)
 				break;
 		if (listadp == NULL)
 			panic("handle_allocdirect_partdone: lost dep");
 #endif /* DEBUG */
 		return;
 	}
 	/*
 	 * If we have found the just finished dependency, then queue
 	 * it along with anything that follows it that is complete.
 	 * Since the pointer has not yet been written in the inode
 	 * as the dependency prevents it, place the allocdirect on the
 	 * bufwait list where it will be freed once the pointer is
 	 * valid.
 	 */
 	if (wkhd == NULL)
 		wkhd = &inodedep->id_bufwait;
 	for (; adp; adp = listadp) {
 		listadp = TAILQ_NEXT(adp, ad_next);
 		if ((adp->ad_state & ALLCOMPLETE) != ALLCOMPLETE)
 			return;
 		TAILQ_REMOVE(listhead, adp, ad_next);
 		WORKLIST_INSERT(wkhd, &adp->ad_block.nb_list);
 	}
 }
 
 /*
  * Called from within softdep_disk_write_complete above.  This routine
  * completes successfully written allocindirs.
  */
 static void
 handle_allocindir_partdone(aip)
 	struct allocindir *aip;		/* the completed allocindir */
 {
 	struct indirdep *indirdep;
 
 	if ((aip->ai_state & ALLCOMPLETE) != ALLCOMPLETE)
 		return;
 	indirdep = aip->ai_indirdep;
 	LIST_REMOVE(aip, ai_next);
 	/*
 	 * Don't set a pointer while the buffer is undergoing IO or while
 	 * we have active truncations.
 	 */
 	if (indirdep->ir_state & UNDONE || !TAILQ_EMPTY(&indirdep->ir_trunc)) {
 		LIST_INSERT_HEAD(&indirdep->ir_donehd, aip, ai_next);
 		return;
 	}
 	if (indirdep->ir_state & UFS1FMT)
 		((ufs1_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
 		    aip->ai_newblkno;
 	else
 		((ufs2_daddr_t *)indirdep->ir_savebp->b_data)[aip->ai_offset] =
 		    aip->ai_newblkno;
 	/*
 	 * Await the pointer write before freeing the allocindir.
 	 */
 	LIST_INSERT_HEAD(&indirdep->ir_writehd, aip, ai_next);
 }
 
 /*
  * Release segments held on a jwork list.
  */
 static void
 handle_jwork(wkhd)
 	struct workhead *wkhd;
 {
 	struct worklist *wk;
 
 	while ((wk = LIST_FIRST(wkhd)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		switch (wk->wk_type) {
 		case D_JSEGDEP:
 			free_jsegdep(WK_JSEGDEP(wk));
 			continue;
 		case D_FREEDEP:
 			free_freedep(WK_FREEDEP(wk));
 			continue;
 		case D_FREEFRAG:
 			rele_jseg(WK_JSEG(WK_FREEFRAG(wk)->ff_jdep));
 			WORKITEM_FREE(wk, D_FREEFRAG);
 			continue;
 		case D_FREEWORK:
 			handle_written_freework(WK_FREEWORK(wk));
 			continue;
 		default:
 			panic("handle_jwork: Unknown type %s\n",
 			    TYPENAME(wk->wk_type));
 		}
 	}
 }
 
 /*
  * Handle the bufwait list on an inode when it is safe to release items
  * held there.  This normally happens after an inode block is written but
  * may be delayed and handled later if there are pending journal items that
  * are not yet safe to be released.
  */
 static struct freefile *
 handle_bufwait(inodedep, refhd)
 	struct inodedep *inodedep;
 	struct workhead *refhd;
 {
 	struct jaddref *jaddref;
 	struct freefile *freefile;
 	struct worklist *wk;
 
 	freefile = NULL;
 	while ((wk = LIST_FIRST(&inodedep->id_bufwait)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		switch (wk->wk_type) {
 		case D_FREEFILE:
 			/*
 			 * We defer adding freefile to the worklist
 			 * until all other additions have been made to
 			 * ensure that it will be done after all the
 			 * old blocks have been freed.
 			 */
 			if (freefile != NULL)
 				panic("handle_bufwait: freefile");
 			freefile = WK_FREEFILE(wk);
 			continue;
 
 		case D_MKDIR:
 			handle_written_mkdir(WK_MKDIR(wk), MKDIR_PARENT);
 			continue;
 
 		case D_DIRADD:
 			diradd_inode_written(WK_DIRADD(wk), inodedep);
 			continue;
 
 		case D_FREEFRAG:
 			wk->wk_state |= COMPLETE;
 			if ((wk->wk_state & ALLCOMPLETE) == ALLCOMPLETE)
 				add_to_worklist(wk, 0);
 			continue;
 
 		case D_DIRREM:
 			wk->wk_state |= COMPLETE;
 			add_to_worklist(wk, 0);
 			continue;
 
 		case D_ALLOCDIRECT:
 		case D_ALLOCINDIR:
 			free_newblk(WK_NEWBLK(wk));
 			continue;
 
 		case D_JNEWBLK:
 			wk->wk_state |= COMPLETE;
 			free_jnewblk(WK_JNEWBLK(wk));
 			continue;
 
 		/*
 		 * Save freed journal segments and add references on
 		 * the supplied list which will delay their release
 		 * until the cg bitmap is cleared on disk.
 		 */
 		case D_JSEGDEP:
 			if (refhd == NULL)
 				free_jsegdep(WK_JSEGDEP(wk));
 			else
 				WORKLIST_INSERT(refhd, wk);
 			continue;
 
 		case D_JADDREF:
 			jaddref = WK_JADDREF(wk);
 			TAILQ_REMOVE(&inodedep->id_inoreflst, &jaddref->ja_ref,
 			    if_deps);
 			/*
 			 * Transfer any jaddrefs to the list to be freed with
 			 * the bitmap if we're handling a removed file.
 			 */
 			if (refhd == NULL) {
 				wk->wk_state |= COMPLETE;
 				free_jaddref(jaddref);
 			} else
 				WORKLIST_INSERT(refhd, wk);
 			continue;
 
 		default:
 			panic("handle_bufwait: Unknown type %p(%s)",
 			    wk, TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 	return (freefile);
 }
 /*
  * Called from within softdep_disk_write_complete above to restore
  * in-memory inode block contents to their most up-to-date state. Note
  * that this routine is always called from interrupt level with further
  * interrupts from this device blocked.
  *
  * If the write did not succeed, we will do all the roll-forward
  * operations, but we will not take the actions that will allow its
  * dependencies to be processed.
  */
 static int 
 handle_written_inodeblock(inodedep, bp, flags)
 	struct inodedep *inodedep;
 	struct buf *bp;		/* buffer containing the inode block */
 	int flags;
 {
 	struct freefile *freefile;
 	struct allocdirect *adp, *nextadp;
 	struct ufs1_dinode *dp1 = NULL;
 	struct ufs2_dinode *dp2 = NULL;
 	struct workhead wkhd;
 	int hadchanges, fstype;
 	ino_t freelink;
 
 	LIST_INIT(&wkhd);
 	hadchanges = 0;
 	freefile = NULL;
 	if ((inodedep->id_state & IOSTARTED) == 0)
 		panic("handle_written_inodeblock: not started");
 	inodedep->id_state &= ~IOSTARTED;
 	if (inodedep->id_fs->fs_magic == FS_UFS1_MAGIC) {
 		fstype = UFS1;
 		dp1 = (struct ufs1_dinode *)bp->b_data +
 		    ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
 		freelink = dp1->di_freelink;
 	} else {
 		fstype = UFS2;
 		dp2 = (struct ufs2_dinode *)bp->b_data +
 		    ino_to_fsbo(inodedep->id_fs, inodedep->id_ino);
 		freelink = dp2->di_freelink;
 	}
 	/*
 	 * Leave this inodeblock dirty until it's in the list.
 	 */
 	if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) == UNLINKED &&
 	    (flags & WRITESUCCEEDED)) {
 		struct inodedep *inon;
 
 		inon = TAILQ_NEXT(inodedep, id_unlinked);
 		if ((inon == NULL && freelink == 0) ||
 		    (inon && inon->id_ino == freelink)) {
 			if (inon)
 				inon->id_state |= UNLINKPREV;
 			inodedep->id_state |= UNLINKNEXT;
 		}
 		hadchanges = 1;
 	}
 	/*
 	 * If we had to rollback the inode allocation because of
 	 * bitmaps being incomplete, then simply restore it.
 	 * Keep the block dirty so that it will not be reclaimed until
 	 * all associated dependencies have been cleared and the
 	 * corresponding updates written to disk.
 	 */
 	if (inodedep->id_savedino1 != NULL) {
 		hadchanges = 1;
 		if (fstype == UFS1)
 			*dp1 = *inodedep->id_savedino1;
 		else
 			*dp2 = *inodedep->id_savedino2;
 		free(inodedep->id_savedino1, M_SAVEDINO);
 		inodedep->id_savedino1 = NULL;
 		if ((bp->b_flags & B_DELWRI) == 0)
 			stat_inode_bitmap++;
 		bdirty(bp);
 		/*
 		 * If the inode is clear here and GOINGAWAY it will never
 		 * be written.  Process the bufwait and clear any pending
 		 * work which may include the freefile.
 		 */
 		if (inodedep->id_state & GOINGAWAY)
 			goto bufwait;
 		return (1);
 	}
 	if (flags & WRITESUCCEEDED)
 		inodedep->id_state |= COMPLETE;
 	/*
 	 * Roll forward anything that had to be rolled back before 
 	 * the inode could be updated.
 	 */
 	for (adp = TAILQ_FIRST(&inodedep->id_inoupdt); adp; adp = nextadp) {
 		nextadp = TAILQ_NEXT(adp, ad_next);
 		if (adp->ad_state & ATTACHED)
 			panic("handle_written_inodeblock: new entry");
 		if (fstype == UFS1) {
 			if (adp->ad_offset < NDADDR) {
 				if (dp1->di_db[adp->ad_offset]!=adp->ad_oldblkno)
 					panic("%s %s #%jd mismatch %d != %jd",
 					    "handle_written_inodeblock:",
 					    "direct pointer",
 					    (intmax_t)adp->ad_offset,
 					    dp1->di_db[adp->ad_offset],
 					    (intmax_t)adp->ad_oldblkno);
 				dp1->di_db[adp->ad_offset] = adp->ad_newblkno;
 			} else {
 				if (dp1->di_ib[adp->ad_offset - NDADDR] != 0)
 					panic("%s: %s #%jd allocated as %d",
 					    "handle_written_inodeblock",
 					    "indirect pointer",
 					    (intmax_t)adp->ad_offset - NDADDR,
 					    dp1->di_ib[adp->ad_offset - NDADDR]);
 				dp1->di_ib[adp->ad_offset - NDADDR] =
 				    adp->ad_newblkno;
 			}
 		} else {
 			if (adp->ad_offset < NDADDR) {
 				if (dp2->di_db[adp->ad_offset]!=adp->ad_oldblkno)
 					panic("%s: %s #%jd %s %jd != %jd",
 					    "handle_written_inodeblock",
 					    "direct pointer",
 					    (intmax_t)adp->ad_offset, "mismatch",
 					    (intmax_t)dp2->di_db[adp->ad_offset],
 					    (intmax_t)adp->ad_oldblkno);
 				dp2->di_db[adp->ad_offset] = adp->ad_newblkno;
 			} else {
 				if (dp2->di_ib[adp->ad_offset - NDADDR] != 0)
 					panic("%s: %s #%jd allocated as %jd",
 					    "handle_written_inodeblock",
 					    "indirect pointer",
 					    (intmax_t)adp->ad_offset - NDADDR,
 					    (intmax_t)
 					    dp2->di_ib[adp->ad_offset - NDADDR]);
 				dp2->di_ib[adp->ad_offset - NDADDR] =
 				    adp->ad_newblkno;
 			}
 		}
 		adp->ad_state &= ~UNDONE;
 		adp->ad_state |= ATTACHED;
 		hadchanges = 1;
 	}
 	for (adp = TAILQ_FIRST(&inodedep->id_extupdt); adp; adp = nextadp) {
 		nextadp = TAILQ_NEXT(adp, ad_next);
 		if (adp->ad_state & ATTACHED)
 			panic("handle_written_inodeblock: new entry");
 		if (dp2->di_extb[adp->ad_offset] != adp->ad_oldblkno)
 			panic("%s: direct pointers #%jd %s %jd != %jd",
 			    "handle_written_inodeblock",
 			    (intmax_t)adp->ad_offset, "mismatch",
 			    (intmax_t)dp2->di_extb[adp->ad_offset],
 			    (intmax_t)adp->ad_oldblkno);
 		dp2->di_extb[adp->ad_offset] = adp->ad_newblkno;
 		adp->ad_state &= ~UNDONE;
 		adp->ad_state |= ATTACHED;
 		hadchanges = 1;
 	}
 	if (hadchanges && (bp->b_flags & B_DELWRI) == 0)
 		stat_direct_blk_ptrs++;
 	/*
 	 * Reset the file size to its most up-to-date value.
 	 */
 	if (inodedep->id_savedsize == -1 || inodedep->id_savedextsize == -1)
 		panic("handle_written_inodeblock: bad size");
 	if (inodedep->id_savednlink > LINK_MAX)
 		panic("handle_written_inodeblock: Invalid link count "
-		    "%d for inodedep %p", inodedep->id_savednlink, inodedep);
+		    "%jd for inodedep %p", (uintmax_t)inodedep->id_savednlink,
+		    inodedep);
 	if (fstype == UFS1) {
 		if (dp1->di_nlink != inodedep->id_savednlink) { 
 			dp1->di_nlink = inodedep->id_savednlink;
 			hadchanges = 1;
 		}
 		if (dp1->di_size != inodedep->id_savedsize) {
 			dp1->di_size = inodedep->id_savedsize;
 			hadchanges = 1;
 		}
 	} else {
 		if (dp2->di_nlink != inodedep->id_savednlink) { 
 			dp2->di_nlink = inodedep->id_savednlink;
 			hadchanges = 1;
 		}
 		if (dp2->di_size != inodedep->id_savedsize) {
 			dp2->di_size = inodedep->id_savedsize;
 			hadchanges = 1;
 		}
 		if (dp2->di_extsize != inodedep->id_savedextsize) {
 			dp2->di_extsize = inodedep->id_savedextsize;
 			hadchanges = 1;
 		}
 	}
 	inodedep->id_savedsize = -1;
 	inodedep->id_savedextsize = -1;
 	inodedep->id_savednlink = -1;
 	/*
 	 * If there were any rollbacks in the inode block, then it must be
 	 * marked dirty so that its will eventually get written back in
 	 * its correct form.
 	 */
 	if (hadchanges)
 		bdirty(bp);
 bufwait:
 	/*
 	 * If the write did not succeed, we have done all the roll-forward
 	 * operations, but we cannot take the actions that will allow its
 	 * dependencies to be processed.
 	 */
 	if ((flags & WRITESUCCEEDED) == 0)
 		return (hadchanges);
 	/*
 	 * Process any allocdirects that completed during the update.
 	 */
 	if ((adp = TAILQ_FIRST(&inodedep->id_inoupdt)) != NULL)
 		handle_allocdirect_partdone(adp, &wkhd);
 	if ((adp = TAILQ_FIRST(&inodedep->id_extupdt)) != NULL)
 		handle_allocdirect_partdone(adp, &wkhd);
 	/*
 	 * Process deallocations that were held pending until the
 	 * inode had been written to disk. Freeing of the inode
 	 * is delayed until after all blocks have been freed to
 	 * avoid creation of new <vfsid, inum, lbn> triples
 	 * before the old ones have been deleted.  Completely
 	 * unlinked inodes are not processed until the unlinked
 	 * inode list is written or the last reference is removed.
 	 */
 	if ((inodedep->id_state & (UNLINKED | UNLINKONLIST)) != UNLINKED) {
 		freefile = handle_bufwait(inodedep, NULL);
 		if (freefile && !LIST_EMPTY(&wkhd)) {
 			WORKLIST_INSERT(&wkhd, &freefile->fx_list);
 			freefile = NULL;
 		}
 	}
 	/*
 	 * Move rolled forward dependency completions to the bufwait list
 	 * now that those that were already written have been processed.
 	 */
 	if (!LIST_EMPTY(&wkhd) && hadchanges == 0)
 		panic("handle_written_inodeblock: bufwait but no changes");
 	jwork_move(&inodedep->id_bufwait, &wkhd);
 
 	if (freefile != NULL) {
 		/*
 		 * If the inode is goingaway it was never written.  Fake up
 		 * the state here so free_inodedep() can succeed.
 		 */
 		if (inodedep->id_state & GOINGAWAY)
 			inodedep->id_state |= COMPLETE | DEPCOMPLETE;
 		if (free_inodedep(inodedep) == 0)
 			panic("handle_written_inodeblock: live inodedep %p",
 			    inodedep);
 		add_to_worklist(&freefile->fx_list, 0);
 		return (0);
 	}
 
 	/*
 	 * If no outstanding dependencies, free it.
 	 */
 	if (free_inodedep(inodedep) ||
 	    (TAILQ_FIRST(&inodedep->id_inoreflst) == 0 &&
 	     TAILQ_FIRST(&inodedep->id_inoupdt) == 0 &&
 	     TAILQ_FIRST(&inodedep->id_extupdt) == 0 &&
 	     LIST_FIRST(&inodedep->id_bufwait) == 0))
 		return (0);
 	return (hadchanges);
 }
 
 /*
  * Perform needed roll-forwards and kick off any dependencies that
  * can now be processed.
  *
  * If the write did not succeed, we will do all the roll-forward
  * operations, but we will not take the actions that will allow its
  * dependencies to be processed.
  */
 static int
 handle_written_indirdep(indirdep, bp, bpp, flags)
 	struct indirdep *indirdep;
 	struct buf *bp;
 	struct buf **bpp;
 	int flags;
 {
 	struct allocindir *aip;
 	struct buf *sbp;
 	int chgs;
 
 	if (indirdep->ir_state & GOINGAWAY)
 		panic("handle_written_indirdep: indirdep gone");
 	if ((indirdep->ir_state & IOSTARTED) == 0)
 		panic("handle_written_indirdep: IO not started");
 	chgs = 0;
 	/*
 	 * If there were rollbacks revert them here.
 	 */
 	if (indirdep->ir_saveddata) {
 		bcopy(indirdep->ir_saveddata, bp->b_data, bp->b_bcount);
 		if (TAILQ_EMPTY(&indirdep->ir_trunc)) {
 			free(indirdep->ir_saveddata, M_INDIRDEP);
 			indirdep->ir_saveddata = NULL;
 		}
 		chgs = 1;
 	}
 	indirdep->ir_state &= ~(UNDONE | IOSTARTED);
 	indirdep->ir_state |= ATTACHED;
 	/*
 	 * If the write did not succeed, we have done all the roll-forward
 	 * operations, but we cannot take the actions that will allow its
 	 * dependencies to be processed.
 	 */
 	if ((flags & WRITESUCCEEDED) == 0) {
 		stat_indir_blk_ptrs++;
 		bdirty(bp);
 		return (1);
 	}
 	/*
 	 * Move allocindirs with written pointers to the completehd if
 	 * the indirdep's pointer is not yet written.  Otherwise
 	 * free them here.
 	 */
 	while ((aip = LIST_FIRST(&indirdep->ir_writehd)) != NULL) {
 		LIST_REMOVE(aip, ai_next);
 		if ((indirdep->ir_state & DEPCOMPLETE) == 0) {
 			LIST_INSERT_HEAD(&indirdep->ir_completehd, aip,
 			    ai_next);
 			newblk_freefrag(&aip->ai_block);
 			continue;
 		}
 		free_newblk(&aip->ai_block);
 	}
 	/*
 	 * Move allocindirs that have finished dependency processing from
 	 * the done list to the write list after updating the pointers.
 	 */
 	if (TAILQ_EMPTY(&indirdep->ir_trunc)) {
 		while ((aip = LIST_FIRST(&indirdep->ir_donehd)) != NULL) {
 			handle_allocindir_partdone(aip);
 			if (aip == LIST_FIRST(&indirdep->ir_donehd))
 				panic("disk_write_complete: not gone");
 			chgs = 1;
 		}
 	}
 	/*
 	 * Preserve the indirdep if there were any changes or if it is not
 	 * yet valid on disk.
 	 */
 	if (chgs) {
 		stat_indir_blk_ptrs++;
 		bdirty(bp);
 		return (1);
 	}
 	/*
 	 * If there were no changes we can discard the savedbp and detach
 	 * ourselves from the buf.  We are only carrying completed pointers
 	 * in this case.
 	 */
 	sbp = indirdep->ir_savebp;
 	sbp->b_flags |= B_INVAL | B_NOCACHE;
 	indirdep->ir_savebp = NULL;
 	indirdep->ir_bp = NULL;
 	if (*bpp != NULL)
 		panic("handle_written_indirdep: bp already exists.");
 	*bpp = sbp;
 	/*
 	 * The indirdep may not be freed until its parent points at it.
 	 */
 	if (indirdep->ir_state & DEPCOMPLETE)
 		free_indirdep(indirdep);
 
 	return (0);
 }
 
 /*
  * Process a diradd entry after its dependent inode has been written.
  * This routine must be called with splbio interrupts blocked.
  */
 static void
 diradd_inode_written(dap, inodedep)
 	struct diradd *dap;
 	struct inodedep *inodedep;
 {
 
 	dap->da_state |= COMPLETE;
 	complete_diradd(dap);
 	WORKLIST_INSERT(&inodedep->id_pendinghd, &dap->da_list);
 }
 
 /*
  * Returns true if the bmsafemap will have rollbacks when written.  Must only
  * be called with the per-filesystem lock and the buf lock on the cg held.
  */
 static int
 bmsafemap_backgroundwrite(bmsafemap, bp)
 	struct bmsafemap *bmsafemap;
 	struct buf *bp;
 {
 	int dirty;
 
 	LOCK_OWNED(VFSTOUFS(bmsafemap->sm_list.wk_mp));
 	dirty = !LIST_EMPTY(&bmsafemap->sm_jaddrefhd) | 
 	    !LIST_EMPTY(&bmsafemap->sm_jnewblkhd);
 	/*
 	 * If we're initiating a background write we need to process the
 	 * rollbacks as they exist now, not as they exist when IO starts.
 	 * No other consumers will look at the contents of the shadowed
 	 * buf so this is safe to do here.
 	 */
 	if (bp->b_xflags & BX_BKGRDMARKER)
 		initiate_write_bmsafemap(bmsafemap, bp);
 
 	return (dirty);
 }
 
 /*
  * Re-apply an allocation when a cg write is complete.
  */
 static int
 jnewblk_rollforward(jnewblk, fs, cgp, blksfree)
 	struct jnewblk *jnewblk;
 	struct fs *fs;
 	struct cg *cgp;
 	uint8_t *blksfree;
 {
 	ufs1_daddr_t fragno;
 	ufs2_daddr_t blkno;
 	long cgbno, bbase;
 	int frags, blk;
 	int i;
 
 	frags = 0;
 	cgbno = dtogd(fs, jnewblk->jn_blkno);
 	for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) {
 		if (isclr(blksfree, cgbno + i))
 			panic("jnewblk_rollforward: re-allocated fragment");
 		frags++;
 	}
 	if (frags == fs->fs_frag) {
 		blkno = fragstoblks(fs, cgbno);
 		ffs_clrblock(fs, blksfree, (long)blkno);
 		ffs_clusteracct(fs, cgp, blkno, -1);
 		cgp->cg_cs.cs_nbfree--;
 	} else {
 		bbase = cgbno - fragnum(fs, cgbno);
 		cgbno += jnewblk->jn_oldfrags;
                 /* If a complete block had been reassembled, account for it. */
 		fragno = fragstoblks(fs, bbase);
 		if (ffs_isblock(fs, blksfree, fragno)) {
 			cgp->cg_cs.cs_nffree += fs->fs_frag;
 			ffs_clusteracct(fs, cgp, fragno, -1);
 			cgp->cg_cs.cs_nbfree--;
 		}
 		/* Decrement the old frags.  */
 		blk = blkmap(fs, blksfree, bbase);
 		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
 		/* Allocate the fragment */
 		for (i = 0; i < frags; i++)
 			clrbit(blksfree, cgbno + i);
 		cgp->cg_cs.cs_nffree -= frags;
 		/* Add back in counts associated with the new frags */
 		blk = blkmap(fs, blksfree, bbase);
 		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
 	}
 	return (frags);
 }
 
 /*
  * Complete a write to a bmsafemap structure.  Roll forward any bitmap
  * changes if it's not a background write.  Set all written dependencies 
  * to DEPCOMPLETE and free the structure if possible.
  *
  * If the write did not succeed, we will do all the roll-forward
  * operations, but we will not take the actions that will allow its
  * dependencies to be processed.
  */
 static int
 handle_written_bmsafemap(bmsafemap, bp, flags)
 	struct bmsafemap *bmsafemap;
 	struct buf *bp;
 	int flags;
 {
 	struct newblk *newblk;
 	struct inodedep *inodedep;
 	struct jaddref *jaddref, *jatmp;
 	struct jnewblk *jnewblk, *jntmp;
 	struct ufsmount *ump;
 	uint8_t *inosused;
 	uint8_t *blksfree;
 	struct cg *cgp;
 	struct fs *fs;
 	ino_t ino;
 	int foreground;
 	int chgs;
 
 	if ((bmsafemap->sm_state & IOSTARTED) == 0)
 		panic("handle_written_bmsafemap: Not started\n");
 	ump = VFSTOUFS(bmsafemap->sm_list.wk_mp);
 	chgs = 0;
 	bmsafemap->sm_state &= ~IOSTARTED;
 	foreground = (bp->b_xflags & BX_BKGRDMARKER) == 0;
 	/*
 	 * If write was successful, release journal work that was waiting
 	 * on the write. Otherwise move the work back.
 	 */
 	if (flags & WRITESUCCEEDED)
 		handle_jwork(&bmsafemap->sm_freewr);
 	else
 		LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr,
 		    worklist, wk_list);
 
 	/*
 	 * Restore unwritten inode allocation pending jaddref writes.
 	 */
 	if (!LIST_EMPTY(&bmsafemap->sm_jaddrefhd)) {
 		cgp = (struct cg *)bp->b_data;
 		fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs;
 		inosused = cg_inosused(cgp);
 		LIST_FOREACH_SAFE(jaddref, &bmsafemap->sm_jaddrefhd,
 		    ja_bmdeps, jatmp) {
 			if ((jaddref->ja_state & UNDONE) == 0)
 				continue;
 			ino = jaddref->ja_ino % fs->fs_ipg;
 			if (isset(inosused, ino))
 				panic("handle_written_bmsafemap: "
 				    "re-allocated inode");
 			/* Do the roll-forward only if it's a real copy. */
 			if (foreground) {
 				if ((jaddref->ja_mode & IFMT) == IFDIR)
 					cgp->cg_cs.cs_ndir++;
 				cgp->cg_cs.cs_nifree--;
 				setbit(inosused, ino);
 				chgs = 1;
 			}
 			jaddref->ja_state &= ~UNDONE;
 			jaddref->ja_state |= ATTACHED;
 			free_jaddref(jaddref);
 		}
 	}
 	/*
 	 * Restore any block allocations which are pending journal writes.
 	 */
 	if (LIST_FIRST(&bmsafemap->sm_jnewblkhd) != NULL) {
 		cgp = (struct cg *)bp->b_data;
 		fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs;
 		blksfree = cg_blksfree(cgp);
 		LIST_FOREACH_SAFE(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps,
 		    jntmp) {
 			if ((jnewblk->jn_state & UNDONE) == 0)
 				continue;
 			/* Do the roll-forward only if it's a real copy. */
 			if (foreground &&
 			    jnewblk_rollforward(jnewblk, fs, cgp, blksfree))
 				chgs = 1;
 			jnewblk->jn_state &= ~(UNDONE | NEWBLOCK);
 			jnewblk->jn_state |= ATTACHED;
 			free_jnewblk(jnewblk);
 		}
 	}
 	/*
 	 * If the write did not succeed, we have done all the roll-forward
 	 * operations, but we cannot take the actions that will allow its
 	 * dependencies to be processed.
 	 */
 	if ((flags & WRITESUCCEEDED) == 0) {
 		LIST_CONCAT(&bmsafemap->sm_newblkhd, &bmsafemap->sm_newblkwr,
 		    newblk, nb_deps);
 		LIST_CONCAT(&bmsafemap->sm_freehd, &bmsafemap->sm_freewr,
 		    worklist, wk_list);
 		if (foreground)
 			bdirty(bp);
 		return (1);
 	}
 	while ((newblk = LIST_FIRST(&bmsafemap->sm_newblkwr))) {
 		newblk->nb_state |= DEPCOMPLETE;
 		newblk->nb_state &= ~ONDEPLIST;
 		newblk->nb_bmsafemap = NULL;
 		LIST_REMOVE(newblk, nb_deps);
 		if (newblk->nb_list.wk_type == D_ALLOCDIRECT)
 			handle_allocdirect_partdone(
 			    WK_ALLOCDIRECT(&newblk->nb_list), NULL);
 		else if (newblk->nb_list.wk_type == D_ALLOCINDIR)
 			handle_allocindir_partdone(
 			    WK_ALLOCINDIR(&newblk->nb_list));
 		else if (newblk->nb_list.wk_type != D_NEWBLK)
 			panic("handle_written_bmsafemap: Unexpected type: %s",
 			    TYPENAME(newblk->nb_list.wk_type));
 	}
 	while ((inodedep = LIST_FIRST(&bmsafemap->sm_inodedepwr)) != NULL) {
 		inodedep->id_state |= DEPCOMPLETE;
 		inodedep->id_state &= ~ONDEPLIST;
 		LIST_REMOVE(inodedep, id_deps);
 		inodedep->id_bmsafemap = NULL;
 	}
 	LIST_REMOVE(bmsafemap, sm_next);
 	if (chgs == 0 && LIST_EMPTY(&bmsafemap->sm_jaddrefhd) &&
 	    LIST_EMPTY(&bmsafemap->sm_jnewblkhd) &&
 	    LIST_EMPTY(&bmsafemap->sm_newblkhd) &&
 	    LIST_EMPTY(&bmsafemap->sm_inodedephd) &&
 	    LIST_EMPTY(&bmsafemap->sm_freehd)) {
 		LIST_REMOVE(bmsafemap, sm_hash);
 		WORKITEM_FREE(bmsafemap, D_BMSAFEMAP);
 		return (0);
 	}
 	LIST_INSERT_HEAD(&ump->softdep_dirtycg, bmsafemap, sm_next);
 	if (foreground)
 		bdirty(bp);
 	return (1);
 }
 
 /*
  * Try to free a mkdir dependency.
  */
 static void
 complete_mkdir(mkdir)
 	struct mkdir *mkdir;
 {
 	struct diradd *dap;
 
 	if ((mkdir->md_state & ALLCOMPLETE) != ALLCOMPLETE)
 		return;
 	LIST_REMOVE(mkdir, md_mkdirs);
 	dap = mkdir->md_diradd;
 	dap->da_state &= ~(mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY));
 	if ((dap->da_state & (MKDIR_PARENT | MKDIR_BODY)) == 0) {
 		dap->da_state |= DEPCOMPLETE;
 		complete_diradd(dap);
 	}
 	WORKITEM_FREE(mkdir, D_MKDIR);
 }
 
 /*
  * Handle the completion of a mkdir dependency.
  */
 static void
 handle_written_mkdir(mkdir, type)
 	struct mkdir *mkdir;
 	int type;
 {
 
 	if ((mkdir->md_state & (MKDIR_PARENT | MKDIR_BODY)) != type)
 		panic("handle_written_mkdir: bad type");
 	mkdir->md_state |= COMPLETE;
 	complete_mkdir(mkdir);
 }
 
 static int
 free_pagedep(pagedep)
 	struct pagedep *pagedep;
 {
 	int i;
 
 	if (pagedep->pd_state & NEWBLOCK)
 		return (0);
 	if (!LIST_EMPTY(&pagedep->pd_dirremhd))
 		return (0);
 	for (i = 0; i < DAHASHSZ; i++)
 		if (!LIST_EMPTY(&pagedep->pd_diraddhd[i]))
 			return (0);
 	if (!LIST_EMPTY(&pagedep->pd_pendinghd))
 		return (0);
 	if (!LIST_EMPTY(&pagedep->pd_jmvrefhd))
 		return (0);
 	if (pagedep->pd_state & ONWORKLIST)
 		WORKLIST_REMOVE(&pagedep->pd_list);
 	LIST_REMOVE(pagedep, pd_hash);
 	WORKITEM_FREE(pagedep, D_PAGEDEP);
 
 	return (1);
 }
 
 /*
  * Called from within softdep_disk_write_complete above.
  * A write operation was just completed. Removed inodes can
  * now be freed and associated block pointers may be committed.
  * Note that this routine is always called from interrupt level
  * with further interrupts from this device blocked.
  *
  * If the write did not succeed, we will do all the roll-forward
  * operations, but we will not take the actions that will allow its
  * dependencies to be processed.
  */
 static int 
 handle_written_filepage(pagedep, bp, flags)
 	struct pagedep *pagedep;
 	struct buf *bp;		/* buffer containing the written page */
 	int flags;
 {
 	struct dirrem *dirrem;
 	struct diradd *dap, *nextdap;
 	struct direct *ep;
 	int i, chgs;
 
 	if ((pagedep->pd_state & IOSTARTED) == 0)
 		panic("handle_written_filepage: not started");
 	pagedep->pd_state &= ~IOSTARTED;
 	if ((flags & WRITESUCCEEDED) == 0)
 		goto rollforward;
 	/*
 	 * Process any directory removals that have been committed.
 	 */
 	while ((dirrem = LIST_FIRST(&pagedep->pd_dirremhd)) != NULL) {
 		LIST_REMOVE(dirrem, dm_next);
 		dirrem->dm_state |= COMPLETE;
 		dirrem->dm_dirinum = pagedep->pd_ino;
 		KASSERT(LIST_EMPTY(&dirrem->dm_jremrefhd),
 		    ("handle_written_filepage: Journal entries not written."));
 		add_to_worklist(&dirrem->dm_list, 0);
 	}
 	/*
 	 * Free any directory additions that have been committed.
 	 * If it is a newly allocated block, we have to wait until
 	 * the on-disk directory inode claims the new block.
 	 */
 	if ((pagedep->pd_state & NEWBLOCK) == 0)
 		while ((dap = LIST_FIRST(&pagedep->pd_pendinghd)) != NULL)
 			free_diradd(dap, NULL);
 rollforward:
 	/*
 	 * Uncommitted directory entries must be restored.
 	 */
 	for (chgs = 0, i = 0; i < DAHASHSZ; i++) {
 		for (dap = LIST_FIRST(&pagedep->pd_diraddhd[i]); dap;
 		     dap = nextdap) {
 			nextdap = LIST_NEXT(dap, da_pdlist);
 			if (dap->da_state & ATTACHED)
 				panic("handle_written_filepage: attached");
 			ep = (struct direct *)
 			    ((char *)bp->b_data + dap->da_offset);
 			ep->d_ino = dap->da_newinum;
 			dap->da_state &= ~UNDONE;
 			dap->da_state |= ATTACHED;
 			chgs = 1;
 			/*
 			 * If the inode referenced by the directory has
 			 * been written out, then the dependency can be
 			 * moved to the pending list.
 			 */
 			if ((dap->da_state & ALLCOMPLETE) == ALLCOMPLETE) {
 				LIST_REMOVE(dap, da_pdlist);
 				LIST_INSERT_HEAD(&pagedep->pd_pendinghd, dap,
 				    da_pdlist);
 			}
 		}
 	}
 	/*
 	 * If there were any rollbacks in the directory, then it must be
 	 * marked dirty so that its will eventually get written back in
 	 * its correct form.
 	 */
 	if (chgs || (flags & WRITESUCCEEDED) == 0) {
 		if ((bp->b_flags & B_DELWRI) == 0)
 			stat_dir_entry++;
 		bdirty(bp);
 		return (1);
 	}
 	/*
 	 * If we are not waiting for a new directory block to be
 	 * claimed by its inode, then the pagedep will be freed.
 	 * Otherwise it will remain to track any new entries on
 	 * the page in case they are fsync'ed.
 	 */
 	free_pagedep(pagedep);
 	return (0);
 }
 
 /*
  * Writing back in-core inode structures.
  * 
  * The filesystem only accesses an inode's contents when it occupies an
  * "in-core" inode structure.  These "in-core" structures are separate from
  * the page frames used to cache inode blocks.  Only the latter are
  * transferred to/from the disk.  So, when the updated contents of the
  * "in-core" inode structure are copied to the corresponding in-memory inode
  * block, the dependencies are also transferred.  The following procedure is
  * called when copying a dirty "in-core" inode to a cached inode block.
  */
 
 /*
  * Called when an inode is loaded from disk. If the effective link count
  * differed from the actual link count when it was last flushed, then we
  * need to ensure that the correct effective link count is put back.
  */
 void 
 softdep_load_inodeblock(ip)
 	struct inode *ip;	/* the "in_core" copy of the inode */
 {
 	struct inodedep *inodedep;
 	struct ufsmount *ump;
 
 	ump = ITOUMP(ip);
 	KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
 	    ("softdep_load_inodeblock called on non-softdep filesystem"));
 	/*
 	 * Check for alternate nlink count.
 	 */
 	ip->i_effnlink = ip->i_nlink;
 	ACQUIRE_LOCK(ump);
 	if (inodedep_lookup(UFSTOVFS(ump), ip->i_number, 0, &inodedep) == 0) {
 		FREE_LOCK(ump);
 		return;
 	}
 	ip->i_effnlink -= inodedep->id_nlinkdelta;
 	FREE_LOCK(ump);
 }
 
 /*
  * This routine is called just before the "in-core" inode
  * information is to be copied to the in-memory inode block.
  * Recall that an inode block contains several inodes. If
  * the force flag is set, then the dependencies will be
  * cleared so that the update can always be made. Note that
  * the buffer is locked when this routine is called, so we
  * will never be in the middle of writing the inode block 
  * to disk.
  */
 void 
 softdep_update_inodeblock(ip, bp, waitfor)
 	struct inode *ip;	/* the "in_core" copy of the inode */
 	struct buf *bp;		/* the buffer containing the inode block */
 	int waitfor;		/* nonzero => update must be allowed */
 {
 	struct inodedep *inodedep;
 	struct inoref *inoref;
 	struct ufsmount *ump;
 	struct worklist *wk;
 	struct mount *mp;
 	struct buf *ibp;
 	struct fs *fs;
 	int error;
 
 	ump = ITOUMP(ip);
 	mp = UFSTOVFS(ump);
 	KASSERT(MOUNTEDSOFTDEP(mp) != 0,
 	    ("softdep_update_inodeblock called on non-softdep filesystem"));
 	fs = ump->um_fs;
 	/*
 	 * Preserve the freelink that is on disk.  clear_unlinked_inodedep()
 	 * does not have access to the in-core ip so must write directly into
 	 * the inode block buffer when setting freelink.
 	 */
 	if (fs->fs_magic == FS_UFS1_MAGIC)
 		DIP_SET(ip, i_freelink, ((struct ufs1_dinode *)bp->b_data +
 		    ino_to_fsbo(fs, ip->i_number))->di_freelink);
 	else
 		DIP_SET(ip, i_freelink, ((struct ufs2_dinode *)bp->b_data +
 		    ino_to_fsbo(fs, ip->i_number))->di_freelink);
 	/*
 	 * If the effective link count is not equal to the actual link
 	 * count, then we must track the difference in an inodedep while
 	 * the inode is (potentially) tossed out of the cache. Otherwise,
 	 * if there is no existing inodedep, then there are no dependencies
 	 * to track.
 	 */
 	ACQUIRE_LOCK(ump);
 again:
 	if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) {
 		FREE_LOCK(ump);
 		if (ip->i_effnlink != ip->i_nlink)
 			panic("softdep_update_inodeblock: bad link count");
 		return;
 	}
 	if (inodedep->id_nlinkdelta != ip->i_nlink - ip->i_effnlink)
 		panic("softdep_update_inodeblock: bad delta");
 	/*
 	 * If we're flushing all dependencies we must also move any waiting
 	 * for journal writes onto the bufwait list prior to I/O.
 	 */
 	if (waitfor) {
 		TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
 			if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
 			    == DEPCOMPLETE) {
 				jwait(&inoref->if_list, MNT_WAIT);
 				goto again;
 			}
 		}
 	}
 	/*
 	 * Changes have been initiated. Anything depending on these
 	 * changes cannot occur until this inode has been written.
 	 */
 	inodedep->id_state &= ~COMPLETE;
 	if ((inodedep->id_state & ONWORKLIST) == 0)
 		WORKLIST_INSERT(&bp->b_dep, &inodedep->id_list);
 	/*
 	 * Any new dependencies associated with the incore inode must 
 	 * now be moved to the list associated with the buffer holding
 	 * the in-memory copy of the inode. Once merged process any
 	 * allocdirects that are completed by the merger.
 	 */
 	merge_inode_lists(&inodedep->id_newinoupdt, &inodedep->id_inoupdt);
 	if (!TAILQ_EMPTY(&inodedep->id_inoupdt))
 		handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_inoupdt),
 		    NULL);
 	merge_inode_lists(&inodedep->id_newextupdt, &inodedep->id_extupdt);
 	if (!TAILQ_EMPTY(&inodedep->id_extupdt))
 		handle_allocdirect_partdone(TAILQ_FIRST(&inodedep->id_extupdt),
 		    NULL);
 	/*
 	 * Now that the inode has been pushed into the buffer, the
 	 * operations dependent on the inode being written to disk
 	 * can be moved to the id_bufwait so that they will be
 	 * processed when the buffer I/O completes.
 	 */
 	while ((wk = LIST_FIRST(&inodedep->id_inowait)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		WORKLIST_INSERT(&inodedep->id_bufwait, wk);
 	}
 	/*
 	 * Newly allocated inodes cannot be written until the bitmap
 	 * that allocates them have been written (indicated by
 	 * DEPCOMPLETE being set in id_state). If we are doing a
 	 * forced sync (e.g., an fsync on a file), we force the bitmap
 	 * to be written so that the update can be done.
 	 */
 	if (waitfor == 0) {
 		FREE_LOCK(ump);
 		return;
 	}
 retry:
 	if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) != 0) {
 		FREE_LOCK(ump);
 		return;
 	}
 	ibp = inodedep->id_bmsafemap->sm_buf;
 	ibp = getdirtybuf(ibp, LOCK_PTR(ump), MNT_WAIT);
 	if (ibp == NULL) {
 		/*
 		 * If ibp came back as NULL, the dependency could have been
 		 * freed while we slept.  Look it up again, and check to see
 		 * that it has completed.
 		 */
 		if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0)
 			goto retry;
 		FREE_LOCK(ump);
 		return;
 	}
 	FREE_LOCK(ump);
 	if ((error = bwrite(ibp)) != 0)
 		softdep_error("softdep_update_inodeblock: bwrite", error);
 }
 
 /*
  * Merge the a new inode dependency list (such as id_newinoupdt) into an
  * old inode dependency list (such as id_inoupdt). This routine must be
  * called with splbio interrupts blocked.
  */
 static void
 merge_inode_lists(newlisthead, oldlisthead)
 	struct allocdirectlst *newlisthead;
 	struct allocdirectlst *oldlisthead;
 {
 	struct allocdirect *listadp, *newadp;
 
 	newadp = TAILQ_FIRST(newlisthead);
 	for (listadp = TAILQ_FIRST(oldlisthead); listadp && newadp;) {
 		if (listadp->ad_offset < newadp->ad_offset) {
 			listadp = TAILQ_NEXT(listadp, ad_next);
 			continue;
 		}
 		TAILQ_REMOVE(newlisthead, newadp, ad_next);
 		TAILQ_INSERT_BEFORE(listadp, newadp, ad_next);
 		if (listadp->ad_offset == newadp->ad_offset) {
 			allocdirect_merge(oldlisthead, newadp,
 			    listadp);
 			listadp = newadp;
 		}
 		newadp = TAILQ_FIRST(newlisthead);
 	}
 	while ((newadp = TAILQ_FIRST(newlisthead)) != NULL) {
 		TAILQ_REMOVE(newlisthead, newadp, ad_next);
 		TAILQ_INSERT_TAIL(oldlisthead, newadp, ad_next);
 	}
 }
 
 /*
  * If we are doing an fsync, then we must ensure that any directory
  * entries for the inode have been written after the inode gets to disk.
  */
 int
 softdep_fsync(vp)
 	struct vnode *vp;	/* the "in_core" copy of the inode */
 {
 	struct inodedep *inodedep;
 	struct pagedep *pagedep;
 	struct inoref *inoref;
 	struct ufsmount *ump;
 	struct worklist *wk;
 	struct diradd *dap;
 	struct mount *mp;
 	struct vnode *pvp;
 	struct inode *ip;
 	struct buf *bp;
 	struct fs *fs;
 	struct thread *td = curthread;
 	int error, flushparent, pagedep_new_block;
 	ino_t parentino;
 	ufs_lbn_t lbn;
 
 	ip = VTOI(vp);
 	mp = vp->v_mount;
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
 	if (MOUNTEDSOFTDEP(mp) == 0)
 		return (0);
 	ACQUIRE_LOCK(ump);
 restart:
 	if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0) {
 		FREE_LOCK(ump);
 		return (0);
 	}
 	TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
 		if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
 		    == DEPCOMPLETE) {
 			jwait(&inoref->if_list, MNT_WAIT);
 			goto restart;
 		}
 	}
 	if (!LIST_EMPTY(&inodedep->id_inowait) ||
 	    !TAILQ_EMPTY(&inodedep->id_extupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_newextupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_inoupdt) ||
 	    !TAILQ_EMPTY(&inodedep->id_newinoupdt))
 		panic("softdep_fsync: pending ops %p", inodedep);
 	for (error = 0, flushparent = 0; ; ) {
 		if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) == NULL)
 			break;
 		if (wk->wk_type != D_DIRADD)
 			panic("softdep_fsync: Unexpected type %s",
 			    TYPENAME(wk->wk_type));
 		dap = WK_DIRADD(wk);
 		/*
 		 * Flush our parent if this directory entry has a MKDIR_PARENT
 		 * dependency or is contained in a newly allocated block.
 		 */
 		if (dap->da_state & DIRCHG)
 			pagedep = dap->da_previous->dm_pagedep;
 		else
 			pagedep = dap->da_pagedep;
 		parentino = pagedep->pd_ino;
 		lbn = pagedep->pd_lbn;
 		if ((dap->da_state & (MKDIR_BODY | COMPLETE)) != COMPLETE)
 			panic("softdep_fsync: dirty");
 		if ((dap->da_state & MKDIR_PARENT) ||
 		    (pagedep->pd_state & NEWBLOCK))
 			flushparent = 1;
 		else
 			flushparent = 0;
 		/*
 		 * If we are being fsync'ed as part of vgone'ing this vnode,
 		 * then we will not be able to release and recover the
 		 * vnode below, so we just have to give up on writing its
 		 * directory entry out. It will eventually be written, just
 		 * not now, but then the user was not asking to have it
 		 * written, so we are not breaking any promises.
 		 */
 		if (vp->v_iflag & VI_DOOMED)
 			break;
 		/*
 		 * We prevent deadlock by always fetching inodes from the
 		 * root, moving down the directory tree. Thus, when fetching
 		 * our parent directory, we first try to get the lock. If
 		 * that fails, we must unlock ourselves before requesting
 		 * the lock on our parent. See the comment in ufs_lookup
 		 * for details on possible races.
 		 */
 		FREE_LOCK(ump);
 		if (ffs_vgetf(mp, parentino, LK_NOWAIT | LK_EXCLUSIVE, &pvp,
 		    FFSV_FORCEINSMQ)) {
 			error = vfs_busy(mp, MBF_NOWAIT);
 			if (error != 0) {
 				vfs_ref(mp);
 				VOP_UNLOCK(vp, 0);
 				error = vfs_busy(mp, 0);
 				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 				vfs_rel(mp);
 				if (error != 0)
 					return (ENOENT);
 				if (vp->v_iflag & VI_DOOMED) {
 					vfs_unbusy(mp);
 					return (ENOENT);
 				}
 			}
 			VOP_UNLOCK(vp, 0);
 			error = ffs_vgetf(mp, parentino, LK_EXCLUSIVE,
 			    &pvp, FFSV_FORCEINSMQ);
 			vfs_unbusy(mp);
 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 			if (vp->v_iflag & VI_DOOMED) {
 				if (error == 0)
 					vput(pvp);
 				error = ENOENT;
 			}
 			if (error != 0)
 				return (error);
 		}
 		/*
 		 * All MKDIR_PARENT dependencies and all the NEWBLOCK pagedeps
 		 * that are contained in direct blocks will be resolved by 
 		 * doing a ffs_update. Pagedeps contained in indirect blocks
 		 * may require a complete sync'ing of the directory. So, we
 		 * try the cheap and fast ffs_update first, and if that fails,
 		 * then we do the slower ffs_syncvnode of the directory.
 		 */
 		if (flushparent) {
 			int locked;
 
 			if ((error = ffs_update(pvp, 1)) != 0) {
 				vput(pvp);
 				return (error);
 			}
 			ACQUIRE_LOCK(ump);
 			locked = 1;
 			if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) != 0) {
 				if ((wk = LIST_FIRST(&inodedep->id_pendinghd)) != NULL) {
 					if (wk->wk_type != D_DIRADD)
 						panic("softdep_fsync: Unexpected type %s",
 						      TYPENAME(wk->wk_type));
 					dap = WK_DIRADD(wk);
 					if (dap->da_state & DIRCHG)
 						pagedep = dap->da_previous->dm_pagedep;
 					else
 						pagedep = dap->da_pagedep;
 					pagedep_new_block = pagedep->pd_state & NEWBLOCK;
 					FREE_LOCK(ump);
 					locked = 0;
 					if (pagedep_new_block && (error =
 					    ffs_syncvnode(pvp, MNT_WAIT, 0))) {
 						vput(pvp);
 						return (error);
 					}
 				}
 			}
 			if (locked)
 				FREE_LOCK(ump);
 		}
 		/*
 		 * Flush directory page containing the inode's name.
 		 */
 		error = bread(pvp, lbn, blksize(fs, VTOI(pvp), lbn), td->td_ucred,
 		    &bp);
 		if (error == 0)
 			error = bwrite(bp);
 		else
 			brelse(bp);
 		vput(pvp);
 		if (error != 0)
 			return (error);
 		ACQUIRE_LOCK(ump);
 		if (inodedep_lookup(mp, ip->i_number, 0, &inodedep) == 0)
 			break;
 	}
 	FREE_LOCK(ump);
 	return (0);
 }
 
 /*
  * Flush all the dirty bitmaps associated with the block device
  * before flushing the rest of the dirty blocks so as to reduce
  * the number of dependencies that will have to be rolled back.
  *
  * XXX Unused?
  */
 void
 softdep_fsync_mountdev(vp)
 	struct vnode *vp;
 {
 	struct buf *bp, *nbp;
 	struct worklist *wk;
 	struct bufobj *bo;
 
 	if (!vn_isdisk(vp, NULL))
 		panic("softdep_fsync_mountdev: vnode not a disk");
 	bo = &vp->v_bufobj;
 restart:
 	BO_LOCK(bo);
 	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
 		/* 
 		 * If it is already scheduled, skip to the next buffer.
 		 */
 		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
 			continue;
 
 		if ((bp->b_flags & B_DELWRI) == 0)
 			panic("softdep_fsync_mountdev: not dirty");
 		/*
 		 * We are only interested in bitmaps with outstanding
 		 * dependencies.
 		 */
 		if ((wk = LIST_FIRST(&bp->b_dep)) == NULL ||
 		    wk->wk_type != D_BMSAFEMAP ||
 		    (bp->b_vflags & BV_BKGRDINPROG)) {
 			BUF_UNLOCK(bp);
 			continue;
 		}
 		BO_UNLOCK(bo);
 		bremfree(bp);
 		(void) bawrite(bp);
 		goto restart;
 	}
 	drain_output(vp);
 	BO_UNLOCK(bo);
 }
 
 /*
  * Sync all cylinder groups that were dirty at the time this function is
  * called.  Newly dirtied cgs will be inserted before the sentinel.  This
  * is used to flush freedep activity that may be holding up writes to a
  * indirect block.
  */
 static int
 sync_cgs(mp, waitfor)
 	struct mount *mp;
 	int waitfor;
 {
 	struct bmsafemap *bmsafemap;
 	struct bmsafemap *sentinel;
 	struct ufsmount *ump;
 	struct buf *bp;
 	int error;
 
 	sentinel = malloc(sizeof(*sentinel), M_BMSAFEMAP, M_ZERO | M_WAITOK);
 	sentinel->sm_cg = -1;
 	ump = VFSTOUFS(mp);
 	error = 0;
 	ACQUIRE_LOCK(ump);
 	LIST_INSERT_HEAD(&ump->softdep_dirtycg, sentinel, sm_next);
 	for (bmsafemap = LIST_NEXT(sentinel, sm_next); bmsafemap != NULL;
 	    bmsafemap = LIST_NEXT(sentinel, sm_next)) {
 		/* Skip sentinels and cgs with no work to release. */
 		if (bmsafemap->sm_cg == -1 ||
 		    (LIST_EMPTY(&bmsafemap->sm_freehd) &&
 		    LIST_EMPTY(&bmsafemap->sm_freewr))) {
 			LIST_REMOVE(sentinel, sm_next);
 			LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next);
 			continue;
 		}
 		/*
 		 * If we don't get the lock and we're waiting try again, if
 		 * not move on to the next buf and try to sync it.
 		 */
 		bp = getdirtybuf(bmsafemap->sm_buf, LOCK_PTR(ump), waitfor);
 		if (bp == NULL && waitfor == MNT_WAIT)
 			continue;
 		LIST_REMOVE(sentinel, sm_next);
 		LIST_INSERT_AFTER(bmsafemap, sentinel, sm_next);
 		if (bp == NULL)
 			continue;
 		FREE_LOCK(ump);
 		if (waitfor == MNT_NOWAIT)
 			bawrite(bp);
 		else
 			error = bwrite(bp);
 		ACQUIRE_LOCK(ump);
 		if (error)
 			break;
 	}
 	LIST_REMOVE(sentinel, sm_next);
 	FREE_LOCK(ump);
 	free(sentinel, M_BMSAFEMAP);
 	return (error);
 }
 
 /*
  * This routine is called when we are trying to synchronously flush a
  * file. This routine must eliminate any filesystem metadata dependencies
  * so that the syncing routine can succeed.
  */
 int
 softdep_sync_metadata(struct vnode *vp)
 {
 	struct inode *ip;
 	int error;
 
 	ip = VTOI(vp);
 	KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0,
 	    ("softdep_sync_metadata called on non-softdep filesystem"));
 	/*
 	 * Ensure that any direct block dependencies have been cleared,
 	 * truncations are started, and inode references are journaled.
 	 */
 	ACQUIRE_LOCK(VFSTOUFS(vp->v_mount));
 	/*
 	 * Write all journal records to prevent rollbacks on devvp.
 	 */
 	if (vp->v_type == VCHR)
 		softdep_flushjournal(vp->v_mount);
 	error = flush_inodedep_deps(vp, vp->v_mount, ip->i_number);
 	/*
 	 * Ensure that all truncates are written so we won't find deps on
 	 * indirect blocks.
 	 */
 	process_truncates(vp);
 	FREE_LOCK(VFSTOUFS(vp->v_mount));
 
 	return (error);
 }
 
 /*
  * This routine is called when we are attempting to sync a buf with
  * dependencies.  If waitfor is MNT_NOWAIT it attempts to schedule any
  * other IO it can but returns EBUSY if the buffer is not yet able to
  * be written.  Dependencies which will not cause rollbacks will always
  * return 0.
  */
 int
 softdep_sync_buf(struct vnode *vp, struct buf *bp, int waitfor)
 {
 	struct indirdep *indirdep;
 	struct pagedep *pagedep;
 	struct allocindir *aip;
 	struct newblk *newblk;
 	struct ufsmount *ump;
 	struct buf *nbp;
 	struct worklist *wk;
 	int i, error;
 
 	KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0,
 	    ("softdep_sync_buf called on non-softdep filesystem"));
 	/*
 	 * For VCHR we just don't want to force flush any dependencies that
 	 * will cause rollbacks.
 	 */
 	if (vp->v_type == VCHR) {
 		if (waitfor == MNT_NOWAIT && softdep_count_dependencies(bp, 0))
 			return (EBUSY);
 		return (0);
 	}
 	ump = VFSTOUFS(vp->v_mount);
 	ACQUIRE_LOCK(ump);
 	/*
 	 * As we hold the buffer locked, none of its dependencies
 	 * will disappear.
 	 */
 	error = 0;
 top:
 	LIST_FOREACH(wk, &bp->b_dep, wk_list) {
 		switch (wk->wk_type) {
 
 		case D_ALLOCDIRECT:
 		case D_ALLOCINDIR:
 			newblk = WK_NEWBLK(wk);
 			if (newblk->nb_jnewblk != NULL) {
 				if (waitfor == MNT_NOWAIT) {
 					error = EBUSY;
 					goto out_unlock;
 				}
 				jwait(&newblk->nb_jnewblk->jn_list, waitfor);
 				goto top;
 			}
 			if (newblk->nb_state & DEPCOMPLETE ||
 			    waitfor == MNT_NOWAIT)
 				continue;
 			nbp = newblk->nb_bmsafemap->sm_buf;
 			nbp = getdirtybuf(nbp, LOCK_PTR(ump), waitfor);
 			if (nbp == NULL)
 				goto top;
 			FREE_LOCK(ump);
 			if ((error = bwrite(nbp)) != 0)
 				goto out;
 			ACQUIRE_LOCK(ump);
 			continue;
 
 		case D_INDIRDEP:
 			indirdep = WK_INDIRDEP(wk);
 			if (waitfor == MNT_NOWAIT) {
 				if (!TAILQ_EMPTY(&indirdep->ir_trunc) ||
 				    !LIST_EMPTY(&indirdep->ir_deplisthd)) {
 					error = EBUSY;
 					goto out_unlock;
 				}
 			}
 			if (!TAILQ_EMPTY(&indirdep->ir_trunc))
 				panic("softdep_sync_buf: truncation pending.");
 		restart:
 			LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) {
 				newblk = (struct newblk *)aip;
 				if (newblk->nb_jnewblk != NULL) {
 					jwait(&newblk->nb_jnewblk->jn_list,
 					    waitfor);
 					goto restart;
 				}
 				if (newblk->nb_state & DEPCOMPLETE)
 					continue;
 				nbp = newblk->nb_bmsafemap->sm_buf;
 				nbp = getdirtybuf(nbp, LOCK_PTR(ump), waitfor);
 				if (nbp == NULL)
 					goto restart;
 				FREE_LOCK(ump);
 				if ((error = bwrite(nbp)) != 0)
 					goto out;
 				ACQUIRE_LOCK(ump);
 				goto restart;
 			}
 			continue;
 
 		case D_PAGEDEP:
 			/*
 			 * Only flush directory entries in synchronous passes.
 			 */
 			if (waitfor != MNT_WAIT) {
 				error = EBUSY;
 				goto out_unlock;
 			}
 			/*
 			 * While syncing snapshots, we must allow recursive
 			 * lookups.
 			 */
 			BUF_AREC(bp);
 			/*
 			 * We are trying to sync a directory that may
 			 * have dependencies on both its own metadata
 			 * and/or dependencies on the inodes of any
 			 * recently allocated files. We walk its diradd
 			 * lists pushing out the associated inode.
 			 */
 			pagedep = WK_PAGEDEP(wk);
 			for (i = 0; i < DAHASHSZ; i++) {
 				if (LIST_FIRST(&pagedep->pd_diraddhd[i]) == 0)
 					continue;
 				if ((error = flush_pagedep_deps(vp, wk->wk_mp,
 				    &pagedep->pd_diraddhd[i]))) {
 					BUF_NOREC(bp);
 					goto out_unlock;
 				}
 			}
 			BUF_NOREC(bp);
 			continue;
 
 		case D_FREEWORK:
 		case D_FREEDEP:
 		case D_JSEGDEP:
 		case D_JNEWBLK:
 			continue;
 
 		default:
 			panic("softdep_sync_buf: Unknown type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 out_unlock:
 	FREE_LOCK(ump);
 out:
 	return (error);
 }
 
 /*
  * Flush the dependencies associated with an inodedep.
  * Called with splbio blocked.
  */
 static int
 flush_inodedep_deps(vp, mp, ino)
 	struct vnode *vp;
 	struct mount *mp;
 	ino_t ino;
 {
 	struct inodedep *inodedep;
 	struct inoref *inoref;
 	struct ufsmount *ump;
 	int error, waitfor;
 
 	/*
 	 * This work is done in two passes. The first pass grabs most
 	 * of the buffers and begins asynchronously writing them. The
 	 * only way to wait for these asynchronous writes is to sleep
 	 * on the filesystem vnode which may stay busy for a long time
 	 * if the filesystem is active. So, instead, we make a second
 	 * pass over the dependencies blocking on each write. In the
 	 * usual case we will be blocking against a write that we
 	 * initiated, so when it is done the dependency will have been
 	 * resolved. Thus the second pass is expected to end quickly.
 	 * We give a brief window at the top of the loop to allow
 	 * any pending I/O to complete.
 	 */
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	for (error = 0, waitfor = MNT_NOWAIT; ; ) {
 		if (error)
 			return (error);
 		FREE_LOCK(ump);
 		ACQUIRE_LOCK(ump);
 restart:
 		if (inodedep_lookup(mp, ino, 0, &inodedep) == 0)
 			return (0);
 		TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
 			if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
 			    == DEPCOMPLETE) {
 				jwait(&inoref->if_list, MNT_WAIT);
 				goto restart;
 			}
 		}
 		if (flush_deplist(&inodedep->id_inoupdt, waitfor, &error) ||
 		    flush_deplist(&inodedep->id_newinoupdt, waitfor, &error) ||
 		    flush_deplist(&inodedep->id_extupdt, waitfor, &error) ||
 		    flush_deplist(&inodedep->id_newextupdt, waitfor, &error))
 			continue;
 		/*
 		 * If pass2, we are done, otherwise do pass 2.
 		 */
 		if (waitfor == MNT_WAIT)
 			break;
 		waitfor = MNT_WAIT;
 	}
 	/*
 	 * Try freeing inodedep in case all dependencies have been removed.
 	 */
 	if (inodedep_lookup(mp, ino, 0, &inodedep) != 0)
 		(void) free_inodedep(inodedep);
 	return (0);
 }
 
 /*
  * Flush an inode dependency list.
  * Called with splbio blocked.
  */
 static int
 flush_deplist(listhead, waitfor, errorp)
 	struct allocdirectlst *listhead;
 	int waitfor;
 	int *errorp;
 {
 	struct allocdirect *adp;
 	struct newblk *newblk;
 	struct ufsmount *ump;
 	struct buf *bp;
 
 	if ((adp = TAILQ_FIRST(listhead)) == NULL)
 		return (0);
 	ump = VFSTOUFS(adp->ad_list.wk_mp);
 	LOCK_OWNED(ump);
 	TAILQ_FOREACH(adp, listhead, ad_next) {
 		newblk = (struct newblk *)adp;
 		if (newblk->nb_jnewblk != NULL) {
 			jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT);
 			return (1);
 		}
 		if (newblk->nb_state & DEPCOMPLETE)
 			continue;
 		bp = newblk->nb_bmsafemap->sm_buf;
 		bp = getdirtybuf(bp, LOCK_PTR(ump), waitfor);
 		if (bp == NULL) {
 			if (waitfor == MNT_NOWAIT)
 				continue;
 			return (1);
 		}
 		FREE_LOCK(ump);
 		if (waitfor == MNT_NOWAIT)
 			bawrite(bp);
 		else 
 			*errorp = bwrite(bp);
 		ACQUIRE_LOCK(ump);
 		return (1);
 	}
 	return (0);
 }
 
 /*
  * Flush dependencies associated with an allocdirect block.
  */
 static int
 flush_newblk_dep(vp, mp, lbn)
 	struct vnode *vp;
 	struct mount *mp;
 	ufs_lbn_t lbn;
 {
 	struct newblk *newblk;
 	struct ufsmount *ump;
 	struct bufobj *bo;
 	struct inode *ip;
 	struct buf *bp;
 	ufs2_daddr_t blkno;
 	int error;
 
 	error = 0;
 	bo = &vp->v_bufobj;
 	ip = VTOI(vp);
 	blkno = DIP(ip, i_db[lbn]);
 	if (blkno == 0)
 		panic("flush_newblk_dep: Missing block");
 	ump = VFSTOUFS(mp);
 	ACQUIRE_LOCK(ump);
 	/*
 	 * Loop until all dependencies related to this block are satisfied.
 	 * We must be careful to restart after each sleep in case a write
 	 * completes some part of this process for us.
 	 */
 	for (;;) {
 		if (newblk_lookup(mp, blkno, 0, &newblk) == 0) {
 			FREE_LOCK(ump);
 			break;
 		}
 		if (newblk->nb_list.wk_type != D_ALLOCDIRECT)
 			panic("flush_newblk_deps: Bad newblk %p", newblk);
 		/*
 		 * Flush the journal.
 		 */
 		if (newblk->nb_jnewblk != NULL) {
 			jwait(&newblk->nb_jnewblk->jn_list, MNT_WAIT);
 			continue;
 		}
 		/*
 		 * Write the bitmap dependency.
 		 */
 		if ((newblk->nb_state & DEPCOMPLETE) == 0) {
 			bp = newblk->nb_bmsafemap->sm_buf;
 			bp = getdirtybuf(bp, LOCK_PTR(ump), MNT_WAIT);
 			if (bp == NULL)
 				continue;
 			FREE_LOCK(ump);
 			error = bwrite(bp);
 			if (error)
 				break;
 			ACQUIRE_LOCK(ump);
 			continue;
 		}
 		/*
 		 * Write the buffer.
 		 */
 		FREE_LOCK(ump);
 		BO_LOCK(bo);
 		bp = gbincore(bo, lbn);
 		if (bp != NULL) {
 			error = BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL |
 			    LK_INTERLOCK, BO_LOCKPTR(bo));
 			if (error == ENOLCK) {
 				ACQUIRE_LOCK(ump);
 				error = 0;
 				continue; /* Slept, retry */
 			}
 			if (error != 0)
 				break;	/* Failed */
 			if (bp->b_flags & B_DELWRI) {
 				bremfree(bp);
 				error = bwrite(bp);
 				if (error)
 					break;
 			} else
 				BUF_UNLOCK(bp);
 		} else
 			BO_UNLOCK(bo);
 		/*
 		 * We have to wait for the direct pointers to
 		 * point at the newdirblk before the dependency
 		 * will go away.
 		 */
 		error = ffs_update(vp, 1);
 		if (error)
 			break;
 		ACQUIRE_LOCK(ump);
 	}
 	return (error);
 }
 
 /*
  * Eliminate a pagedep dependency by flushing out all its diradd dependencies.
  * Called with splbio blocked.
  */
 static int
 flush_pagedep_deps(pvp, mp, diraddhdp)
 	struct vnode *pvp;
 	struct mount *mp;
 	struct diraddhd *diraddhdp;
 {
 	struct inodedep *inodedep;
 	struct inoref *inoref;
 	struct ufsmount *ump;
 	struct diradd *dap;
 	struct vnode *vp;
 	int error = 0;
 	struct buf *bp;
 	ino_t inum;
 	struct diraddhd unfinished;
 
 	LIST_INIT(&unfinished);
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 restart:
 	while ((dap = LIST_FIRST(diraddhdp)) != NULL) {
 		/*
 		 * Flush ourselves if this directory entry
 		 * has a MKDIR_PARENT dependency.
 		 */
 		if (dap->da_state & MKDIR_PARENT) {
 			FREE_LOCK(ump);
 			if ((error = ffs_update(pvp, 1)) != 0)
 				break;
 			ACQUIRE_LOCK(ump);
 			/*
 			 * If that cleared dependencies, go on to next.
 			 */
 			if (dap != LIST_FIRST(diraddhdp))
 				continue;
 			/*
 			 * All MKDIR_PARENT dependencies and all the
 			 * NEWBLOCK pagedeps that are contained in direct
 			 * blocks were resolved by doing above ffs_update.
 			 * Pagedeps contained in indirect blocks may
 			 * require a complete sync'ing of the directory.
 			 * We are in the midst of doing a complete sync,
 			 * so if they are not resolved in this pass we
 			 * defer them for now as they will be sync'ed by
 			 * our caller shortly.
 			 */
 			LIST_REMOVE(dap, da_pdlist);
 			LIST_INSERT_HEAD(&unfinished, dap, da_pdlist);
 			continue;
 		}
 		/*
 		 * A newly allocated directory must have its "." and
 		 * ".." entries written out before its name can be
 		 * committed in its parent. 
 		 */
 		inum = dap->da_newinum;
 		if (inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep) == 0)
 			panic("flush_pagedep_deps: lost inode1");
 		/*
 		 * Wait for any pending journal adds to complete so we don't
 		 * cause rollbacks while syncing.
 		 */
 		TAILQ_FOREACH(inoref, &inodedep->id_inoreflst, if_deps) {
 			if ((inoref->if_state & (DEPCOMPLETE | GOINGAWAY))
 			    == DEPCOMPLETE) {
 				jwait(&inoref->if_list, MNT_WAIT);
 				goto restart;
 			}
 		}
 		if (dap->da_state & MKDIR_BODY) {
 			FREE_LOCK(ump);
 			if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
 			    FFSV_FORCEINSMQ)))
 				break;
 			error = flush_newblk_dep(vp, mp, 0);
 			/*
 			 * If we still have the dependency we might need to
 			 * update the vnode to sync the new link count to
 			 * disk.
 			 */
 			if (error == 0 && dap == LIST_FIRST(diraddhdp))
 				error = ffs_update(vp, 1);
 			vput(vp);
 			if (error != 0)
 				break;
 			ACQUIRE_LOCK(ump);
 			/*
 			 * If that cleared dependencies, go on to next.
 			 */
 			if (dap != LIST_FIRST(diraddhdp))
 				continue;
 			if (dap->da_state & MKDIR_BODY) {
 				inodedep_lookup(UFSTOVFS(ump), inum, 0,
 				    &inodedep);
 				panic("flush_pagedep_deps: MKDIR_BODY "
 				    "inodedep %p dap %p vp %p",
 				    inodedep, dap, vp);
 			}
 		}
 		/*
 		 * Flush the inode on which the directory entry depends.
 		 * Having accounted for MKDIR_PARENT and MKDIR_BODY above,
 		 * the only remaining dependency is that the updated inode
 		 * count must get pushed to disk. The inode has already
 		 * been pushed into its inode buffer (via VOP_UPDATE) at
 		 * the time of the reference count change. So we need only
 		 * locate that buffer, ensure that there will be no rollback
 		 * caused by a bitmap dependency, then write the inode buffer.
 		 */
 retry:
 		if (inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep) == 0)
 			panic("flush_pagedep_deps: lost inode");
 		/*
 		 * If the inode still has bitmap dependencies,
 		 * push them to disk.
 		 */
 		if ((inodedep->id_state & (DEPCOMPLETE | GOINGAWAY)) == 0) {
 			bp = inodedep->id_bmsafemap->sm_buf;
 			bp = getdirtybuf(bp, LOCK_PTR(ump), MNT_WAIT);
 			if (bp == NULL)
 				goto retry;
 			FREE_LOCK(ump);
 			if ((error = bwrite(bp)) != 0)
 				break;
 			ACQUIRE_LOCK(ump);
 			if (dap != LIST_FIRST(diraddhdp))
 				continue;
 		}
 		/*
 		 * If the inode is still sitting in a buffer waiting
 		 * to be written or waiting for the link count to be
 		 * adjusted update it here to flush it to disk.
 		 */
 		if (dap == LIST_FIRST(diraddhdp)) {
 			FREE_LOCK(ump);
 			if ((error = ffs_vgetf(mp, inum, LK_EXCLUSIVE, &vp,
 			    FFSV_FORCEINSMQ)))
 				break;
 			error = ffs_update(vp, 1);
 			vput(vp);
 			if (error)
 				break;
 			ACQUIRE_LOCK(ump);
 		}
 		/*
 		 * If we have failed to get rid of all the dependencies
 		 * then something is seriously wrong.
 		 */
 		if (dap == LIST_FIRST(diraddhdp)) {
 			inodedep_lookup(UFSTOVFS(ump), inum, 0, &inodedep);
 			panic("flush_pagedep_deps: failed to flush " 
 			    "inodedep %p ino %ju dap %p",
 			    inodedep, (uintmax_t)inum, dap);
 		}
 	}
 	if (error)
 		ACQUIRE_LOCK(ump);
 	while ((dap = LIST_FIRST(&unfinished)) != NULL) {
 		LIST_REMOVE(dap, da_pdlist);
 		LIST_INSERT_HEAD(diraddhdp, dap, da_pdlist);
 	}
 	return (error);
 }
 
 /*
  * A large burst of file addition or deletion activity can drive the
  * memory load excessively high. First attempt to slow things down
  * using the techniques below. If that fails, this routine requests
  * the offending operations to fall back to running synchronously
  * until the memory load returns to a reasonable level.
  */
 int
 softdep_slowdown(vp)
 	struct vnode *vp;
 {
 	struct ufsmount *ump;
 	int jlow;
 	int max_softdeps_hard;
 
 	KASSERT(MOUNTEDSOFTDEP(vp->v_mount) != 0,
 	    ("softdep_slowdown called on non-softdep filesystem"));
 	ump = VFSTOUFS(vp->v_mount);
 	ACQUIRE_LOCK(ump);
 	jlow = 0;
 	/*
 	 * Check for journal space if needed.
 	 */
 	if (DOINGSUJ(vp)) {
 		if (journal_space(ump, 0) == 0)
 			jlow = 1;
 	}
 	/*
 	 * If the system is under its limits and our filesystem is
 	 * not responsible for more than our share of the usage and
 	 * we are not low on journal space, then no need to slow down.
 	 */
 	max_softdeps_hard = max_softdeps * 11 / 10;
 	if (dep_current[D_DIRREM] < max_softdeps_hard / 2 &&
 	    dep_current[D_INODEDEP] < max_softdeps_hard &&
 	    dep_current[D_INDIRDEP] < max_softdeps_hard / 1000 &&
 	    dep_current[D_FREEBLKS] < max_softdeps_hard && jlow == 0 &&
 	    ump->softdep_curdeps[D_DIRREM] <
 	    (max_softdeps_hard / 2) / stat_flush_threads &&
 	    ump->softdep_curdeps[D_INODEDEP] <
 	    max_softdeps_hard / stat_flush_threads &&
 	    ump->softdep_curdeps[D_INDIRDEP] <
 	    (max_softdeps_hard / 1000) / stat_flush_threads &&
 	    ump->softdep_curdeps[D_FREEBLKS] <
 	    max_softdeps_hard / stat_flush_threads) {
 		FREE_LOCK(ump);
   		return (0);
 	}
 	/*
 	 * If the journal is low or our filesystem is over its limit
 	 * then speedup the cleanup.
 	 */
 	if (ump->softdep_curdeps[D_INDIRDEP] <
 	    (max_softdeps_hard / 1000) / stat_flush_threads || jlow)
 		softdep_speedup(ump);
 	stat_sync_limit_hit += 1;
 	FREE_LOCK(ump);
 	/*
 	 * We only slow down the rate at which new dependencies are
 	 * generated if we are not using journaling. With journaling,
 	 * the cleanup should always be sufficient to keep things
 	 * under control.
 	 */
 	if (DOINGSUJ(vp))
 		return (0);
 	return (1);
 }
 
 /*
  * Called by the allocation routines when they are about to fail
  * in the hope that we can free up the requested resource (inodes
  * or disk space).
  * 
  * First check to see if the work list has anything on it. If it has,
  * clean up entries until we successfully free the requested resource.
  * Because this process holds inodes locked, we cannot handle any remove
  * requests that might block on a locked inode as that could lead to
  * deadlock. If the worklist yields none of the requested resource,
  * start syncing out vnodes to free up the needed space.
  */
 int
 softdep_request_cleanup(fs, vp, cred, resource)
 	struct fs *fs;
 	struct vnode *vp;
 	struct ucred *cred;
 	int resource;
 {
 	struct ufsmount *ump;
 	struct mount *mp;
 	struct vnode *lvp, *mvp;
 	long starttime;
 	ufs2_daddr_t needed;
 	int error;
 
 	/*
 	 * If we are being called because of a process doing a
 	 * copy-on-write, then it is not safe to process any
 	 * worklist items as we will recurse into the copyonwrite
 	 * routine.  This will result in an incoherent snapshot.
 	 * If the vnode that we hold is a snapshot, we must avoid
 	 * handling other resources that could cause deadlock.
 	 */
 	if ((curthread->td_pflags & TDP_COWINPROGRESS) || IS_SNAPSHOT(VTOI(vp)))
 		return (0);
 
 	if (resource == FLUSH_BLOCKS_WAIT)
 		stat_cleanup_blkrequests += 1;
 	else
 		stat_cleanup_inorequests += 1;
 
 	mp = vp->v_mount;
 	ump = VFSTOUFS(mp);
 	mtx_assert(UFS_MTX(ump), MA_OWNED);
 	UFS_UNLOCK(ump);
 	error = ffs_update(vp, 1);
 	if (error != 0 || MOUNTEDSOFTDEP(mp) == 0) {
 		UFS_LOCK(ump);
 		return (0);
 	}
 	/*
 	 * If we are in need of resources, start by cleaning up
 	 * any block removals associated with our inode.
 	 */
 	ACQUIRE_LOCK(ump);
 	process_removes(vp);
 	process_truncates(vp);
 	FREE_LOCK(ump);
 	/*
 	 * Now clean up at least as many resources as we will need.
 	 *
 	 * When requested to clean up inodes, the number that are needed
 	 * is set by the number of simultaneous writers (mnt_writeopcount)
 	 * plus a bit of slop (2) in case some more writers show up while
 	 * we are cleaning.
 	 *
 	 * When requested to free up space, the amount of space that
 	 * we need is enough blocks to allocate a full-sized segment
 	 * (fs_contigsumsize). The number of such segments that will
 	 * be needed is set by the number of simultaneous writers
 	 * (mnt_writeopcount) plus a bit of slop (2) in case some more
 	 * writers show up while we are cleaning.
 	 *
 	 * Additionally, if we are unpriviledged and allocating space,
 	 * we need to ensure that we clean up enough blocks to get the
 	 * needed number of blocks over the threshold of the minimum
 	 * number of blocks required to be kept free by the filesystem
 	 * (fs_minfree).
 	 */
 	if (resource == FLUSH_INODES_WAIT) {
 		needed = vp->v_mount->mnt_writeopcount + 2;
 	} else if (resource == FLUSH_BLOCKS_WAIT) {
 		needed = (vp->v_mount->mnt_writeopcount + 2) *
 		    fs->fs_contigsumsize;
 		if (priv_check_cred(cred, PRIV_VFS_BLOCKRESERVE, 0))
 			needed += fragstoblks(fs,
 			    roundup((fs->fs_dsize * fs->fs_minfree / 100) -
 			    fs->fs_cstotal.cs_nffree, fs->fs_frag));
 	} else {
 		UFS_LOCK(ump);
 		printf("softdep_request_cleanup: Unknown resource type %d\n",
 		    resource);
 		return (0);
 	}
 	starttime = time_second;
 retry:
 	if ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 &&
 	    fs->fs_cstotal.cs_nbfree <= needed) ||
 	    (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
 	    fs->fs_cstotal.cs_nifree <= needed)) {
 		ACQUIRE_LOCK(ump);
 		if (ump->softdep_on_worklist > 0 &&
 		    process_worklist_item(UFSTOVFS(ump),
 		    ump->softdep_on_worklist, LK_NOWAIT) != 0)
 			stat_worklist_push += 1;
 		FREE_LOCK(ump);
 	}
 	/*
 	 * If we still need resources and there are no more worklist
 	 * entries to process to obtain them, we have to start flushing
 	 * the dirty vnodes to force the release of additional requests
 	 * to the worklist that we can then process to reap addition
 	 * resources. We walk the vnodes associated with the mount point
 	 * until we get the needed worklist requests that we can reap.
 	 */
 	if ((resource == FLUSH_BLOCKS_WAIT && 
 	     fs->fs_cstotal.cs_nbfree <= needed) ||
 	    (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 &&
 	     fs->fs_cstotal.cs_nifree <= needed)) {
 		MNT_VNODE_FOREACH_ALL(lvp, mp, mvp) {
 			if (TAILQ_FIRST(&lvp->v_bufobj.bo_dirty.bv_hd) == 0) {
 				VI_UNLOCK(lvp);
 				continue;
 			}
 			if (vget(lvp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT,
 			    curthread))
 				continue;
 			if (lvp->v_vflag & VV_NOSYNC) {	/* unlinked */
 				vput(lvp);
 				continue;
 			}
 			(void) ffs_syncvnode(lvp, MNT_NOWAIT, 0);
 			vput(lvp);
 		}
 		lvp = ump->um_devvp;
 		if (vn_lock(lvp, LK_EXCLUSIVE | LK_NOWAIT) == 0) {
 			VOP_FSYNC(lvp, MNT_NOWAIT, curthread);
 			VOP_UNLOCK(lvp, 0);
 		}
 		if (ump->softdep_on_worklist > 0) {
 			stat_cleanup_retries += 1;
 			goto retry;
 		}
 		stat_cleanup_failures += 1;
 	}
 	if (time_second - starttime > stat_cleanup_high_delay)
 		stat_cleanup_high_delay = time_second - starttime;
 	UFS_LOCK(ump);
 	return (1);
 }
 
 static bool
 softdep_excess_items(struct ufsmount *ump, int item)
 {
 
 	KASSERT(item >= 0 && item < D_LAST, ("item %d", item));
 	return (dep_current[item] > max_softdeps &&
 	    ump->softdep_curdeps[item] > max_softdeps /
 	    stat_flush_threads);
 }
 
 static void
 schedule_cleanup(struct mount *mp)
 {
 	struct ufsmount *ump;
 	struct thread *td;
 
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	FREE_LOCK(ump);
 	td = curthread;
 	if ((td->td_pflags & TDP_KTHREAD) != 0 &&
 	    (td->td_proc->p_flag2 & P2_AST_SU) == 0) {
 		/*
 		 * No ast is delivered to kernel threads, so nobody
 		 * would deref the mp.  Some kernel threads
 		 * explicitely check for AST, e.g. NFS daemon does
 		 * this in the serving loop.
 		 */
 		return;
 	}
 	if (td->td_su != NULL)
 		vfs_rel(td->td_su);
 	vfs_ref(mp);
 	td->td_su = mp;
 	thread_lock(td);
 	td->td_flags |= TDF_ASTPENDING;
 	thread_unlock(td);
 }
 
 static void
 softdep_ast_cleanup_proc(void)
 {
 	struct thread *td;
 	struct mount *mp;
 	struct ufsmount *ump;
 	int error;
 	bool req;
 
 	td = curthread;
 	while ((mp = td->td_su) != NULL) {
 		td->td_su = NULL;
 		error = vfs_busy(mp, MBF_NOWAIT);
 		vfs_rel(mp);
 		if (error != 0)
 			return;
 		if (ffs_own_mount(mp) && MOUNTEDSOFTDEP(mp)) {
 			ump = VFSTOUFS(mp);
 			for (;;) {
 				req = false;
 				ACQUIRE_LOCK(ump);
 				if (softdep_excess_items(ump, D_INODEDEP)) {
 					req = true;
 					request_cleanup(mp, FLUSH_INODES);
 				}
 				if (softdep_excess_items(ump, D_DIRREM)) {
 					req = true;
 					request_cleanup(mp, FLUSH_BLOCKS);
 				}
 				FREE_LOCK(ump);
 				if (softdep_excess_items(ump, D_NEWBLK) ||
 				    softdep_excess_items(ump, D_ALLOCDIRECT) ||
 				    softdep_excess_items(ump, D_ALLOCINDIR)) {
 					error = vn_start_write(NULL, &mp,
 					    V_WAIT);
 					if (error == 0) {
 						req = true;
 						VFS_SYNC(mp, MNT_WAIT);
 						vn_finished_write(mp);
 					}
 				}
 				if ((td->td_pflags & TDP_KTHREAD) != 0 || !req)
 					break;
 			}
 		}
 		vfs_unbusy(mp);
 	}
 }
 
 /*
  * If memory utilization has gotten too high, deliberately slow things
  * down and speed up the I/O processing.
  */
 static int
 request_cleanup(mp, resource)
 	struct mount *mp;
 	int resource;
 {
 	struct thread *td = curthread;
 	struct ufsmount *ump;
 
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 	/*
 	 * We never hold up the filesystem syncer or buf daemon.
 	 */
 	if (td->td_pflags & (TDP_SOFTDEP|TDP_NORUNNINGBUF))
 		return (0);
 	/*
 	 * First check to see if the work list has gotten backlogged.
 	 * If it has, co-opt this process to help clean up two entries.
 	 * Because this process may hold inodes locked, we cannot
 	 * handle any remove requests that might block on a locked
 	 * inode as that could lead to deadlock.  We set TDP_SOFTDEP
 	 * to avoid recursively processing the worklist.
 	 */
 	if (ump->softdep_on_worklist > max_softdeps / 10) {
 		td->td_pflags |= TDP_SOFTDEP;
 		process_worklist_item(mp, 2, LK_NOWAIT);
 		td->td_pflags &= ~TDP_SOFTDEP;
 		stat_worklist_push += 2;
 		return(1);
 	}
 	/*
 	 * Next, we attempt to speed up the syncer process. If that
 	 * is successful, then we allow the process to continue.
 	 */
 	if (softdep_speedup(ump) &&
 	    resource != FLUSH_BLOCKS_WAIT &&
 	    resource != FLUSH_INODES_WAIT)
 		return(0);
 	/*
 	 * If we are resource constrained on inode dependencies, try
 	 * flushing some dirty inodes. Otherwise, we are constrained
 	 * by file deletions, so try accelerating flushes of directories
 	 * with removal dependencies. We would like to do the cleanup
 	 * here, but we probably hold an inode locked at this point and 
 	 * that might deadlock against one that we try to clean. So,
 	 * the best that we can do is request the syncer daemon to do
 	 * the cleanup for us.
 	 */
 	switch (resource) {
 
 	case FLUSH_INODES:
 	case FLUSH_INODES_WAIT:
 		ACQUIRE_GBLLOCK(&lk);
 		stat_ino_limit_push += 1;
 		req_clear_inodedeps += 1;
 		FREE_GBLLOCK(&lk);
 		stat_countp = &stat_ino_limit_hit;
 		break;
 
 	case FLUSH_BLOCKS:
 	case FLUSH_BLOCKS_WAIT:
 		ACQUIRE_GBLLOCK(&lk);
 		stat_blk_limit_push += 1;
 		req_clear_remove += 1;
 		FREE_GBLLOCK(&lk);
 		stat_countp = &stat_blk_limit_hit;
 		break;
 
 	default:
 		panic("request_cleanup: unknown type");
 	}
 	/*
 	 * Hopefully the syncer daemon will catch up and awaken us.
 	 * We wait at most tickdelay before proceeding in any case.
 	 */
 	ACQUIRE_GBLLOCK(&lk);
 	FREE_LOCK(ump);
 	proc_waiting += 1;
 	if (callout_pending(&softdep_callout) == FALSE)
 		callout_reset(&softdep_callout, tickdelay > 2 ? tickdelay : 2,
 		    pause_timer, 0);
 
 	if ((td->td_pflags & TDP_KTHREAD) == 0)
 		msleep((caddr_t)&proc_waiting, &lk, PPAUSE, "softupdate", 0);
 	proc_waiting -= 1;
 	FREE_GBLLOCK(&lk);
 	ACQUIRE_LOCK(ump);
 	return (1);
 }
 
 /*
  * Awaken processes pausing in request_cleanup and clear proc_waiting
  * to indicate that there is no longer a timer running. Pause_timer
  * will be called with the global softdep mutex (&lk) locked.
  */
 static void
 pause_timer(arg)
 	void *arg;
 {
 
 	GBLLOCK_OWNED(&lk);
 	/*
 	 * The callout_ API has acquired mtx and will hold it around this
 	 * function call.
 	 */
 	*stat_countp += proc_waiting;
 	wakeup(&proc_waiting);
 }
 
 /*
  * If requested, try removing inode or removal dependencies.
  */
 static void
 check_clear_deps(mp)
 	struct mount *mp;
 {
 
 	/*
 	 * If we are suspended, it may be because of our using
 	 * too many inodedeps, so help clear them out.
 	 */
 	if (MOUNTEDSUJ(mp) && VFSTOUFS(mp)->softdep_jblocks->jb_suspended)
 		clear_inodedeps(mp);
 	/*
 	 * General requests for cleanup of backed up dependencies
 	 */
 	ACQUIRE_GBLLOCK(&lk);
 	if (req_clear_inodedeps) {
 		req_clear_inodedeps -= 1;
 		FREE_GBLLOCK(&lk);
 		clear_inodedeps(mp);
 		ACQUIRE_GBLLOCK(&lk);
 		wakeup(&proc_waiting);
 	}
 	if (req_clear_remove) {
 		req_clear_remove -= 1;
 		FREE_GBLLOCK(&lk);
 		clear_remove(mp);
 		ACQUIRE_GBLLOCK(&lk);
 		wakeup(&proc_waiting);
 	}
 	FREE_GBLLOCK(&lk);
 }
 
 /*
  * Flush out a directory with at least one removal dependency in an effort to
  * reduce the number of dirrem, freefile, and freeblks dependency structures.
  */
 static void
 clear_remove(mp)
 	struct mount *mp;
 {
 	struct pagedep_hashhead *pagedephd;
 	struct pagedep *pagedep;
 	struct ufsmount *ump;
 	struct vnode *vp;
 	struct bufobj *bo;
 	int error, cnt;
 	ino_t ino;
 
 	ump = VFSTOUFS(mp);
 	LOCK_OWNED(ump);
 
 	for (cnt = 0; cnt <= ump->pagedep_hash_size; cnt++) {
 		pagedephd = &ump->pagedep_hashtbl[ump->pagedep_nextclean++];
 		if (ump->pagedep_nextclean > ump->pagedep_hash_size)
 			ump->pagedep_nextclean = 0;
 		LIST_FOREACH(pagedep, pagedephd, pd_hash) {
 			if (LIST_EMPTY(&pagedep->pd_dirremhd))
 				continue;
 			ino = pagedep->pd_ino;
 			if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
 				continue;
 			FREE_LOCK(ump);
 
 			/*
 			 * Let unmount clear deps
 			 */
 			error = vfs_busy(mp, MBF_NOWAIT);
 			if (error != 0)
 				goto finish_write;
 			error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp,
 			     FFSV_FORCEINSMQ);
 			vfs_unbusy(mp);
 			if (error != 0) {
 				softdep_error("clear_remove: vget", error);
 				goto finish_write;
 			}
 			if ((error = ffs_syncvnode(vp, MNT_NOWAIT, 0)))
 				softdep_error("clear_remove: fsync", error);
 			bo = &vp->v_bufobj;
 			BO_LOCK(bo);
 			drain_output(vp);
 			BO_UNLOCK(bo);
 			vput(vp);
 		finish_write:
 			vn_finished_write(mp);
 			ACQUIRE_LOCK(ump);
 			return;
 		}
 	}
 }
 
 /*
  * Clear out a block of dirty inodes in an effort to reduce
  * the number of inodedep dependency structures.
  */
 static void
 clear_inodedeps(mp)
 	struct mount *mp;
 {
 	struct inodedep_hashhead *inodedephd;
 	struct inodedep *inodedep;
 	struct ufsmount *ump;
 	struct vnode *vp;
 	struct fs *fs;
 	int error, cnt;
 	ino_t firstino, lastino, ino;
 
 	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;
 	LOCK_OWNED(ump);
 	/*
 	 * Pick a random inode dependency to be cleared.
 	 * We will then gather up all the inodes in its block 
 	 * that have dependencies and flush them out.
 	 */
 	for (cnt = 0; cnt <= ump->inodedep_hash_size; cnt++) {
 		inodedephd = &ump->inodedep_hashtbl[ump->inodedep_nextclean++];
 		if (ump->inodedep_nextclean > ump->inodedep_hash_size)
 			ump->inodedep_nextclean = 0;
 		if ((inodedep = LIST_FIRST(inodedephd)) != NULL)
 			break;
 	}
 	if (inodedep == NULL)
 		return;
 	/*
 	 * Find the last inode in the block with dependencies.
 	 */
 	firstino = rounddown2(inodedep->id_ino, INOPB(fs));
 	for (lastino = firstino + INOPB(fs) - 1; lastino > firstino; lastino--)
 		if (inodedep_lookup(mp, lastino, 0, &inodedep) != 0)
 			break;
 	/*
 	 * Asynchronously push all but the last inode with dependencies.
 	 * Synchronously push the last inode with dependencies to ensure
 	 * that the inode block gets written to free up the inodedeps.
 	 */
 	for (ino = firstino; ino <= lastino; ino++) {
 		if (inodedep_lookup(mp, ino, 0, &inodedep) == 0)
 			continue;
 		if (vn_start_write(NULL, &mp, V_NOWAIT) != 0)
 			continue;
 		FREE_LOCK(ump);
 		error = vfs_busy(mp, MBF_NOWAIT); /* Let unmount clear deps */
 		if (error != 0) {
 			vn_finished_write(mp);
 			ACQUIRE_LOCK(ump);
 			return;
 		}
 		if ((error = ffs_vgetf(mp, ino, LK_EXCLUSIVE, &vp,
 		    FFSV_FORCEINSMQ)) != 0) {
 			softdep_error("clear_inodedeps: vget", error);
 			vfs_unbusy(mp);
 			vn_finished_write(mp);
 			ACQUIRE_LOCK(ump);
 			return;
 		}
 		vfs_unbusy(mp);
 		if (ino == lastino) {
 			if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)))
 				softdep_error("clear_inodedeps: fsync1", error);
 		} else {
 			if ((error = ffs_syncvnode(vp, MNT_NOWAIT, 0)))
 				softdep_error("clear_inodedeps: fsync2", error);
 			BO_LOCK(&vp->v_bufobj);
 			drain_output(vp);
 			BO_UNLOCK(&vp->v_bufobj);
 		}
 		vput(vp);
 		vn_finished_write(mp);
 		ACQUIRE_LOCK(ump);
 	}
 }
 
 void
 softdep_buf_append(bp, wkhd)
 	struct buf *bp;
 	struct workhead *wkhd;
 {
 	struct worklist *wk;
 	struct ufsmount *ump;
 
 	if ((wk = LIST_FIRST(wkhd)) == NULL)
 		return;
 	KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0,
 	    ("softdep_buf_append called on non-softdep filesystem"));
 	ump = VFSTOUFS(wk->wk_mp);
 	ACQUIRE_LOCK(ump);
 	while ((wk = LIST_FIRST(wkhd)) != NULL) {
 		WORKLIST_REMOVE(wk);
 		WORKLIST_INSERT(&bp->b_dep, wk);
 	}
 	FREE_LOCK(ump);
 
 }
 
 void
 softdep_inode_append(ip, cred, wkhd)
 	struct inode *ip;
 	struct ucred *cred;
 	struct workhead *wkhd;
 {
 	struct buf *bp;
 	struct fs *fs;
 	struct ufsmount *ump;
 	int error;
 
 	ump = ITOUMP(ip);
 	KASSERT(MOUNTEDSOFTDEP(UFSTOVFS(ump)) != 0,
 	    ("softdep_inode_append called on non-softdep filesystem"));
 	fs = ump->um_fs;
 	error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 	    (int)fs->fs_bsize, cred, &bp);
 	if (error) {
 		bqrelse(bp);
 		softdep_freework(wkhd);
 		return;
 	}
 	softdep_buf_append(bp, wkhd);
 	bqrelse(bp);
 }
 
 void
 softdep_freework(wkhd)
 	struct workhead *wkhd;
 {
 	struct worklist *wk;
 	struct ufsmount *ump;
 
 	if ((wk = LIST_FIRST(wkhd)) == NULL)
 		return;
 	KASSERT(MOUNTEDSOFTDEP(wk->wk_mp) != 0,
 	    ("softdep_freework called on non-softdep filesystem"));
 	ump = VFSTOUFS(wk->wk_mp);
 	ACQUIRE_LOCK(ump);
 	handle_jwork(wkhd);
 	FREE_LOCK(ump);
 }
 
 /*
  * Function to determine if the buffer has outstanding dependencies
  * that will cause a roll-back if the buffer is written. If wantcount
  * is set, return number of dependencies, otherwise just yes or no.
  */
 static int
 softdep_count_dependencies(bp, wantcount)
 	struct buf *bp;
 	int wantcount;
 {
 	struct worklist *wk;
 	struct ufsmount *ump;
 	struct bmsafemap *bmsafemap;
 	struct freework *freework;
 	struct inodedep *inodedep;
 	struct indirdep *indirdep;
 	struct freeblks *freeblks;
 	struct allocindir *aip;
 	struct pagedep *pagedep;
 	struct dirrem *dirrem;
 	struct newblk *newblk;
 	struct mkdir *mkdir;
 	struct diradd *dap;
 	int i, retval;
 
 	retval = 0;
 	if ((wk = LIST_FIRST(&bp->b_dep)) == NULL)
 		return (0);
 	ump = VFSTOUFS(wk->wk_mp);
 	ACQUIRE_LOCK(ump);
 	LIST_FOREACH(wk, &bp->b_dep, wk_list) {
 		switch (wk->wk_type) {
 
 		case D_INODEDEP:
 			inodedep = WK_INODEDEP(wk);
 			if ((inodedep->id_state & DEPCOMPLETE) == 0) {
 				/* bitmap allocation dependency */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			if (TAILQ_FIRST(&inodedep->id_inoupdt)) {
 				/* direct block pointer dependency */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			if (TAILQ_FIRST(&inodedep->id_extupdt)) {
 				/* direct block pointer dependency */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			if (TAILQ_FIRST(&inodedep->id_inoreflst)) {
 				/* Add reference dependency. */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			continue;
 
 		case D_INDIRDEP:
 			indirdep = WK_INDIRDEP(wk);
 
 			TAILQ_FOREACH(freework, &indirdep->ir_trunc, fw_next) {
 				/* indirect truncation dependency */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 
 			LIST_FOREACH(aip, &indirdep->ir_deplisthd, ai_next) {
 				/* indirect block pointer dependency */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			continue;
 
 		case D_PAGEDEP:
 			pagedep = WK_PAGEDEP(wk);
 			LIST_FOREACH(dirrem, &pagedep->pd_dirremhd, dm_next) {
 				if (LIST_FIRST(&dirrem->dm_jremrefhd)) {
 					/* Journal remove ref dependency. */
 					retval += 1;
 					if (!wantcount)
 						goto out;
 				}
 			}
 			for (i = 0; i < DAHASHSZ; i++) {
 
 				LIST_FOREACH(dap, &pagedep->pd_diraddhd[i], da_pdlist) {
 					/* directory entry dependency */
 					retval += 1;
 					if (!wantcount)
 						goto out;
 				}
 			}
 			continue;
 
 		case D_BMSAFEMAP:
 			bmsafemap = WK_BMSAFEMAP(wk);
 			if (LIST_FIRST(&bmsafemap->sm_jaddrefhd)) {
 				/* Add reference dependency. */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			if (LIST_FIRST(&bmsafemap->sm_jnewblkhd)) {
 				/* Allocate block dependency. */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			continue;
 
 		case D_FREEBLKS:
 			freeblks = WK_FREEBLKS(wk);
 			if (LIST_FIRST(&freeblks->fb_jblkdephd)) {
 				/* Freeblk journal dependency. */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			continue;
 
 		case D_ALLOCDIRECT:
 		case D_ALLOCINDIR:
 			newblk = WK_NEWBLK(wk);
 			if (newblk->nb_jnewblk) {
 				/* Journal allocate dependency. */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			continue;
 
 		case D_MKDIR:
 			mkdir = WK_MKDIR(wk);
 			if (mkdir->md_jaddref) {
 				/* Journal reference dependency. */
 				retval += 1;
 				if (!wantcount)
 					goto out;
 			}
 			continue;
 
 		case D_FREEWORK:
 		case D_FREEDEP:
 		case D_JSEGDEP:
 		case D_JSEG:
 		case D_SBDEP:
 			/* never a dependency on these blocks */
 			continue;
 
 		default:
 			panic("softdep_count_dependencies: Unexpected type %s",
 			    TYPENAME(wk->wk_type));
 			/* NOTREACHED */
 		}
 	}
 out:
 	FREE_LOCK(ump);
 	return retval;
 }
 
 /*
  * Acquire exclusive access to a buffer.
  * Must be called with a locked mtx parameter.
  * Return acquired buffer or NULL on failure.
  */
 static struct buf *
 getdirtybuf(bp, lock, waitfor)
 	struct buf *bp;
 	struct rwlock *lock;
 	int waitfor;
 {
 	int error;
 
 	if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL) != 0) {
 		if (waitfor != MNT_WAIT)
 			return (NULL);
 		error = BUF_LOCK(bp,
 		    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, lock);
 		/*
 		 * Even if we successfully acquire bp here, we have dropped
 		 * lock, which may violates our guarantee.
 		 */
 		if (error == 0)
 			BUF_UNLOCK(bp);
 		else if (error != ENOLCK)
 			panic("getdirtybuf: inconsistent lock: %d", error);
 		rw_wlock(lock);
 		return (NULL);
 	}
 	if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
 		if (lock != BO_LOCKPTR(bp->b_bufobj) && waitfor == MNT_WAIT) {
 			rw_wunlock(lock);
 			BO_LOCK(bp->b_bufobj);
 			BUF_UNLOCK(bp);
 			if ((bp->b_vflags & BV_BKGRDINPROG) != 0) {
 				bp->b_vflags |= BV_BKGRDWAIT;
 				msleep(&bp->b_xflags, BO_LOCKPTR(bp->b_bufobj),
 				       PRIBIO | PDROP, "getbuf", 0);
 			} else
 				BO_UNLOCK(bp->b_bufobj);
 			rw_wlock(lock);
 			return (NULL);
 		}
 		BUF_UNLOCK(bp);
 		if (waitfor != MNT_WAIT)
 			return (NULL);
 		/*
 		 * The lock argument must be bp->b_vp's mutex in
 		 * this case.
 		 */
 #ifdef	DEBUG_VFS_LOCKS
 		if (bp->b_vp->v_type != VCHR)
 			ASSERT_BO_WLOCKED(bp->b_bufobj);
 #endif
 		bp->b_vflags |= BV_BKGRDWAIT;
 		rw_sleep(&bp->b_xflags, lock, PRIBIO, "getbuf", 0);
 		return (NULL);
 	}
 	if ((bp->b_flags & B_DELWRI) == 0) {
 		BUF_UNLOCK(bp);
 		return (NULL);
 	}
 	bremfree(bp);
 	return (bp);
 }
 
 
 /*
  * Check if it is safe to suspend the file system now.  On entry,
  * the vnode interlock for devvp should be held.  Return 0 with
  * the mount interlock held if the file system can be suspended now,
  * otherwise return EAGAIN with the mount interlock held.
  */
 int
 softdep_check_suspend(struct mount *mp,
 		      struct vnode *devvp,
 		      int softdep_depcnt,
 		      int softdep_accdepcnt,
 		      int secondary_writes,
 		      int secondary_accwrites)
 {
 	struct bufobj *bo;
 	struct ufsmount *ump;
 	struct inodedep *inodedep;
 	int error, unlinked;
 
 	bo = &devvp->v_bufobj;
 	ASSERT_BO_WLOCKED(bo);
 
 	/*
 	 * If we are not running with soft updates, then we need only
 	 * deal with secondary writes as we try to suspend.
 	 */
 	if (MOUNTEDSOFTDEP(mp) == 0) {
 		MNT_ILOCK(mp);
 		while (mp->mnt_secondary_writes != 0) {
 			BO_UNLOCK(bo);
 			msleep(&mp->mnt_secondary_writes, MNT_MTX(mp),
 			    (PUSER - 1) | PDROP, "secwr", 0);
 			BO_LOCK(bo);
 			MNT_ILOCK(mp);
 		}
 
 		/*
 		 * Reasons for needing more work before suspend:
 		 * - Dirty buffers on devvp.
 		 * - Secondary writes occurred after start of vnode sync loop
 		 */
 		error = 0;
 		if (bo->bo_numoutput > 0 ||
 		    bo->bo_dirty.bv_cnt > 0 ||
 		    secondary_writes != 0 ||
 		    mp->mnt_secondary_writes != 0 ||
 		    secondary_accwrites != mp->mnt_secondary_accwrites)
 			error = EAGAIN;
 		BO_UNLOCK(bo);
 		return (error);
 	}
 
 	/*
 	 * If we are running with soft updates, then we need to coordinate
 	 * with them as we try to suspend.
 	 */
 	ump = VFSTOUFS(mp);
 	for (;;) {
 		if (!TRY_ACQUIRE_LOCK(ump)) {
 			BO_UNLOCK(bo);
 			ACQUIRE_LOCK(ump);
 			FREE_LOCK(ump);
 			BO_LOCK(bo);
 			continue;
 		}
 		MNT_ILOCK(mp);
 		if (mp->mnt_secondary_writes != 0) {
 			FREE_LOCK(ump);
 			BO_UNLOCK(bo);
 			msleep(&mp->mnt_secondary_writes,
 			       MNT_MTX(mp),
 			       (PUSER - 1) | PDROP, "secwr", 0);
 			BO_LOCK(bo);
 			continue;
 		}
 		break;
 	}
 
 	unlinked = 0;
 	if (MOUNTEDSUJ(mp)) {
 		for (inodedep = TAILQ_FIRST(&ump->softdep_unlinked);
 		    inodedep != NULL;
 		    inodedep = TAILQ_NEXT(inodedep, id_unlinked)) {
 			if ((inodedep->id_state & (UNLINKED | UNLINKLINKS |
 			    UNLINKONLIST)) != (UNLINKED | UNLINKLINKS |
 			    UNLINKONLIST) ||
 			    !check_inodedep_free(inodedep))
 				continue;
 			unlinked++;
 		}
 	}
 
 	/*
 	 * Reasons for needing more work before suspend:
 	 * - Dirty buffers on devvp.
 	 * - Softdep activity occurred after start of vnode sync loop
 	 * - Secondary writes occurred after start of vnode sync loop
 	 */
 	error = 0;
 	if (bo->bo_numoutput > 0 ||
 	    bo->bo_dirty.bv_cnt > 0 ||
 	    softdep_depcnt != unlinked ||
 	    ump->softdep_deps != unlinked ||
 	    softdep_accdepcnt != ump->softdep_accdeps ||
 	    secondary_writes != 0 ||
 	    mp->mnt_secondary_writes != 0 ||
 	    secondary_accwrites != mp->mnt_secondary_accwrites)
 		error = EAGAIN;
 	FREE_LOCK(ump);
 	BO_UNLOCK(bo);
 	return (error);
 }
 
 
 /*
  * Get the number of dependency structures for the file system, both
  * the current number and the total number allocated.  These will
  * later be used to detect that softdep processing has occurred.
  */
 void
 softdep_get_depcounts(struct mount *mp,
 		      int *softdep_depsp,
 		      int *softdep_accdepsp)
 {
 	struct ufsmount *ump;
 
 	if (MOUNTEDSOFTDEP(mp) == 0) {
 		*softdep_depsp = 0;
 		*softdep_accdepsp = 0;
 		return;
 	}
 	ump = VFSTOUFS(mp);
 	ACQUIRE_LOCK(ump);
 	*softdep_depsp = ump->softdep_deps;
 	*softdep_accdepsp = ump->softdep_accdeps;
 	FREE_LOCK(ump);
 }
 
 /*
  * Wait for pending output on a vnode to complete.
  * Must be called with vnode lock and interlock locked.
  *
  * XXX: Should just be a call to bufobj_wwait().
  */
 static void
 drain_output(vp)
 	struct vnode *vp;
 {
 	struct bufobj *bo;
 
 	bo = &vp->v_bufobj;
 	ASSERT_VOP_LOCKED(vp, "drain_output");
 	ASSERT_BO_WLOCKED(bo);
 
 	while (bo->bo_numoutput) {
 		bo->bo_flag |= BO_WWAIT;
 		msleep((caddr_t)&bo->bo_numoutput,
 		    BO_LOCKPTR(bo), PRIBIO + 1, "drainvp", 0);
 	}
 }
 
 /*
  * Called whenever a buffer that is being invalidated or reallocated
  * contains dependencies. This should only happen if an I/O error has
  * occurred. The routine is called with the buffer locked.
  */ 
 static void
 softdep_deallocate_dependencies(bp)
 	struct buf *bp;
 {
 
 	if ((bp->b_ioflags & BIO_ERROR) == 0)
 		panic("softdep_deallocate_dependencies: dangling deps");
 	if (bp->b_vp != NULL && bp->b_vp->v_mount != NULL)
 		softdep_error(bp->b_vp->v_mount->mnt_stat.f_mntonname, bp->b_error);
 	else
 		printf("softdep_deallocate_dependencies: "
 		    "got error %d while accessing filesystem\n", bp->b_error);
 	if (bp->b_error != ENXIO)
 		panic("softdep_deallocate_dependencies: unrecovered I/O error");
 }
 
 /*
  * Function to handle asynchronous write errors in the filesystem.
  */
 static void
 softdep_error(func, error)
 	char *func;
 	int error;
 {
 
 	/* XXX should do something better! */
 	printf("%s: got error %d while accessing filesystem\n", func, error);
 }
 
 #ifdef DDB
 
 static void
 inodedep_print(struct inodedep *inodedep, int verbose)
 {
-	db_printf("%p fs %p st %x ino %jd inoblk %jd delta %d nlink %d"
+	db_printf("%p fs %p st %x ino %jd inoblk %jd delta %jd nlink %jd"
 	    " saveino %p\n",
 	    inodedep, inodedep->id_fs, inodedep->id_state,
 	    (intmax_t)inodedep->id_ino,
 	    (intmax_t)fsbtodb(inodedep->id_fs,
 	    ino_to_fsba(inodedep->id_fs, inodedep->id_ino)),
-	    inodedep->id_nlinkdelta, inodedep->id_savednlink,
+	    (intmax_t)inodedep->id_nlinkdelta,
+	    (intmax_t)inodedep->id_savednlink,
 	    inodedep->id_savedino1);
 
 	if (verbose == 0)
 		return;
 
 	db_printf("\tpendinghd %p, bufwait %p, inowait %p, inoreflst %p, "
 	    "mkdiradd %p\n",
 	    LIST_FIRST(&inodedep->id_pendinghd),
 	    LIST_FIRST(&inodedep->id_bufwait),
 	    LIST_FIRST(&inodedep->id_inowait),
 	    TAILQ_FIRST(&inodedep->id_inoreflst),
 	    inodedep->id_mkdiradd);
 	db_printf("\tinoupdt %p, newinoupdt %p, extupdt %p, newextupdt %p\n",
 	    TAILQ_FIRST(&inodedep->id_inoupdt),
 	    TAILQ_FIRST(&inodedep->id_newinoupdt),
 	    TAILQ_FIRST(&inodedep->id_extupdt),
 	    TAILQ_FIRST(&inodedep->id_newextupdt));
 }
 
 DB_SHOW_COMMAND(inodedep, db_show_inodedep)
 {
 
 	if (have_addr == 0) {
 		db_printf("Address required\n");
 		return;
 	}
 	inodedep_print((struct inodedep*)addr, 1);
 }
 
 DB_SHOW_COMMAND(inodedeps, db_show_inodedeps)
 {
 	struct inodedep_hashhead *inodedephd;
 	struct inodedep *inodedep;
 	struct ufsmount *ump;
 	int cnt;
 
 	if (have_addr == 0) {
 		db_printf("Address required\n");
 		return;
 	}
 	ump = (struct ufsmount *)addr;
 	for (cnt = 0; cnt < ump->inodedep_hash_size; cnt++) {
 		inodedephd = &ump->inodedep_hashtbl[cnt];
 		LIST_FOREACH(inodedep, inodedephd, id_hash) {
 			inodedep_print(inodedep, 0);
 		}
 	}
 }
 
 DB_SHOW_COMMAND(worklist, db_show_worklist)
 {
 	struct worklist *wk;
 
 	if (have_addr == 0) {
 		db_printf("Address required\n");
 		return;
 	}
 	wk = (struct worklist *)addr;
 	printf("worklist: %p type %s state 0x%X\n",
 	    wk, TYPENAME(wk->wk_type), wk->wk_state);
 }
 
 DB_SHOW_COMMAND(workhead, db_show_workhead)
 {
 	struct workhead *wkhd;
 	struct worklist *wk;
 	int i;
 
 	if (have_addr == 0) {
 		db_printf("Address required\n");
 		return;
 	}
 	wkhd = (struct workhead *)addr;
 	wk = LIST_FIRST(wkhd);
 	for (i = 0; i < 100 && wk != NULL; i++, wk = LIST_NEXT(wk, wk_list))
 		db_printf("worklist: %p type %s state 0x%X",
 		    wk, TYPENAME(wk->wk_type), wk->wk_state);
 	if (i == 100)
 		db_printf("workhead overflow");
 	printf("\n");
 }
 
 
 DB_SHOW_COMMAND(mkdirs, db_show_mkdirs)
 {
 	struct mkdirlist *mkdirlisthd;
 	struct jaddref *jaddref;
 	struct diradd *diradd;
 	struct mkdir *mkdir;
 
 	if (have_addr == 0) {
 		db_printf("Address required\n");
 		return;
 	}
 	mkdirlisthd = (struct mkdirlist *)addr;
 	LIST_FOREACH(mkdir, mkdirlisthd, md_mkdirs) {
 		diradd = mkdir->md_diradd;
 		db_printf("mkdir: %p state 0x%X dap %p state 0x%X",
 		    mkdir, mkdir->md_state, diradd, diradd->da_state);
 		if ((jaddref = mkdir->md_jaddref) != NULL)
 			db_printf(" jaddref %p jaddref state 0x%X",
 			    jaddref, jaddref->ja_state);
 		db_printf("\n");
 	}
 }
 
 /* exported to ffs_vfsops.c */
 extern void db_print_ffs(struct ufsmount *ump);
 void
 db_print_ffs(struct ufsmount *ump)
 {
 	db_printf("mp %p %s devvp %p fs %p su_wl %d su_deps %d su_req %d\n",
 	    ump->um_mountp, ump->um_mountp->mnt_stat.f_mntonname,
 	    ump->um_devvp, ump->um_fs, ump->softdep_on_worklist,
 	    ump->softdep_deps, ump->softdep_req);
 }
 
 #endif /* DDB */
 
 #endif /* SOFTUPDATES */
Index: head/sys/ufs/ufs/ufs_vnops.c
===================================================================
--- head/sys/ufs/ufs/ufs_vnops.c	(revision 311521)
+++ head/sys/ufs/ufs/ufs_vnops.c	(revision 311522)
@@ -1,2834 +1,2834 @@
 /*-
  * Copyright (c) 1982, 1986, 1989, 1993, 1995
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
  * All or some portions of this file are derived from material licensed
  * to the University of California by American Telephone and Telegraph
  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  * the permission of UNIX System Laboratories, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)ufs_vnops.c	8.27 (Berkeley) 5/27/95
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_quota.h"
 #include "opt_suiddir.h"
 #include "opt_ufs.h"
 #include "opt_ffs.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/namei.h>
 #include <sys/kernel.h>
 #include <sys/fcntl.h>
 #include <sys/filio.h>
 #include <sys/stat.h>
 #include <sys/bio.h>
 #include <sys/buf.h>
 #include <sys/mount.h>
 #include <sys/priv.h>
 #include <sys/refcount.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/dirent.h>
 #include <sys/lockf.h>
 #include <sys/conf.h>
 #include <sys/acl.h>
 
 #include <security/mac/mac_framework.h>
 
 #include <sys/file.h>		/* XXX */
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 
 #include <ufs/ufs/acl.h>
 #include <ufs/ufs/extattr.h>
 #include <ufs/ufs/quota.h>
 #include <ufs/ufs/inode.h>
 #include <ufs/ufs/dir.h>
 #include <ufs/ufs/ufsmount.h>
 #include <ufs/ufs/ufs_extern.h>
 #ifdef UFS_DIRHASH
 #include <ufs/ufs/dirhash.h>
 #endif
 #ifdef UFS_GJOURNAL
 #include <ufs/ufs/gjournal.h>
 FEATURE(ufs_gjournal, "Journaling support through GEOM for UFS");
 #endif
 
 #ifdef QUOTA
 FEATURE(ufs_quota, "UFS disk quotas support");
 FEATURE(ufs_quota64, "64bit UFS disk quotas support");
 #endif
 
 #ifdef SUIDDIR
 FEATURE(suiddir,
     "Give all new files in directory the same ownership as the directory");
 #endif
 
 
 #include <ufs/ffs/ffs_extern.h>
 
 static vop_accessx_t	ufs_accessx;
 static int ufs_chmod(struct vnode *, int, struct ucred *, struct thread *);
 static int ufs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *);
 static vop_close_t	ufs_close;
 static vop_create_t	ufs_create;
 static vop_getattr_t	ufs_getattr;
 static vop_ioctl_t	ufs_ioctl;
 static vop_link_t	ufs_link;
 static int ufs_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *, const char *);
 static vop_markatime_t	ufs_markatime;
 static vop_mkdir_t	ufs_mkdir;
 static vop_mknod_t	ufs_mknod;
 static vop_open_t	ufs_open;
 static vop_pathconf_t	ufs_pathconf;
 static vop_print_t	ufs_print;
 static vop_readlink_t	ufs_readlink;
 static vop_remove_t	ufs_remove;
 static vop_rename_t	ufs_rename;
 static vop_rmdir_t	ufs_rmdir;
 static vop_setattr_t	ufs_setattr;
 static vop_strategy_t	ufs_strategy;
 static vop_symlink_t	ufs_symlink;
 static vop_whiteout_t	ufs_whiteout;
 static vop_close_t	ufsfifo_close;
 static vop_kqfilter_t	ufsfifo_kqfilter;
 static vop_pathconf_t	ufsfifo_pathconf;
 
 SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
 
 /*
  * A virgin directory (no blushing please).
  */
 static struct dirtemplate mastertemplate = {
 	0, 12, DT_DIR, 1, ".",
 	0, DIRBLKSIZ - 12, DT_DIR, 2, ".."
 };
 static struct odirtemplate omastertemplate = {
 	0, 12, 1, ".",
 	0, DIRBLKSIZ - 12, 2, ".."
 };
 
 static void
 ufs_itimes_locked(struct vnode *vp)
 {
 	struct inode *ip;
 	struct timespec ts;
 
 	ASSERT_VI_LOCKED(vp, __func__);
 
 	ip = VTOI(vp);
 	if (UFS_RDONLY(ip))
 		goto out;
 	if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0)
 		return;
 
 	if ((vp->v_type == VBLK || vp->v_type == VCHR) && !DOINGSOFTDEP(vp))
 		ip->i_flag |= IN_LAZYMOD;
 	else if (((vp->v_mount->mnt_kern_flag &
 		    (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) ||
 		    (ip->i_flag & (IN_CHANGE | IN_UPDATE)))
 		ip->i_flag |= IN_MODIFIED;
 	else if (ip->i_flag & IN_ACCESS)
 		ip->i_flag |= IN_LAZYACCESS;
 	vfs_timestamp(&ts);
 	if (ip->i_flag & IN_ACCESS) {
 		DIP_SET(ip, i_atime, ts.tv_sec);
 		DIP_SET(ip, i_atimensec, ts.tv_nsec);
 	}
 	if (ip->i_flag & IN_UPDATE) {
 		DIP_SET(ip, i_mtime, ts.tv_sec);
 		DIP_SET(ip, i_mtimensec, ts.tv_nsec);
 	}
 	if (ip->i_flag & IN_CHANGE) {
 		DIP_SET(ip, i_ctime, ts.tv_sec);
 		DIP_SET(ip, i_ctimensec, ts.tv_nsec);
 		DIP_SET(ip, i_modrev, DIP(ip, i_modrev) + 1);
 	}
 
  out:
 	ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE);
 }
 
 void
 ufs_itimes(struct vnode *vp)
 {
 
 	VI_LOCK(vp);
 	ufs_itimes_locked(vp);
 	VI_UNLOCK(vp);
 }
 
 /*
  * Create a regular file
  */
 static int
 ufs_create(ap)
 	struct vop_create_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	int error;
 
 	error =
 	    ufs_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode),
 	    ap->a_dvp, ap->a_vpp, ap->a_cnp, "ufs_create");
 	if (error != 0)
 		return (error);
 	if ((ap->a_cnp->cn_flags & MAKEENTRY) != 0)
 		cache_enter(ap->a_dvp, *ap->a_vpp, ap->a_cnp);
 	return (0);
 }
 
 /*
  * Mknod vnode call
  */
 /* ARGSUSED */
 static int
 ufs_mknod(ap)
 	struct vop_mknod_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	struct vattr *vap = ap->a_vap;
 	struct vnode **vpp = ap->a_vpp;
 	struct inode *ip;
 	ino_t ino;
 	int error;
 
 	error = ufs_makeinode(MAKEIMODE(vap->va_type, vap->va_mode),
 	    ap->a_dvp, vpp, ap->a_cnp, "ufs_mknod");
 	if (error)
 		return (error);
 	ip = VTOI(*vpp);
 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 	if (vap->va_rdev != VNOVAL) {
 		/*
 		 * Want to be able to use this to make badblock
 		 * inodes, so don't truncate the dev number.
 		 */
 		DIP_SET(ip, i_rdev, vap->va_rdev);
 	}
 	/*
 	 * Remove inode, then reload it through VFS_VGET so it is
 	 * checked to see if it is an alias of an existing entry in
 	 * the inode cache.  XXX I don't believe this is necessary now.
 	 */
 	(*vpp)->v_type = VNON;
 	ino = ip->i_number;	/* Save this before vgone() invalidates ip. */
 	vgone(*vpp);
 	vput(*vpp);
 	error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp);
 	if (error) {
 		*vpp = NULL;
 		return (error);
 	}
 	return (0);
 }
 
 /*
  * Open called.
  */
 /* ARGSUSED */
 static int
 ufs_open(struct vop_open_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip;
 
 	if (vp->v_type == VCHR || vp->v_type == VBLK)
 		return (EOPNOTSUPP);
 
 	ip = VTOI(vp);
 	/*
 	 * Files marked append-only must be opened for appending.
 	 */
 	if ((ip->i_flags & APPEND) &&
 	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
 		return (EPERM);
 	vnode_create_vobject(vp, DIP(ip, i_size), ap->a_td);
 	return (0);
 }
 
 /*
  * Close called.
  *
  * Update the times on the inode.
  */
 /* ARGSUSED */
 static int
 ufs_close(ap)
 	struct vop_close_args /* {
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	int usecount;
 
 	VI_LOCK(vp);
 	usecount = vp->v_usecount;
 	if (usecount > 1)
 		ufs_itimes_locked(vp);
 	VI_UNLOCK(vp);
 	return (0);
 }
 
 static int
 ufs_accessx(ap)
 	struct vop_accessx_args /* {
 		struct vnode *a_vp;
 		accmode_t a_accmode;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	accmode_t accmode = ap->a_accmode;
 	int error;
 #ifdef QUOTA
 	int relocked;
 #endif
 #ifdef UFS_ACL
 	struct acl *acl;
 	acl_type_t type;
 #endif
 
 	/*
 	 * Disallow write attempts on read-only filesystems;
 	 * unless the file is a socket, fifo, or a block or
 	 * character device resident on the filesystem.
 	 */
 	if (accmode & VMODIFY_PERMS) {
 		switch (vp->v_type) {
 		case VDIR:
 		case VLNK:
 		case VREG:
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 #ifdef QUOTA
 			/*
 			 * Inode is accounted in the quotas only if struct
 			 * dquot is attached to it. VOP_ACCESS() is called
 			 * from vn_open_cred() and provides a convenient
 			 * point to call getinoquota().
 			 */
 			if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
 
 				/*
 				 * Upgrade vnode lock, since getinoquota()
 				 * requires exclusive lock to modify inode.
 				 */
 				relocked = 1;
 				vhold(vp);
 				vn_lock(vp, LK_UPGRADE | LK_RETRY);
 				VI_LOCK(vp);
 				if (vp->v_iflag & VI_DOOMED) {
 					vdropl(vp);
 					error = ENOENT;
 					goto relock;
 				}
 				vdropl(vp);
 			} else
 				relocked = 0;
 			error = getinoquota(ip);
 relock:
 			if (relocked)
 				vn_lock(vp, LK_DOWNGRADE | LK_RETRY);
 			if (error != 0)
 				return (error);
 #endif
 			break;
 		default:
 			break;
 		}
 	}
 
 	/*
 	 * If immutable bit set, nobody gets to write it.  "& ~VADMIN_PERMS"
 	 * permits the owner of the file to remove the IMMUTABLE flag.
 	 */
 	if ((accmode & (VMODIFY_PERMS & ~VADMIN_PERMS)) &&
 	    (ip->i_flags & (IMMUTABLE | SF_SNAPSHOT)))
 		return (EPERM);
 
 #ifdef UFS_ACL
 	if ((vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS)) != 0) {
 		if (vp->v_mount->mnt_flag & MNT_NFS4ACLS)
 			type = ACL_TYPE_NFS4;
 		else
 			type = ACL_TYPE_ACCESS;
 
 		acl = acl_alloc(M_WAITOK);
 		if (type == ACL_TYPE_NFS4)
 			error = ufs_getacl_nfs4_internal(vp, acl, ap->a_td);
 		else
 			error = VOP_GETACL(vp, type, acl, ap->a_cred, ap->a_td);
 		switch (error) {
 		case 0:
 			if (type == ACL_TYPE_NFS4) {
 				error = vaccess_acl_nfs4(vp->v_type, ip->i_uid,
 				    ip->i_gid, acl, accmode, ap->a_cred, NULL);
 			} else {
 				error = vfs_unixify_accmode(&accmode);
 				if (error == 0)
 					error = vaccess_acl_posix1e(vp->v_type, ip->i_uid,
 					    ip->i_gid, acl, accmode, ap->a_cred, NULL);
 			}
 			break;
 		default:
 			if (error != EOPNOTSUPP)
 				printf(
 "ufs_accessx(): Error retrieving ACL on object (%d).\n",
 				    error);
 			/*
 			 * XXX: Fall back until debugged.  Should
 			 * eventually possibly log an error, and return
 			 * EPERM for safety.
 			 */
 			error = vfs_unixify_accmode(&accmode);
 			if (error == 0)
 				error = vaccess(vp->v_type, ip->i_mode, ip->i_uid,
 				    ip->i_gid, accmode, ap->a_cred, NULL);
 		}
 		acl_free(acl);
 
 		return (error);
 	}
 #endif /* !UFS_ACL */
 	error = vfs_unixify_accmode(&accmode);
 	if (error == 0)
 		error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid,
 		    accmode, ap->a_cred, NULL);
 	return (error);
 }
 
 /* ARGSUSED */
 static int
 ufs_getattr(ap)
 	struct vop_getattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	struct vattr *vap = ap->a_vap;
 
 	VI_LOCK(vp);
 	ufs_itimes_locked(vp);
 	if (I_IS_UFS1(ip)) {
 		vap->va_atime.tv_sec = ip->i_din1->di_atime;
 		vap->va_atime.tv_nsec = ip->i_din1->di_atimensec;
 	} else {
 		vap->va_atime.tv_sec = ip->i_din2->di_atime;
 		vap->va_atime.tv_nsec = ip->i_din2->di_atimensec;
 	}
 	VI_UNLOCK(vp);
 	/*
 	 * Copy from inode table
 	 */
 	vap->va_fsid = dev2udev(ITOUMP(ip)->um_dev);
 	vap->va_fileid = ip->i_number;
 	vap->va_mode = ip->i_mode & ~IFMT;
 	vap->va_nlink = ip->i_effnlink;
 	vap->va_uid = ip->i_uid;
 	vap->va_gid = ip->i_gid;
 	if (I_IS_UFS1(ip)) {
 		vap->va_rdev = ip->i_din1->di_rdev;
 		vap->va_size = ip->i_din1->di_size;
 		vap->va_mtime.tv_sec = ip->i_din1->di_mtime;
 		vap->va_mtime.tv_nsec = ip->i_din1->di_mtimensec;
 		vap->va_ctime.tv_sec = ip->i_din1->di_ctime;
 		vap->va_ctime.tv_nsec = ip->i_din1->di_ctimensec;
 		vap->va_bytes = dbtob((u_quad_t)ip->i_din1->di_blocks);
 		vap->va_filerev = ip->i_din1->di_modrev;
 	} else {
 		vap->va_rdev = ip->i_din2->di_rdev;
 		vap->va_size = ip->i_din2->di_size;
 		vap->va_mtime.tv_sec = ip->i_din2->di_mtime;
 		vap->va_mtime.tv_nsec = ip->i_din2->di_mtimensec;
 		vap->va_ctime.tv_sec = ip->i_din2->di_ctime;
 		vap->va_ctime.tv_nsec = ip->i_din2->di_ctimensec;
 		vap->va_birthtime.tv_sec = ip->i_din2->di_birthtime;
 		vap->va_birthtime.tv_nsec = ip->i_din2->di_birthnsec;
 		vap->va_bytes = dbtob((u_quad_t)ip->i_din2->di_blocks);
 		vap->va_filerev = ip->i_din2->di_modrev;
 	}
 	vap->va_flags = ip->i_flags;
 	vap->va_gen = ip->i_gen;
 	vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize;
 	vap->va_type = IFTOVT(ip->i_mode);
 	return (0);
 }
 
 /*
  * Set attribute vnode op. called from several syscalls
  */
 static int
 ufs_setattr(ap)
 	struct vop_setattr_args /* {
 		struct vnode *a_vp;
 		struct vattr *a_vap;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vattr *vap = ap->a_vap;
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	struct ucred *cred = ap->a_cred;
 	struct thread *td = curthread;
 	int error;
 
 	/*
 	 * Check for unsettable attributes.
 	 */
 	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
 	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
 	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
 	    ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
 		return (EINVAL);
 	}
 	if (vap->va_flags != VNOVAL) {
 		if ((vap->va_flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE |
 		    SF_NOUNLINK | SF_SNAPSHOT | UF_APPEND | UF_ARCHIVE |
 		    UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP | UF_NOUNLINK |
 		    UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
 		    UF_SPARSE | UF_SYSTEM)) != 0)
 			return (EOPNOTSUPP);
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		/*
 		 * Callers may only modify the file flags on objects they
 		 * have VADMIN rights for.
 		 */
 		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
 			return (error);
 		/*
 		 * Unprivileged processes are not permitted to unset system
 		 * flags, or modify flags if any system flags are set.
 		 * Privileged non-jail processes may not modify system flags
 		 * if securelevel > 0 and any existing system flags are set.
 		 * Privileged jail processes behave like privileged non-jail
 		 * processes if the security.jail.chflags_allowed sysctl is
 		 * is non-zero; otherwise, they behave like unprivileged
 		 * processes.
 		 */
 		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
 			if (ip->i_flags &
 			    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
 				error = securelevel_gt(cred, 0);
 				if (error)
 					return (error);
 			}
 			/* The snapshot flag cannot be toggled. */
 			if ((vap->va_flags ^ ip->i_flags) & SF_SNAPSHOT)
 				return (EPERM);
 		} else {
 			if (ip->i_flags &
 			    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
 			    ((vap->va_flags ^ ip->i_flags) & SF_SETTABLE))
 				return (EPERM);
 		}
 		ip->i_flags = vap->va_flags;
 		DIP_SET(ip, i_flags, vap->va_flags);
 		ip->i_flag |= IN_CHANGE;
 		error = UFS_UPDATE(vp, 0);
 		if (ip->i_flags & (IMMUTABLE | APPEND))
 			return (error);
 	}
 	/*
 	 * If immutable or append, no one can change any of its attributes
 	 * except the ones already handled (in some cases, file flags
 	 * including the immutability flags themselves for the superuser).
 	 */
 	if (ip->i_flags & (IMMUTABLE | APPEND))
 		return (EPERM);
 	/*
 	 * Go through the fields and update iff not VNOVAL.
 	 */
 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if ((error = ufs_chown(vp, vap->va_uid, vap->va_gid, cred,
 		    td)) != 0)
 			return (error);
 	}
 	if (vap->va_size != VNOVAL) {
 		/*
 		 * XXX most of the following special cases should be in
 		 * callers instead of in N filesystems.  The VDIR check
 		 * mostly already is.
 		 */
 		switch (vp->v_type) {
 		case VDIR:
 			return (EISDIR);
 		case VLNK:
 		case VREG:
 			/*
 			 * Truncation should have an effect in these cases.
 			 * Disallow it if the filesystem is read-only or
 			 * the file is being snapshotted.
 			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
 			if ((ip->i_flags & SF_SNAPSHOT) != 0)
 				return (EPERM);
 			break;
 		default:
 			/*
 			 * According to POSIX, the result is unspecified
 			 * for file types other than regular files,
 			 * directories and shared memory objects.  We
 			 * don't support shared memory objects in the file
 			 * system, and have dubious support for truncating
 			 * symlinks.  Just ignore the request in other cases.
 			 */
 			return (0);
 		}
 		if ((error = UFS_TRUNCATE(vp, vap->va_size, IO_NORMAL |
 		    ((vap->va_vaflags & VA_SYNC) != 0 ? IO_SYNC : 0),
 		    cred)) != 0)
 			return (error);
 	}
 	if (vap->va_atime.tv_sec != VNOVAL ||
 	    vap->va_mtime.tv_sec != VNOVAL ||
 	    vap->va_birthtime.tv_sec != VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if ((ip->i_flags & SF_SNAPSHOT) != 0)
 			return (EPERM);
 		error = vn_utimes_perm(vp, vap, cred, td);
 		if (error != 0)
 			return (error);
 		ip->i_flag |= IN_CHANGE | IN_MODIFIED;
 		if (vap->va_atime.tv_sec != VNOVAL) {
 			ip->i_flag &= ~IN_ACCESS;
 			DIP_SET(ip, i_atime, vap->va_atime.tv_sec);
 			DIP_SET(ip, i_atimensec, vap->va_atime.tv_nsec);
 		}
 		if (vap->va_mtime.tv_sec != VNOVAL) {
 			ip->i_flag &= ~IN_UPDATE;
 			DIP_SET(ip, i_mtime, vap->va_mtime.tv_sec);
 			DIP_SET(ip, i_mtimensec, vap->va_mtime.tv_nsec);
 		}
 		if (vap->va_birthtime.tv_sec != VNOVAL && I_IS_UFS2(ip)) {
 			ip->i_din2->di_birthtime = vap->va_birthtime.tv_sec;
 			ip->i_din2->di_birthnsec = vap->va_birthtime.tv_nsec;
 		}
 		error = UFS_UPDATE(vp, 0);
 		if (error)
 			return (error);
 	}
 	error = 0;
 	if (vap->va_mode != (mode_t)VNOVAL) {
 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
 			return (EROFS);
 		if ((ip->i_flags & SF_SNAPSHOT) != 0 && (vap->va_mode &
 		   (S_IXUSR | S_IWUSR | S_IXGRP | S_IWGRP | S_IXOTH | S_IWOTH)))
 			return (EPERM);
 		error = ufs_chmod(vp, (int)vap->va_mode, cred, td);
 	}
 	return (error);
 }
 
 #ifdef UFS_ACL
 static int
 ufs_update_nfs4_acl_after_mode_change(struct vnode *vp, int mode,
     int file_owner_id, struct ucred *cred, struct thread *td)
 {
 	int error;
 	struct acl *aclp;
 
 	aclp = acl_alloc(M_WAITOK);
 	error = ufs_getacl_nfs4_internal(vp, aclp, td);
 	/*
 	 * We don't have to handle EOPNOTSUPP here, as the filesystem claims
 	 * it supports ACLs.
 	 */
 	if (error)
 		goto out;
 
 	acl_nfs4_sync_acl_from_mode(aclp, mode, file_owner_id);
 	error = ufs_setacl_nfs4_internal(vp, aclp, td);
 
 out:
 	acl_free(aclp);
 	return (error);
 }
 #endif /* UFS_ACL */
 
 /*
  * Mark this file's access time for update for vfs_mark_atime().  This
  * is called from execve() and mmap().
  */
 static int
 ufs_markatime(ap)
 	struct vop_markatime_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 
 	VI_LOCK(vp);
 	ip->i_flag |= IN_ACCESS;
 	VI_UNLOCK(vp);
 	/*
 	 * XXXKIB No UFS_UPDATE(ap->a_vp, 0) there.
 	 */
 	return (0);
 }
 
 /*
  * Change the mode on a file.
  * Inode must be locked before calling.
  */
 static int
 ufs_chmod(vp, mode, cred, td)
 	struct vnode *vp;
 	int mode;
 	struct ucred *cred;
 	struct thread *td;
 {
 	struct inode *ip = VTOI(vp);
 	int error;
 
 	/*
 	 * To modify the permissions on a file, must possess VADMIN
 	 * for that file.
 	 */
 	if ((error = VOP_ACCESSX(vp, VWRITE_ACL, cred, td)))
 		return (error);
 	/*
 	 * Privileged processes may set the sticky bit on non-directories,
 	 * as well as set the setgid bit on a file with a group that the
 	 * process is not a member of.  Both of these are allowed in
 	 * jail(8).
 	 */
 	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
 		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
 			return (EFTYPE);
 	}
 	if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) {
 		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
 		if (error)
 			return (error);
 	}
 
 	/*
 	 * Deny setting setuid if we are not the file owner.
 	 */
 	if ((mode & ISUID) && ip->i_uid != cred->cr_uid) {
 		error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
 		if (error)
 			return (error);
 	}
 
 	ip->i_mode &= ~ALLPERMS;
 	ip->i_mode |= (mode & ALLPERMS);
 	DIP_SET(ip, i_mode, ip->i_mode);
 	ip->i_flag |= IN_CHANGE;
 #ifdef UFS_ACL
 	if ((vp->v_mount->mnt_flag & MNT_NFS4ACLS) != 0)
 		error = ufs_update_nfs4_acl_after_mode_change(vp, mode, ip->i_uid, cred, td);
 #endif
 	if (error == 0 && (ip->i_flag & IN_CHANGE) != 0)
 		error = UFS_UPDATE(vp, 0);
 
 	return (error);
 }
 
 /*
  * Perform chown operation on inode ip;
  * inode must be locked prior to call.
  */
 static int
 ufs_chown(vp, uid, gid, cred, td)
 	struct vnode *vp;
 	uid_t uid;
 	gid_t gid;
 	struct ucred *cred;
 	struct thread *td;
 {
 	struct inode *ip = VTOI(vp);
 	uid_t ouid;
 	gid_t ogid;
 	int error = 0;
 #ifdef QUOTA
 	int i;
 	ufs2_daddr_t change;
 #endif
 
 	if (uid == (uid_t)VNOVAL)
 		uid = ip->i_uid;
 	if (gid == (gid_t)VNOVAL)
 		gid = ip->i_gid;
 	/*
 	 * To modify the ownership of a file, must possess VADMIN for that
 	 * file.
 	 */
 	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
 		return (error);
 	/*
 	 * To change the owner of a file, or change the group of a file to a
 	 * group of which we are not a member, the caller must have
 	 * privilege.
 	 */
 	if (((uid != ip->i_uid && uid != cred->cr_uid) || 
 	    (gid != ip->i_gid && !groupmember(gid, cred))) &&
 	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
 		return (error);
 	ogid = ip->i_gid;
 	ouid = ip->i_uid;
 #ifdef QUOTA
 	if ((error = getinoquota(ip)) != 0)
 		return (error);
 	if (ouid == uid) {
 		dqrele(vp, ip->i_dquot[USRQUOTA]);
 		ip->i_dquot[USRQUOTA] = NODQUOT;
 	}
 	if (ogid == gid) {
 		dqrele(vp, ip->i_dquot[GRPQUOTA]);
 		ip->i_dquot[GRPQUOTA] = NODQUOT;
 	}
 	change = DIP(ip, i_blocks);
 	(void) chkdq(ip, -change, cred, CHOWN);
 	(void) chkiq(ip, -1, cred, CHOWN);
 	for (i = 0; i < MAXQUOTAS; i++) {
 		dqrele(vp, ip->i_dquot[i]);
 		ip->i_dquot[i] = NODQUOT;
 	}
 #endif
 	ip->i_gid = gid;
 	DIP_SET(ip, i_gid, gid);
 	ip->i_uid = uid;
 	DIP_SET(ip, i_uid, uid);
 #ifdef QUOTA
 	if ((error = getinoquota(ip)) == 0) {
 		if (ouid == uid) {
 			dqrele(vp, ip->i_dquot[USRQUOTA]);
 			ip->i_dquot[USRQUOTA] = NODQUOT;
 		}
 		if (ogid == gid) {
 			dqrele(vp, ip->i_dquot[GRPQUOTA]);
 			ip->i_dquot[GRPQUOTA] = NODQUOT;
 		}
 		if ((error = chkdq(ip, change, cred, CHOWN)) == 0) {
 			if ((error = chkiq(ip, 1, cred, CHOWN)) == 0)
 				goto good;
 			else
 				(void) chkdq(ip, -change, cred, CHOWN|FORCE);
 		}
 		for (i = 0; i < MAXQUOTAS; i++) {
 			dqrele(vp, ip->i_dquot[i]);
 			ip->i_dquot[i] = NODQUOT;
 		}
 	}
 	ip->i_gid = ogid;
 	DIP_SET(ip, i_gid, ogid);
 	ip->i_uid = ouid;
 	DIP_SET(ip, i_uid, ouid);
 	if (getinoquota(ip) == 0) {
 		if (ouid == uid) {
 			dqrele(vp, ip->i_dquot[USRQUOTA]);
 			ip->i_dquot[USRQUOTA] = NODQUOT;
 		}
 		if (ogid == gid) {
 			dqrele(vp, ip->i_dquot[GRPQUOTA]);
 			ip->i_dquot[GRPQUOTA] = NODQUOT;
 		}
 		(void) chkdq(ip, change, cred, FORCE|CHOWN);
 		(void) chkiq(ip, 1, cred, FORCE|CHOWN);
 		(void) getinoquota(ip);
 	}
 	return (error);
 good:
 	if (getinoquota(ip))
 		panic("ufs_chown: lost quota");
 #endif /* QUOTA */
 	ip->i_flag |= IN_CHANGE;
 	if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
 		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0)) {
 			ip->i_mode &= ~(ISUID | ISGID);
 			DIP_SET(ip, i_mode, ip->i_mode);
 		}
 	}
 	error = UFS_UPDATE(vp, 0);
 	return (error);
 }
 
 static int
 ufs_remove(ap)
 	struct vop_remove_args /* {
 		struct vnode *a_dvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	struct inode *ip;
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	int error;
 	struct thread *td;
 
 	td = curthread;
 	ip = VTOI(vp);
 	if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 	    (VTOI(dvp)->i_flags & APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 #ifdef UFS_GJOURNAL
 	ufs_gjournal_orphan(vp);
 #endif
 	error = ufs_dirremove(dvp, ip, ap->a_cnp->cn_flags, 0);
 	if (ip->i_nlink <= 0)
 		vp->v_vflag |= VV_NOSYNC;
 	if ((ip->i_flags & SF_SNAPSHOT) != 0) {
 		/*
 		 * Avoid deadlock where another thread is trying to
 		 * update the inodeblock for dvp and is waiting on
 		 * snaplk.  Temporary unlock the vnode lock for the
 		 * unlinked file and sync the directory.  This should
 		 * allow vput() of the directory to not block later on
 		 * while holding the snapshot vnode locked, assuming
 		 * that the directory hasn't been unlinked too.
 		 */
 		VOP_UNLOCK(vp, 0);
 		(void) VOP_FSYNC(dvp, MNT_WAIT, td);
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 	}
 out:
 	return (error);
 }
 
 static void
 print_bad_link_count(const char *funcname, struct vnode *dvp)
 {
 	struct inode *dip;
 
 	dip = VTOI(dvp);
-	uprintf("%s: Bad link count %d on parent inode %d in file system %s\n",
-	    funcname, dip->i_effnlink, dip->i_number,
+	uprintf("%s: Bad link count %d on parent inode %jd in file system %s\n",
+	    funcname, dip->i_effnlink, (intmax_t)dip->i_number,
 	    dvp->v_mount->mnt_stat.f_mntonname);
 }
 
 /*
  * link vnode call
  */
 static int
 ufs_link(ap)
 	struct vop_link_args /* {
 		struct vnode *a_tdvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct inode *ip;
 	struct direct newdir;
 	int error;
 
 #ifdef INVARIANTS
 	if ((cnp->cn_flags & HASBUF) == 0)
 		panic("ufs_link: no name");
 #endif
 	if (VTOI(tdvp)->i_effnlink < 2) {
 		print_bad_link_count("ufs_link", tdvp);
 		error = EINVAL;
 		goto out;
 	}
 	ip = VTOI(vp);
 	if ((nlink_t)ip->i_nlink >= LINK_MAX) {
 		error = EMLINK;
 		goto out;
 	}
 	/*
 	 * The file may have been removed after namei droped the original
 	 * lock.
 	 */
 	if (ip->i_effnlink == 0) {
 		error = ENOENT;
 		goto out;
 	}
 	if (ip->i_flags & (IMMUTABLE | APPEND)) {
 		error = EPERM;
 		goto out;
 	}
 	ip->i_effnlink++;
 	ip->i_nlink++;
 	DIP_SET(ip, i_nlink, ip->i_nlink);
 	ip->i_flag |= IN_CHANGE;
 	if (DOINGSOFTDEP(vp))
 		softdep_setup_link(VTOI(tdvp), ip);
 	error = UFS_UPDATE(vp, !DOINGSOFTDEP(vp) && !DOINGASYNC(vp));
 	if (!error) {
 		ufs_makedirentry(ip, cnp, &newdir);
 		error = ufs_direnter(tdvp, vp, &newdir, cnp, NULL, 0);
 	}
 
 	if (error) {
 		ip->i_effnlink--;
 		ip->i_nlink--;
 		DIP_SET(ip, i_nlink, ip->i_nlink);
 		ip->i_flag |= IN_CHANGE;
 		if (DOINGSOFTDEP(vp))
 			softdep_revert_link(VTOI(tdvp), ip);
 	}
 out:
 	return (error);
 }
 
 /*
  * whiteout vnode call
  */
 static int
 ufs_whiteout(ap)
 	struct vop_whiteout_args /* {
 		struct vnode *a_dvp;
 		struct componentname *a_cnp;
 		int a_flags;
 	} */ *ap;
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct direct newdir;
 	int error = 0;
 
 	switch (ap->a_flags) {
 	case LOOKUP:
 		/* 4.4 format directories support whiteout operations */
 		if (dvp->v_mount->mnt_maxsymlinklen > 0)
 			return (0);
 		return (EOPNOTSUPP);
 
 	case CREATE:
 		/* create a new directory whiteout */
 #ifdef INVARIANTS
 		if ((cnp->cn_flags & SAVENAME) == 0)
 			panic("ufs_whiteout: missing name");
 		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
 			panic("ufs_whiteout: old format filesystem");
 #endif
 
 		newdir.d_ino = WINO;
 		newdir.d_namlen = cnp->cn_namelen;
 		bcopy(cnp->cn_nameptr, newdir.d_name, (unsigned)cnp->cn_namelen + 1);
 		newdir.d_type = DT_WHT;
 		error = ufs_direnter(dvp, NULL, &newdir, cnp, NULL, 0);
 		break;
 
 	case DELETE:
 		/* remove an existing directory whiteout */
 #ifdef INVARIANTS
 		if (dvp->v_mount->mnt_maxsymlinklen <= 0)
 			panic("ufs_whiteout: old format filesystem");
 #endif
 
 		cnp->cn_flags &= ~DOWHITEOUT;
 		error = ufs_dirremove(dvp, NULL, cnp->cn_flags, 0);
 		break;
 	default:
 		panic("ufs_whiteout: unknown op");
 	}
 	return (error);
 }
 
 static volatile int rename_restarts;
 SYSCTL_INT(_vfs_ufs, OID_AUTO, rename_restarts, CTLFLAG_RD,
     __DEVOLATILE(int *, &rename_restarts), 0,
     "Times rename had to restart due to lock contention");
 
 /*
  * Rename system call.
  * 	rename("foo", "bar");
  * is essentially
  *	unlink("bar");
  *	link("foo", "bar");
  *	unlink("foo");
  * but ``atomically''.  Can't do full commit without saving state in the
  * inode on disk which isn't feasible at this time.  Best we can do is
  * always guarantee the target exists.
  *
  * Basic algorithm is:
  *
  * 1) Bump link count on source while we're linking it to the
  *    target.  This also ensure the inode won't be deleted out
  *    from underneath us while we work (it may be truncated by
  *    a concurrent `trunc' or `open' for creation).
  * 2) Link source to destination.  If destination already exists,
  *    delete it first.
  * 3) Unlink source reference to inode if still around. If a
  *    directory was moved and the parent of the destination
  *    is different from the source, patch the ".." entry in the
  *    directory.
  */
 static int
 ufs_rename(ap)
 	struct vop_rename_args  /* {
 		struct vnode *a_fdvp;
 		struct vnode *a_fvp;
 		struct componentname *a_fcnp;
 		struct vnode *a_tdvp;
 		struct vnode *a_tvp;
 		struct componentname *a_tcnp;
 	} */ *ap;
 {
 	struct vnode *tvp = ap->a_tvp;
 	struct vnode *tdvp = ap->a_tdvp;
 	struct vnode *fvp = ap->a_fvp;
 	struct vnode *fdvp = ap->a_fdvp;
 	struct vnode *nvp;
 	struct componentname *tcnp = ap->a_tcnp;
 	struct componentname *fcnp = ap->a_fcnp;
 	struct thread *td = fcnp->cn_thread;
 	struct inode *fip, *tip, *tdp, *fdp;
 	struct direct newdir;
 	off_t endoff;
 	int doingdirectory, newparent;
 	int error = 0;
 	struct mount *mp;
 	ino_t ino;
 
 #ifdef INVARIANTS
 	if ((tcnp->cn_flags & HASBUF) == 0 ||
 	    (fcnp->cn_flags & HASBUF) == 0)
 		panic("ufs_rename: no name");
 #endif
 	endoff = 0;
 	mp = tdvp->v_mount;
 	VOP_UNLOCK(tdvp, 0);
 	if (tvp && tvp != tdvp)
 		VOP_UNLOCK(tvp, 0);
 	/*
 	 * Check for cross-device rename.
 	 */
 	if ((fvp->v_mount != tdvp->v_mount) ||
 	    (tvp && (fvp->v_mount != tvp->v_mount))) {
 		error = EXDEV;
 		mp = NULL;
 		goto releout;
 	}
 relock:
 	/* 
 	 * We need to acquire 2 to 4 locks depending on whether tvp is NULL
 	 * and fdvp and tdvp are the same directory.  Subsequently we need
 	 * to double-check all paths and in the directory rename case we
 	 * need to verify that we are not creating a directory loop.  To
 	 * handle this we acquire all but fdvp using non-blocking
 	 * acquisitions.  If we fail to acquire any lock in the path we will
 	 * drop all held locks, acquire the new lock in a blocking fashion,
 	 * and then release it and restart the rename.  This acquire/release
 	 * step ensures that we do not spin on a lock waiting for release.
 	 */
 	error = vn_lock(fdvp, LK_EXCLUSIVE);
 	if (error)
 		goto releout;
 	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
 		VOP_UNLOCK(fdvp, 0);
 		error = vn_lock(tdvp, LK_EXCLUSIVE);
 		if (error)
 			goto releout;
 		VOP_UNLOCK(tdvp, 0);
 		atomic_add_int(&rename_restarts, 1);
 		goto relock;
 	}
 	/*
 	 * Re-resolve fvp to be certain it still exists and fetch the
 	 * correct vnode.
 	 */
 	error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
 	if (error) {
 		VOP_UNLOCK(fdvp, 0);
 		VOP_UNLOCK(tdvp, 0);
 		goto releout;
 	}
 	error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 	if (error) {
 		VOP_UNLOCK(fdvp, 0);
 		VOP_UNLOCK(tdvp, 0);
 		if (error != EBUSY)
 			goto releout;
 		error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
 		if (error != 0)
 			goto releout;
 		VOP_UNLOCK(nvp, 0);
 		vrele(fvp);
 		fvp = nvp;
 		atomic_add_int(&rename_restarts, 1);
 		goto relock;
 	}
 	vrele(fvp);
 	fvp = nvp;
 	/*
 	 * Re-resolve tvp and acquire the vnode lock if present.
 	 */
 	error = ufs_lookup_ino(tdvp, NULL, tcnp, &ino);
 	if (error != 0 && error != EJUSTRETURN) {
 		VOP_UNLOCK(fdvp, 0);
 		VOP_UNLOCK(tdvp, 0);
 		VOP_UNLOCK(fvp, 0);
 		goto releout;
 	}
 	/*
 	 * If tvp disappeared we just carry on.
 	 */
 	if (error == EJUSTRETURN && tvp != NULL) {
 		vrele(tvp);
 		tvp = NULL;
 	}
 	/*
 	 * Get the tvp ino if the lookup succeeded.  We may have to restart
 	 * if the non-blocking acquire fails.
 	 */
 	if (error == 0) {
 		nvp = NULL;
 		error = VFS_VGET(mp, ino, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
 		if (tvp)
 			vrele(tvp);
 		tvp = nvp;
 		if (error) {
 			VOP_UNLOCK(fdvp, 0);
 			VOP_UNLOCK(tdvp, 0);
 			VOP_UNLOCK(fvp, 0);
 			if (error != EBUSY)
 				goto releout;
 			error = VFS_VGET(mp, ino, LK_EXCLUSIVE, &nvp);
 			if (error != 0)
 				goto releout;
 			vput(nvp);
 			atomic_add_int(&rename_restarts, 1);
 			goto relock;
 		}
 	}
 	fdp = VTOI(fdvp);
 	fip = VTOI(fvp);
 	tdp = VTOI(tdvp);
 	tip = NULL;
 	if (tvp)
 		tip = VTOI(tvp);
 	if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
 	    (VTOI(tdvp)->i_flags & APPEND))) {
 		error = EPERM;
 		goto unlockout;
 	}
 	/*
 	 * Renaming a file to itself has no effect.  The upper layers should
 	 * not call us in that case.  However, things could change after
 	 * we drop the locks above.
 	 */
 	if (fvp == tvp) {
 		error = 0;
 		goto unlockout;
 	}
 	doingdirectory = 0;
 	newparent = 0;
 	ino = fip->i_number;
 	if (fip->i_nlink >= LINK_MAX) {
 		error = EMLINK;
 		goto unlockout;
 	}
 	if ((fip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))
 	    || (fdp->i_flags & APPEND)) {
 		error = EPERM;
 		goto unlockout;
 	}
 	if ((fip->i_mode & IFMT) == IFDIR) {
 		/*
 		 * Avoid ".", "..", and aliases of "." for obvious reasons.
 		 */
 		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
 		    fdp == fip ||
 		    (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) {
 			error = EINVAL;
 			goto unlockout;
 		}
 		if (fdp->i_number != tdp->i_number)
 			newparent = tdp->i_number;
 		doingdirectory = 1;
 	}
 	if ((fvp->v_type == VDIR && fvp->v_mountedhere != NULL) ||
 	    (tvp != NULL && tvp->v_type == VDIR &&
 	    tvp->v_mountedhere != NULL)) {
 		error = EXDEV;
 		goto unlockout;
 	}
 
 	/*
 	 * If ".." must be changed (ie the directory gets a new
 	 * parent) then the source directory must not be in the
 	 * directory hierarchy above the target, as this would
 	 * orphan everything below the source directory. Also
 	 * the user must have write permission in the source so
 	 * as to be able to change "..".
 	 */
 	if (doingdirectory && newparent) {
 		error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
 		if (error)
 			goto unlockout;
 		error = ufs_checkpath(ino, fdp->i_number, tdp, tcnp->cn_cred,
 		    &ino);
 		/*
 		 * We encountered a lock that we have to wait for.  Unlock
 		 * everything else and VGET before restarting.
 		 */
 		if (ino) {
 			VOP_UNLOCK(fdvp, 0);
 			VOP_UNLOCK(fvp, 0);
 			VOP_UNLOCK(tdvp, 0);
 			if (tvp)
 				VOP_UNLOCK(tvp, 0);
 			error = VFS_VGET(mp, ino, LK_SHARED, &nvp);
 			if (error == 0)
 				vput(nvp);
 			atomic_add_int(&rename_restarts, 1);
 			goto relock;
 		}
 		if (error)
 			goto unlockout;
 		if ((tcnp->cn_flags & SAVESTART) == 0)
 			panic("ufs_rename: lost to startdir");
 	}
 	if (fip->i_effnlink == 0 || fdp->i_effnlink == 0 ||
 	    tdp->i_effnlink == 0)
 		panic("Bad effnlink fip %p, fdp %p, tdp %p", fip, fdp, tdp);
 
 	/*
 	 * 1) Bump link count while we're moving stuff
 	 *    around.  If we crash somewhere before
 	 *    completing our work, the link count
 	 *    may be wrong, but correctable.
 	 */
 	fip->i_effnlink++;
 	fip->i_nlink++;
 	DIP_SET(fip, i_nlink, fip->i_nlink);
 	fip->i_flag |= IN_CHANGE;
 	if (DOINGSOFTDEP(fvp))
 		softdep_setup_link(tdp, fip);
 	error = UFS_UPDATE(fvp, !DOINGSOFTDEP(fvp) && !DOINGASYNC(fvp));
 	if (error)
 		goto bad;
 
 	/*
 	 * 2) If target doesn't exist, link the target
 	 *    to the source and unlink the source.
 	 *    Otherwise, rewrite the target directory
 	 *    entry to reference the source inode and
 	 *    expunge the original entry's existence.
 	 */
 	if (tip == NULL) {
 		if (ITODEV(tdp) != ITODEV(fip))
 			panic("ufs_rename: EXDEV");
 		if (doingdirectory && newparent) {
 			/*
 			 * Account for ".." in new directory.
 			 * When source and destination have the same
 			 * parent we don't adjust the link count.  The
 			 * actual link modification is completed when
 			 * .. is rewritten below.
 			 */
 			if ((nlink_t)tdp->i_nlink >= LINK_MAX) {
 				error = EMLINK;
 				goto bad;
 			}
 		}
 		ufs_makedirentry(fip, tcnp, &newdir);
 		error = ufs_direnter(tdvp, NULL, &newdir, tcnp, NULL, 1);
 		if (error)
 			goto bad;
 		/* Setup tdvp for directory compaction if needed. */
 		if (tdp->i_count && tdp->i_endoff &&
 		    tdp->i_endoff < tdp->i_size)
 			endoff = tdp->i_endoff;
 	} else {
 		if (ITODEV(tip) != ITODEV(tdp) || ITODEV(tip) != ITODEV(fip))
 			panic("ufs_rename: EXDEV");
 		/*
 		 * Short circuit rename(foo, foo).
 		 */
 		if (tip->i_number == fip->i_number)
 			panic("ufs_rename: same file");
 		/*
 		 * If the parent directory is "sticky", then the caller
 		 * must possess VADMIN for the parent directory, or the
 		 * destination of the rename.  This implements append-only
 		 * directories.
 		 */
 		if ((tdp->i_mode & S_ISTXT) &&
 		    VOP_ACCESS(tdvp, VADMIN, tcnp->cn_cred, td) &&
 		    VOP_ACCESS(tvp, VADMIN, tcnp->cn_cred, td)) {
 			error = EPERM;
 			goto bad;
 		}
 		/*
 		 * Target must be empty if a directory and have no links
 		 * to it. Also, ensure source and target are compatible
 		 * (both directories, or both not directories).
 		 */
 		if ((tip->i_mode & IFMT) == IFDIR) {
 			if ((tip->i_effnlink > 2) ||
 			    !ufs_dirempty(tip, tdp->i_number, tcnp->cn_cred)) {
 				error = ENOTEMPTY;
 				goto bad;
 			}
 			if (!doingdirectory) {
 				error = ENOTDIR;
 				goto bad;
 			}
 			cache_purge(tdvp);
 		} else if (doingdirectory) {
 			error = EISDIR;
 			goto bad;
 		}
 		if (doingdirectory) {
 			if (!newparent) {
 				tdp->i_effnlink--;
 				if (DOINGSOFTDEP(tdvp))
 					softdep_change_linkcnt(tdp);
 			}
 			tip->i_effnlink--;
 			if (DOINGSOFTDEP(tvp))
 				softdep_change_linkcnt(tip);
 		}
 		error = ufs_dirrewrite(tdp, tip, fip->i_number,
 		    IFTODT(fip->i_mode),
 		    (doingdirectory && newparent) ? newparent : doingdirectory);
 		if (error) {
 			if (doingdirectory) {
 				if (!newparent) {
 					tdp->i_effnlink++;
 					if (DOINGSOFTDEP(tdvp))
 						softdep_change_linkcnt(tdp);
 				}
 				tip->i_effnlink++;
 				if (DOINGSOFTDEP(tvp))
 					softdep_change_linkcnt(tip);
 			}
 		}
 		if (doingdirectory && !DOINGSOFTDEP(tvp)) {
 			/*
 			 * The only stuff left in the directory is "."
 			 * and "..". The "." reference is inconsequential
 			 * since we are quashing it. We have removed the "."
 			 * reference and the reference in the parent directory,
 			 * but there may be other hard links. The soft
 			 * dependency code will arrange to do these operations
 			 * after the parent directory entry has been deleted on
 			 * disk, so when running with that code we avoid doing
 			 * them now.
 			 */
 			if (!newparent) {
 				tdp->i_nlink--;
 				DIP_SET(tdp, i_nlink, tdp->i_nlink);
 				tdp->i_flag |= IN_CHANGE;
 			}
 			tip->i_nlink--;
 			DIP_SET(tip, i_nlink, tip->i_nlink);
 			tip->i_flag |= IN_CHANGE;
 		}
 	}
 
 	/*
 	 * 3) Unlink the source.  We have to resolve the path again to
 	 * fixup the directory offset and count for ufs_dirremove.
 	 */
 	if (fdvp == tdvp) {
 		error = ufs_lookup_ino(fdvp, NULL, fcnp, &ino);
 		if (error)
 			panic("ufs_rename: from entry went away!");
 		if (ino != fip->i_number)
 			panic("ufs_rename: ino mismatch %ju != %ju\n",
 			    (uintmax_t)ino, (uintmax_t)fip->i_number);
 	}
 	/*
 	 * If the source is a directory with a
 	 * new parent, the link count of the old
 	 * parent directory must be decremented
 	 * and ".." set to point to the new parent.
 	 */
 	if (doingdirectory && newparent) {
 		/*
 		 * If tip exists we simply use its link, otherwise we must
 		 * add a new one.
 		 */
 		if (tip == NULL) {
 			tdp->i_effnlink++;
 			tdp->i_nlink++;
 			DIP_SET(tdp, i_nlink, tdp->i_nlink);
 			tdp->i_flag |= IN_CHANGE;
 			if (DOINGSOFTDEP(tdvp))
 				softdep_setup_dotdot_link(tdp, fip);
 			error = UFS_UPDATE(tdvp, !DOINGSOFTDEP(tdvp) &&
 			    !DOINGASYNC(tdvp));
 			/* Don't go to bad here as the new link exists. */
 			if (error)
 				goto unlockout;
 		} else if (DOINGSUJ(tdvp))
 			/* Journal must account for each new link. */
 			softdep_setup_dotdot_link(tdp, fip);
 		fip->i_offset = mastertemplate.dot_reclen;
 		ufs_dirrewrite(fip, fdp, newparent, DT_DIR, 0);
 		cache_purge(fdvp);
 	}
 	error = ufs_dirremove(fdvp, fip, fcnp->cn_flags, 0);
 	/*
 	 * The kern_renameat() looks up the fvp using the DELETE flag, which
 	 * causes the removal of the name cache entry for fvp.
 	 * As the relookup of the fvp is done in two steps:
 	 * ufs_lookup_ino() and then VFS_VGET(), another thread might do a
 	 * normal lookup of the from name just before the VFS_VGET() call,
 	 * causing the cache entry to be re-instantiated.
 	 *
 	 * The same issue also applies to tvp if it exists as
 	 * otherwise we may have a stale name cache entry for the new
 	 * name that references the old i-node if it has other links
 	 * or open file descriptors.
 	 */
 	cache_purge(fvp);
 	if (tvp)
 		cache_purge(tvp);
 	cache_purge_negative(tdvp);
 
 unlockout:
 	vput(fdvp);
 	vput(fvp);
 	if (tvp)
 		vput(tvp);
 	/*
 	 * If compaction or fsync was requested do it now that other locks
 	 * are no longer needed.
 	 */
 	if (error == 0 && endoff != 0) {
 		error = UFS_TRUNCATE(tdvp, endoff, IO_NORMAL |
 		    (DOINGASYNC(tdvp) ? 0 : IO_SYNC), tcnp->cn_cred);
 		if (error != 0)
 			vn_printf(tdvp, "ufs_rename: failed to truncate "
 			    "err %d", error);
 #ifdef UFS_DIRHASH
 		else if (tdp->i_dirhash != NULL)
 			ufsdirhash_dirtrunc(tdp, endoff);
 #endif
 		/*
 		 * Even if the directory compaction failed, rename was
 		 * succesful.  Do not propagate a UFS_TRUNCATE() error
 		 * to the caller.
 		 */
 		error = 0;
 	}
 	if (error == 0 && tdp->i_flag & IN_NEEDSYNC)
 		error = VOP_FSYNC(tdvp, MNT_WAIT, td);
 	vput(tdvp);
 	return (error);
 
 bad:
 	fip->i_effnlink--;
 	fip->i_nlink--;
 	DIP_SET(fip, i_nlink, fip->i_nlink);
 	fip->i_flag |= IN_CHANGE;
 	if (DOINGSOFTDEP(fvp))
 		softdep_revert_link(tdp, fip);
 	goto unlockout;
 
 releout:
 	vrele(fdvp);
 	vrele(fvp);
 	vrele(tdvp);
 	if (tvp)
 		vrele(tvp);
 
 	return (error);
 }
 
 #ifdef UFS_ACL
 static int
 ufs_do_posix1e_acl_inheritance_dir(struct vnode *dvp, struct vnode *tvp,
     mode_t dmode, struct ucred *cred, struct thread *td)
 {
 	int error;
 	struct inode *ip = VTOI(tvp);
 	struct acl *dacl, *acl;
 
 	acl = acl_alloc(M_WAITOK);
 	dacl = acl_alloc(M_WAITOK);
 
 	/*
 	 * Retrieve default ACL from parent, if any.
 	 */
 	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
 	switch (error) {
 	case 0:
 		/*
 		 * Retrieved a default ACL, so merge mode and ACL if
 		 * necessary.  If the ACL is empty, fall through to
 		 * the "not defined or available" case.
 		 */
 		if (acl->acl_cnt != 0) {
 			dmode = acl_posix1e_newfilemode(dmode, acl);
 			ip->i_mode = dmode;
 			DIP_SET(ip, i_mode, dmode);
 			*dacl = *acl;
 			ufs_sync_acl_from_inode(ip, acl);
 			break;
 		}
 		/* FALLTHROUGH */
 
 	case EOPNOTSUPP:
 		/*
 		 * Just use the mode as-is.
 		 */
 		ip->i_mode = dmode;
 		DIP_SET(ip, i_mode, dmode);
 		error = 0;
 		goto out;
 	
 	default:
 		goto out;
 	}
 
 	/*
 	 * XXX: If we abort now, will Soft Updates notify the extattr
 	 * code that the EAs for the file need to be released?
 	 */
 	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
 	if (error == 0)
 		error = VOP_SETACL(tvp, ACL_TYPE_DEFAULT, dacl, cred, td);
 	switch (error) {
 	case 0:
 		break;
 
 	case EOPNOTSUPP:
 		/*
 		 * XXX: This should not happen, as EOPNOTSUPP above
 		 * was supposed to free acl.
 		 */
 		printf("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()\n");
 		/*
 		panic("ufs_mkdir: VOP_GETACL() but no VOP_SETACL()");
 		 */
 		break;
 
 	default:
 		goto out;
 	}
 
 out:
 	acl_free(acl);
 	acl_free(dacl);
 
 	return (error);
 }
 
 static int
 ufs_do_posix1e_acl_inheritance_file(struct vnode *dvp, struct vnode *tvp,
     mode_t mode, struct ucred *cred, struct thread *td)
 {
 	int error;
 	struct inode *ip = VTOI(tvp);
 	struct acl *acl;
 
 	acl = acl_alloc(M_WAITOK);
 
 	/*
 	 * Retrieve default ACL for parent, if any.
 	 */
 	error = VOP_GETACL(dvp, ACL_TYPE_DEFAULT, acl, cred, td);
 	switch (error) {
 	case 0:
 		/*
 		 * Retrieved a default ACL, so merge mode and ACL if
 		 * necessary.
 		 */
 		if (acl->acl_cnt != 0) {
 			/*
 			 * Two possible ways for default ACL to not
 			 * be present.  First, the EA can be
 			 * undefined, or second, the default ACL can
 			 * be blank.  If it's blank, fall through to
 			 * the it's not defined case.
 			 */
 			mode = acl_posix1e_newfilemode(mode, acl);
 			ip->i_mode = mode;
 			DIP_SET(ip, i_mode, mode);
 			ufs_sync_acl_from_inode(ip, acl);
 			break;
 		}
 		/* FALLTHROUGH */
 
 	case EOPNOTSUPP:
 		/*
 		 * Just use the mode as-is.
 		 */
 		ip->i_mode = mode;
 		DIP_SET(ip, i_mode, mode);
 		error = 0;
 		goto out;
 
 	default:
 		goto out;
 	}
 
 	/*
 	 * XXX: If we abort now, will Soft Updates notify the extattr
 	 * code that the EAs for the file need to be released?
 	 */
 	error = VOP_SETACL(tvp, ACL_TYPE_ACCESS, acl, cred, td);
 	switch (error) {
 	case 0:
 		break;
 
 	case EOPNOTSUPP:
 		/*
 		 * XXX: This should not happen, as EOPNOTSUPP above was
 		 * supposed to free acl.
 		 */
 		printf("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
 		    "but no VOP_SETACL()\n");
 		/* panic("ufs_do_posix1e_acl_inheritance_file: VOP_GETACL() "
 		    "but no VOP_SETACL()"); */
 		break;
 
 	default:
 		goto out;
 	}
 
 out:
 	acl_free(acl);
 
 	return (error);
 }
 
 static int
 ufs_do_nfs4_acl_inheritance(struct vnode *dvp, struct vnode *tvp,
     mode_t child_mode, struct ucred *cred, struct thread *td)
 {
 	int error;
 	struct acl *parent_aclp, *child_aclp;
 
 	parent_aclp = acl_alloc(M_WAITOK);
 	child_aclp = acl_alloc(M_WAITOK | M_ZERO);
 
 	error = ufs_getacl_nfs4_internal(dvp, parent_aclp, td);
 	if (error)
 		goto out;
 	acl_nfs4_compute_inherited_acl(parent_aclp, child_aclp,
 	    child_mode, VTOI(tvp)->i_uid, tvp->v_type == VDIR);
 	error = ufs_setacl_nfs4_internal(tvp, child_aclp, td);
 	if (error)
 		goto out;
 out:
 	acl_free(parent_aclp);
 	acl_free(child_aclp);
 
 	return (error);
 }
 #endif
 
 /*
  * Mkdir system call
  */
 static int
 ufs_mkdir(ap)
 	struct vop_mkdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 	} */ *ap;
 {
 	struct vnode *dvp = ap->a_dvp;
 	struct vattr *vap = ap->a_vap;
 	struct componentname *cnp = ap->a_cnp;
 	struct inode *ip, *dp;
 	struct vnode *tvp;
 	struct buf *bp;
 	struct dirtemplate dirtemplate, *dtp;
 	struct direct newdir;
 	int error, dmode;
 	long blkoff;
 
 #ifdef INVARIANTS
 	if ((cnp->cn_flags & HASBUF) == 0)
 		panic("ufs_mkdir: no name");
 #endif
 	dp = VTOI(dvp);
 	if ((nlink_t)dp->i_nlink >= LINK_MAX) {
 		error = EMLINK;
 		goto out;
 	}
 	dmode = vap->va_mode & 0777;
 	dmode |= IFDIR;
 	/*
 	 * Must simulate part of ufs_makeinode here to acquire the inode,
 	 * but not have it entered in the parent directory. The entry is
 	 * made later after writing "." and ".." entries.
 	 */
 	if (dp->i_effnlink < 2) {
 		print_bad_link_count("ufs_mkdir", dvp);
 		error = EINVAL;
 		goto out;
 	}
 	error = UFS_VALLOC(dvp, dmode, cnp->cn_cred, &tvp);
 	if (error)
 		goto out;
 	ip = VTOI(tvp);
 	ip->i_gid = dp->i_gid;
 	DIP_SET(ip, i_gid, dp->i_gid);
 #ifdef SUIDDIR
 	{
 #ifdef QUOTA
 		struct ucred ucred, *ucp;
 		gid_t ucred_group;
 		ucp = cnp->cn_cred;
 #endif
 		/*
 		 * If we are hacking owners here, (only do this where told to)
 		 * and we are not giving it TO root, (would subvert quotas)
 		 * then go ahead and give it to the other user.
 		 * The new directory also inherits the SUID bit.
 		 * If user's UID and dir UID are the same,
 		 * 'give it away' so that the SUID is still forced on.
 		 */
 		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
 		    (dp->i_mode & ISUID) && dp->i_uid) {
 			dmode |= ISUID;
 			ip->i_uid = dp->i_uid;
 			DIP_SET(ip, i_uid, dp->i_uid);
 #ifdef QUOTA
 			if (dp->i_uid != cnp->cn_cred->cr_uid) {
 				/*
 				 * Make sure the correct user gets charged
 				 * for the space.
 				 * Make a dummy credential for the victim.
 				 * XXX This seems to never be accessed out of
 				 * our context so a stack variable is ok.
 				 */
 				refcount_init(&ucred.cr_ref, 1);
 				ucred.cr_uid = ip->i_uid;
 				ucred.cr_ngroups = 1;
 				ucred.cr_groups = &ucred_group;
 				ucred.cr_groups[0] = dp->i_gid;
 				ucp = &ucred;
 			}
 #endif
 		} else {
 			ip->i_uid = cnp->cn_cred->cr_uid;
 			DIP_SET(ip, i_uid, ip->i_uid);
 		}
 #ifdef QUOTA
 		if ((error = getinoquota(ip)) ||
 	    	    (error = chkiq(ip, 1, ucp, 0))) {
 			if (DOINGSOFTDEP(tvp))
 				softdep_revert_link(dp, ip);
 			UFS_VFREE(tvp, ip->i_number, dmode);
 			vput(tvp);
 			return (error);
 		}
 #endif
 	}
 #else	/* !SUIDDIR */
 	ip->i_uid = cnp->cn_cred->cr_uid;
 	DIP_SET(ip, i_uid, ip->i_uid);
 #ifdef QUOTA
 	if ((error = getinoquota(ip)) ||
 	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
 		if (DOINGSOFTDEP(tvp))
 			softdep_revert_link(dp, ip);
 		UFS_VFREE(tvp, ip->i_number, dmode);
 		vput(tvp);
 		return (error);
 	}
 #endif
 #endif	/* !SUIDDIR */
 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 	ip->i_mode = dmode;
 	DIP_SET(ip, i_mode, dmode);
 	tvp->v_type = VDIR;	/* Rest init'd in getnewvnode(). */
 	ip->i_effnlink = 2;
 	ip->i_nlink = 2;
 	DIP_SET(ip, i_nlink, 2);
 
 	if (cnp->cn_flags & ISWHITEOUT) {
 		ip->i_flags |= UF_OPAQUE;
 		DIP_SET(ip, i_flags, ip->i_flags);
 	}
 
 	/*
 	 * Bump link count in parent directory to reflect work done below.
 	 * Should be done before reference is created so cleanup is
 	 * possible if we crash.
 	 */
 	dp->i_effnlink++;
 	dp->i_nlink++;
 	DIP_SET(dp, i_nlink, dp->i_nlink);
 	dp->i_flag |= IN_CHANGE;
 	if (DOINGSOFTDEP(dvp))
 		softdep_setup_mkdir(dp, ip);
 	error = UFS_UPDATE(dvp, !DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp));
 	if (error)
 		goto bad;
 #ifdef MAC
 	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
 		error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
 		    dvp, tvp, cnp);
 		if (error)
 			goto bad;
 	}
 #endif
 #ifdef UFS_ACL
 	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
 		error = ufs_do_posix1e_acl_inheritance_dir(dvp, tvp, dmode,
 		    cnp->cn_cred, cnp->cn_thread);
 		if (error)
 			goto bad;
 	} else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
 		error = ufs_do_nfs4_acl_inheritance(dvp, tvp, dmode,
 		    cnp->cn_cred, cnp->cn_thread);
 		if (error)
 			goto bad;
 	}
 #endif /* !UFS_ACL */
 
 	/*
 	 * Initialize directory with "." and ".." from static template.
 	 */
 	if (dvp->v_mount->mnt_maxsymlinklen > 0)
 		dtp = &mastertemplate;
 	else
 		dtp = (struct dirtemplate *)&omastertemplate;
 	dirtemplate = *dtp;
 	dirtemplate.dot_ino = ip->i_number;
 	dirtemplate.dotdot_ino = dp->i_number;
 	vnode_pager_setsize(tvp, DIRBLKSIZ);
 	if ((error = UFS_BALLOC(tvp, (off_t)0, DIRBLKSIZ, cnp->cn_cred,
 	    BA_CLRBUF, &bp)) != 0)
 		goto bad;
 	ip->i_size = DIRBLKSIZ;
 	DIP_SET(ip, i_size, DIRBLKSIZ);
 	ip->i_flag |= IN_CHANGE | IN_UPDATE;
 	bcopy((caddr_t)&dirtemplate, (caddr_t)bp->b_data, sizeof dirtemplate);
 	if (DOINGSOFTDEP(tvp)) {
 		/*
 		 * Ensure that the entire newly allocated block is a
 		 * valid directory so that future growth within the
 		 * block does not have to ensure that the block is
 		 * written before the inode.
 		 */
 		blkoff = DIRBLKSIZ;
 		while (blkoff < bp->b_bcount) {
 			((struct direct *)
 			   (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ;
 			blkoff += DIRBLKSIZ;
 		}
 	}
 	if ((error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) &&
 	    !DOINGASYNC(tvp))) != 0) {
 		(void)bwrite(bp);
 		goto bad;
 	}
 	/*
 	 * Directory set up, now install its entry in the parent directory.
 	 *
 	 * If we are not doing soft dependencies, then we must write out the
 	 * buffer containing the new directory body before entering the new 
 	 * name in the parent. If we are doing soft dependencies, then the
 	 * buffer containing the new directory body will be passed to and
 	 * released in the soft dependency code after the code has attached
 	 * an appropriate ordering dependency to the buffer which ensures that
 	 * the buffer is written before the new name is written in the parent.
 	 */
 	if (DOINGASYNC(dvp))
 		bdwrite(bp);
 	else if (!DOINGSOFTDEP(dvp) && ((error = bwrite(bp))))
 		goto bad;
 	ufs_makedirentry(ip, cnp, &newdir);
 	error = ufs_direnter(dvp, tvp, &newdir, cnp, bp, 0);
 	
 bad:
 	if (error == 0) {
 		*ap->a_vpp = tvp;
 	} else {
 		dp->i_effnlink--;
 		dp->i_nlink--;
 		DIP_SET(dp, i_nlink, dp->i_nlink);
 		dp->i_flag |= IN_CHANGE;
 		/*
 		 * No need to do an explicit VOP_TRUNCATE here, vrele will
 		 * do this for us because we set the link count to 0.
 		 */
 		ip->i_effnlink = 0;
 		ip->i_nlink = 0;
 		DIP_SET(ip, i_nlink, 0);
 		ip->i_flag |= IN_CHANGE;
 		if (DOINGSOFTDEP(tvp))
 			softdep_revert_mkdir(dp, ip);
 
 		vput(tvp);
 	}
 out:
 	return (error);
 }
 
 /*
  * Rmdir system call.
  */
 static int
 ufs_rmdir(ap)
 	struct vop_rmdir_args /* {
 		struct vnode *a_dvp;
 		struct vnode *a_vp;
 		struct componentname *a_cnp;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode *dvp = ap->a_dvp;
 	struct componentname *cnp = ap->a_cnp;
 	struct inode *ip, *dp;
 	int error;
 
 	ip = VTOI(vp);
 	dp = VTOI(dvp);
 
 	/*
 	 * Do not remove a directory that is in the process of being renamed.
 	 * Verify the directory is empty (and valid). Rmdir ".." will not be
 	 * valid since ".." will contain a reference to the current directory
 	 * and thus be non-empty. Do not allow the removal of mounted on
 	 * directories (this can happen when an NFS exported filesystem
 	 * tries to remove a locally mounted on directory).
 	 */
 	error = 0;
 	if (dp->i_effnlink <= 2) {
 		if (dp->i_effnlink == 2)
 			print_bad_link_count("ufs_rmdir", dvp);
 		error = EINVAL;
 		goto out;
 	}
 	if (!ufs_dirempty(ip, dp->i_number, cnp->cn_cred)) {
 		error = ENOTEMPTY;
 		goto out;
 	}
 	if ((dp->i_flags & APPEND)
 	    || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
 		error = EPERM;
 		goto out;
 	}
 	if (vp->v_mountedhere != 0) {
 		error = EINVAL;
 		goto out;
 	}
 #ifdef UFS_GJOURNAL
 	ufs_gjournal_orphan(vp);
 #endif
 	/*
 	 * Delete reference to directory before purging
 	 * inode.  If we crash in between, the directory
 	 * will be reattached to lost+found,
 	 */
 	dp->i_effnlink--;
 	ip->i_effnlink--;
 	if (DOINGSOFTDEP(vp))
 		softdep_setup_rmdir(dp, ip);
 	error = ufs_dirremove(dvp, ip, cnp->cn_flags, 1);
 	if (error) {
 		dp->i_effnlink++;
 		ip->i_effnlink++;
 		if (DOINGSOFTDEP(vp))
 			softdep_revert_rmdir(dp, ip);
 		goto out;
 	}
 	cache_purge(dvp);
 	/*
 	 * The only stuff left in the directory is "." and "..". The "."
 	 * reference is inconsequential since we are quashing it. The soft
 	 * dependency code will arrange to do these operations after
 	 * the parent directory entry has been deleted on disk, so
 	 * when running with that code we avoid doing them now.
 	 */
 	if (!DOINGSOFTDEP(vp)) {
 		dp->i_nlink--;
 		DIP_SET(dp, i_nlink, dp->i_nlink);
 		dp->i_flag |= IN_CHANGE;
 		error = UFS_UPDATE(dvp, 0);
 		ip->i_nlink--;
 		DIP_SET(ip, i_nlink, ip->i_nlink);
 		ip->i_flag |= IN_CHANGE;
 	}
 	cache_purge(vp);
 #ifdef UFS_DIRHASH
 	/* Kill any active hash; i_effnlink == 0, so it will not come back. */
 	if (ip->i_dirhash != NULL)
 		ufsdirhash_free(ip);
 #endif
 out:
 	return (error);
 }
 
 /*
  * symlink -- make a symbolic link
  */
 static int
 ufs_symlink(ap)
 	struct vop_symlink_args /* {
 		struct vnode *a_dvp;
 		struct vnode **a_vpp;
 		struct componentname *a_cnp;
 		struct vattr *a_vap;
 		char *a_target;
 	} */ *ap;
 {
 	struct vnode *vp, **vpp = ap->a_vpp;
 	struct inode *ip;
 	int len, error;
 
 	error = ufs_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp,
 	    vpp, ap->a_cnp, "ufs_symlink");
 	if (error)
 		return (error);
 	vp = *vpp;
 	len = strlen(ap->a_target);
 	if (len < vp->v_mount->mnt_maxsymlinklen) {
 		ip = VTOI(vp);
 		bcopy(ap->a_target, SHORTLINK(ip), len);
 		ip->i_size = len;
 		DIP_SET(ip, i_size, len);
 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
 		error = UFS_UPDATE(vp, 0);
 	} else
 		error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0,
 		    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK,
 		    ap->a_cnp->cn_cred, NOCRED, NULL, NULL);
 	if (error)
 		vput(vp);
 	return (error);
 }
 
 /*
  * Vnode op for reading directories.
  */
 int
 ufs_readdir(ap)
 	struct vop_readdir_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 		int *a_eofflag;
 		int *a_ncookies;
 		u_long **a_cookies;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct uio *uio = ap->a_uio;
 	struct buf *bp;
 	struct inode *ip;
 	struct direct *dp, *edp;
 	u_long *cookies;
 	struct dirent dstdp;
 	off_t offset, startoffset;
 	size_t readcnt, skipcnt;
 	ssize_t startresid;
 	int ncookies;
 	int error;
 
 	if (uio->uio_offset < 0)
 		return (EINVAL);
 	ip = VTOI(vp);
 	if (ip->i_effnlink == 0)
 		return (0);
 	if (ap->a_ncookies != NULL) {
 		ncookies = uio->uio_resid;
 		if (uio->uio_offset >= ip->i_size)
 			ncookies = 0;
 		else if (ip->i_size - uio->uio_offset < ncookies)
 			ncookies = ip->i_size - uio->uio_offset;
 		ncookies = ncookies / (offsetof(struct direct, d_name) + 4) + 1;
 		cookies = malloc(ncookies * sizeof(*cookies), M_TEMP, M_WAITOK);
 		*ap->a_ncookies = ncookies;
 		*ap->a_cookies = cookies;
 	} else {
 		ncookies = 0;
 		cookies = NULL;
 	}
 	offset = startoffset = uio->uio_offset;
 	startresid = uio->uio_resid;
 	error = 0;
 	while (error == 0 && uio->uio_resid > 0 &&
 	    uio->uio_offset < ip->i_size) {
 		error = ffs_blkatoff(vp, uio->uio_offset, NULL, &bp);
 		if (error)
 			break;
 		if (bp->b_offset + bp->b_bcount > ip->i_size)
 			readcnt = ip->i_size - bp->b_offset;
 		else
 			readcnt = bp->b_bcount;
 		skipcnt = (size_t)(uio->uio_offset - bp->b_offset) &
 		    ~(size_t)(DIRBLKSIZ - 1);
 		offset = bp->b_offset + skipcnt;
 		dp = (struct direct *)&bp->b_data[skipcnt];
 		edp = (struct direct *)&bp->b_data[readcnt];
 		while (error == 0 && uio->uio_resid > 0 && dp < edp) {
 			if (dp->d_reclen <= offsetof(struct direct, d_name) ||
 			    (caddr_t)dp + dp->d_reclen > (caddr_t)edp) {
 				error = EIO;
 				break;
 			}
 #if BYTE_ORDER == LITTLE_ENDIAN
 			/* Old filesystem format. */
 			if (vp->v_mount->mnt_maxsymlinklen <= 0) {
 				dstdp.d_namlen = dp->d_type;
 				dstdp.d_type = dp->d_namlen;
 			} else
 #endif
 			{
 				dstdp.d_namlen = dp->d_namlen;
 				dstdp.d_type = dp->d_type;
 			}
 			if (offsetof(struct direct, d_name) + dstdp.d_namlen >
 			    dp->d_reclen) {
 				error = EIO;
 				break;
 			}
 			if (offset < startoffset || dp->d_ino == 0)
 				goto nextentry;
 			dstdp.d_fileno = dp->d_ino;
 			dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp);
 			bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen);
 			dstdp.d_name[dstdp.d_namlen] = '\0';
 			if (dstdp.d_reclen > uio->uio_resid) {
 				if (uio->uio_resid == startresid)
 					error = EINVAL;
 				else
 					error = EJUSTRETURN;
 				break;
 			}
 			/* Advance dp. */
 			error = uiomove((caddr_t)&dstdp, dstdp.d_reclen, uio);
 			if (error)
 				break;
 			if (cookies != NULL) {
 				KASSERT(ncookies > 0,
 				    ("ufs_readdir: cookies buffer too small"));
 				*cookies = offset + dp->d_reclen;
 				cookies++;
 				ncookies--;
 			}
 nextentry:
 			offset += dp->d_reclen;
 			dp = (struct direct *)((caddr_t)dp + dp->d_reclen);
 		}
 		bqrelse(bp);
 		uio->uio_offset = offset;
 	}
 	/* We need to correct uio_offset. */
 	uio->uio_offset = offset;
 	if (error == EJUSTRETURN)
 		error = 0;
 	if (ap->a_ncookies != NULL) {
 		if (error == 0) {
 			ap->a_ncookies -= ncookies;
 		} else {
 			free(*ap->a_cookies, M_TEMP);
 			*ap->a_ncookies = 0;
 			*ap->a_cookies = NULL;
 		}
 	}
 	if (error == 0 && ap->a_eofflag)
 		*ap->a_eofflag = ip->i_size <= uio->uio_offset;
 	return (error);
 }
 
 /*
  * Return target name of a symbolic link
  */
 static int
 ufs_readlink(ap)
 	struct vop_readlink_args /* {
 		struct vnode *a_vp;
 		struct uio *a_uio;
 		struct ucred *a_cred;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 	doff_t isize;
 
 	isize = ip->i_size;
 	if ((isize < vp->v_mount->mnt_maxsymlinklen) ||
 	    DIP(ip, i_blocks) == 0) { /* XXX - for old fastlink support */
 		return (uiomove(SHORTLINK(ip), isize, ap->a_uio));
 	}
 	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
 }
 
 /*
  * Calculate the logical to physical mapping if not done already,
  * then call the device strategy routine.
  *
  * In order to be able to swap to a file, the ufs_bmaparray() operation may not
  * deadlock on memory.  See ufs_bmap() for details.
  */
 static int
 ufs_strategy(ap)
 	struct vop_strategy_args /* {
 		struct vnode *a_vp;
 		struct buf *a_bp;
 	} */ *ap;
 {
 	struct buf *bp = ap->a_bp;
 	struct vnode *vp = ap->a_vp;
 	ufs2_daddr_t blkno;
 	int error;
 
 	if (bp->b_blkno == bp->b_lblkno) {
 		error = ufs_bmaparray(vp, bp->b_lblkno, &blkno, bp, NULL, NULL);
 		bp->b_blkno = blkno;
 		if (error) {
 			bp->b_error = error;
 			bp->b_ioflags |= BIO_ERROR;
 			bufdone(bp);
 			return (0);
 		}
 		if ((long)bp->b_blkno == -1)
 			vfs_bio_clrbuf(bp);
 	}
 	if ((long)bp->b_blkno == -1) {
 		bufdone(bp);
 		return (0);
 	}
 	bp->b_iooffset = dbtob(bp->b_blkno);
 	BO_STRATEGY(VFSTOUFS(vp->v_mount)->um_bo, bp);
 	return (0);
 }
 
 /*
  * Print out the contents of an inode.
  */
 static int
 ufs_print(ap)
 	struct vop_print_args /* {
 		struct vnode *a_vp;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	struct inode *ip = VTOI(vp);
 
 	printf("\tino %lu, on dev %s", (u_long)ip->i_number,
 	    devtoname(ITODEV(ip)));
 	if (vp->v_type == VFIFO)
 		fifo_printinfo(vp);
 	printf("\n");
 	return (0);
 }
 
 /*
  * Close wrapper for fifos.
  *
  * Update the times on the inode then do device close.
  */
 static int
 ufsfifo_close(ap)
 	struct vop_close_args /* {
 		struct vnode *a_vp;
 		int  a_fflag;
 		struct ucred *a_cred;
 		struct thread *a_td;
 	} */ *ap;
 {
 	struct vnode *vp = ap->a_vp;
 	int usecount;
 
 	VI_LOCK(vp);
 	usecount = vp->v_usecount;
 	if (usecount > 1)
 		ufs_itimes_locked(vp);
 	VI_UNLOCK(vp);
 	return (fifo_specops.vop_close(ap));
 }
 
 /*
  * Kqfilter wrapper for fifos.
  *
  * Fall through to ufs kqfilter routines if needed 
  */
 static int
 ufsfifo_kqfilter(ap)
 	struct vop_kqfilter_args *ap;
 {
 	int error;
 
 	error = fifo_specops.vop_kqfilter(ap);
 	if (error)
 		error = vfs_kqfilter(ap);
 	return (error);
 }
 
 /*
  * Return POSIX pathconf information applicable to fifos.
  */
 static int
 ufsfifo_pathconf(ap)
 	struct vop_pathconf_args /* {
 		struct vnode *a_vp;
 		int a_name;
 		int *a_retval;
 	} */ *ap;
 {
 
 	switch (ap->a_name) {
 	case _PC_ACL_EXTENDED:
 	case _PC_ACL_NFS4:
 	case _PC_ACL_PATH_MAX:
 	case _PC_MAC_PRESENT:
 		return (ufs_pathconf(ap));
 	default:
 		return (fifo_specops.vop_pathconf(ap));
 	}
 	/* NOTREACHED */
 }
 
 /*
  * Return POSIX pathconf information applicable to ufs filesystems.
  */
 static int
 ufs_pathconf(ap)
 	struct vop_pathconf_args /* {
 		struct vnode *a_vp;
 		int a_name;
 		int *a_retval;
 	} */ *ap;
 {
 	int error;
 
 	error = 0;
 	switch (ap->a_name) {
 	case _PC_LINK_MAX:
 		*ap->a_retval = LINK_MAX;
 		break;
 	case _PC_NAME_MAX:
 		*ap->a_retval = NAME_MAX;
 		break;
 	case _PC_PATH_MAX:
 		*ap->a_retval = PATH_MAX;
 		break;
 	case _PC_PIPE_BUF:
 		*ap->a_retval = PIPE_BUF;
 		break;
 	case _PC_CHOWN_RESTRICTED:
 		*ap->a_retval = 1;
 		break;
 	case _PC_NO_TRUNC:
 		*ap->a_retval = 1;
 		break;
 	case _PC_ACL_EXTENDED:
 #ifdef UFS_ACL
 		if (ap->a_vp->v_mount->mnt_flag & MNT_ACLS)
 			*ap->a_retval = 1;
 		else
 			*ap->a_retval = 0;
 #else
 		*ap->a_retval = 0;
 #endif
 		break;
 
 	case _PC_ACL_NFS4:
 #ifdef UFS_ACL
 		if (ap->a_vp->v_mount->mnt_flag & MNT_NFS4ACLS)
 			*ap->a_retval = 1;
 		else
 			*ap->a_retval = 0;
 #else
 		*ap->a_retval = 0;
 #endif
 		break;
 
 	case _PC_ACL_PATH_MAX:
 #ifdef UFS_ACL
 		if (ap->a_vp->v_mount->mnt_flag & (MNT_ACLS | MNT_NFS4ACLS))
 			*ap->a_retval = ACL_MAX_ENTRIES;
 		else
 			*ap->a_retval = 3;
 #else
 		*ap->a_retval = 3;
 #endif
 		break;
 	case _PC_MAC_PRESENT:
 #ifdef MAC
 		if (ap->a_vp->v_mount->mnt_flag & MNT_MULTILABEL)
 			*ap->a_retval = 1;
 		else
 			*ap->a_retval = 0;
 #else
 		*ap->a_retval = 0;
 #endif
 		break;
 	case _PC_MIN_HOLE_SIZE:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_ASYNC_IO:
 		/* _PC_ASYNC_IO should have been handled by upper layers. */
 		KASSERT(0, ("_PC_ASYNC_IO should not get here"));
 		error = EINVAL;
 		break;
 	case _PC_PRIO_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_SYNC_IO:
 		*ap->a_retval = 0;
 		break;
 	case _PC_ALLOC_SIZE_MIN:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
 		break;
 	case _PC_FILESIZEBITS:
 		*ap->a_retval = 64;
 		break;
 	case _PC_REC_INCR_XFER_SIZE:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_MAX_XFER_SIZE:
 		*ap->a_retval = -1; /* means ``unlimited'' */
 		break;
 	case _PC_REC_MIN_XFER_SIZE:
 		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
 		break;
 	case _PC_REC_XFER_ALIGN:
 		*ap->a_retval = PAGE_SIZE;
 		break;
 	case _PC_SYMLINK_MAX:
 		*ap->a_retval = MAXPATHLEN;
 		break;
 
 	default:
 		error = EINVAL;
 		break;
 	}
 	return (error);
 }
 
 /*
  * Initialize the vnode associated with a new inode, handle aliased
  * vnodes.
  */
 int
 ufs_vinit(mntp, fifoops, vpp)
 	struct mount *mntp;
 	struct vop_vector *fifoops;
 	struct vnode **vpp;
 {
 	struct inode *ip;
 	struct vnode *vp;
 
 	vp = *vpp;
 	ip = VTOI(vp);
 	vp->v_type = IFTOVT(ip->i_mode);
 	if (vp->v_type == VFIFO)
 		vp->v_op = fifoops;
 	ASSERT_VOP_LOCKED(vp, "ufs_vinit");
 	if (ip->i_number == ROOTINO)
 		vp->v_vflag |= VV_ROOT;
 	*vpp = vp;
 	return (0);
 }
 
 /*
  * Allocate a new inode.
  * Vnode dvp must be locked.
  */
 static int
 ufs_makeinode(mode, dvp, vpp, cnp, callfunc)
 	int mode;
 	struct vnode *dvp;
 	struct vnode **vpp;
 	struct componentname *cnp;
 	const char *callfunc;
 {
 	struct inode *ip, *pdir;
 	struct direct newdir;
 	struct vnode *tvp;
 	int error;
 
 	pdir = VTOI(dvp);
 #ifdef INVARIANTS
 	if ((cnp->cn_flags & HASBUF) == 0)
 		panic("%s: no name", callfunc);
 #endif
 	*vpp = NULL;
 	if ((mode & IFMT) == 0)
 		mode |= IFREG;
 
 	if (pdir->i_effnlink < 2) {
 		print_bad_link_count(callfunc, dvp);
 		return (EINVAL);
 	}
 	error = UFS_VALLOC(dvp, mode, cnp->cn_cred, &tvp);
 	if (error)
 		return (error);
 	ip = VTOI(tvp);
 	ip->i_gid = pdir->i_gid;
 	DIP_SET(ip, i_gid, pdir->i_gid);
 #ifdef SUIDDIR
 	{
 #ifdef QUOTA
 		struct ucred ucred, *ucp;
 		gid_t ucred_group;
 		ucp = cnp->cn_cred;
 #endif
 		/*
 		 * If we are not the owner of the directory,
 		 * and we are hacking owners here, (only do this where told to)
 		 * and we are not giving it TO root, (would subvert quotas)
 		 * then go ahead and give it to the other user.
 		 * Note that this drops off the execute bits for security.
 		 */
 		if ((dvp->v_mount->mnt_flag & MNT_SUIDDIR) &&
 		    (pdir->i_mode & ISUID) &&
 		    (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) {
 			ip->i_uid = pdir->i_uid;
 			DIP_SET(ip, i_uid, ip->i_uid);
 			mode &= ~07111;
 #ifdef QUOTA
 			/*
 			 * Make sure the correct user gets charged
 			 * for the space.
 			 * Quickly knock up a dummy credential for the victim.
 			 * XXX This seems to never be accessed out of our
 			 * context so a stack variable is ok.
 			 */
 			refcount_init(&ucred.cr_ref, 1);
 			ucred.cr_uid = ip->i_uid;
 			ucred.cr_ngroups = 1;
 			ucred.cr_groups = &ucred_group;
 			ucred.cr_groups[0] = pdir->i_gid;
 			ucp = &ucred;
 #endif
 		} else {
 			ip->i_uid = cnp->cn_cred->cr_uid;
 			DIP_SET(ip, i_uid, ip->i_uid);
 		}
 
 #ifdef QUOTA
 		if ((error = getinoquota(ip)) ||
 	    	    (error = chkiq(ip, 1, ucp, 0))) {
 			if (DOINGSOFTDEP(tvp))
 				softdep_revert_link(pdir, ip);
 			UFS_VFREE(tvp, ip->i_number, mode);
 			vput(tvp);
 			return (error);
 		}
 #endif
 	}
 #else	/* !SUIDDIR */
 	ip->i_uid = cnp->cn_cred->cr_uid;
 	DIP_SET(ip, i_uid, ip->i_uid);
 #ifdef QUOTA
 	if ((error = getinoquota(ip)) ||
 	    (error = chkiq(ip, 1, cnp->cn_cred, 0))) {
 		if (DOINGSOFTDEP(tvp))
 			softdep_revert_link(pdir, ip);
 		UFS_VFREE(tvp, ip->i_number, mode);
 		vput(tvp);
 		return (error);
 	}
 #endif
 #endif	/* !SUIDDIR */
 	ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE;
 	ip->i_mode = mode;
 	DIP_SET(ip, i_mode, mode);
 	tvp->v_type = IFTOVT(mode);	/* Rest init'd in getnewvnode(). */
 	ip->i_effnlink = 1;
 	ip->i_nlink = 1;
 	DIP_SET(ip, i_nlink, 1);
 	if (DOINGSOFTDEP(tvp))
 		softdep_setup_create(VTOI(dvp), ip);
 	if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred) &&
 	    priv_check_cred(cnp->cn_cred, PRIV_VFS_SETGID, 0)) {
 		ip->i_mode &= ~ISGID;
 		DIP_SET(ip, i_mode, ip->i_mode);
 	}
 
 	if (cnp->cn_flags & ISWHITEOUT) {
 		ip->i_flags |= UF_OPAQUE;
 		DIP_SET(ip, i_flags, ip->i_flags);
 	}
 
 	/*
 	 * Make sure inode goes to disk before directory entry.
 	 */
 	error = UFS_UPDATE(tvp, !DOINGSOFTDEP(tvp) && !DOINGASYNC(tvp));
 	if (error)
 		goto bad;
 #ifdef MAC
 	if (dvp->v_mount->mnt_flag & MNT_MULTILABEL) {
 		error = mac_vnode_create_extattr(cnp->cn_cred, dvp->v_mount,
 		    dvp, tvp, cnp);
 		if (error)
 			goto bad;
 	}
 #endif
 #ifdef UFS_ACL
 	if (dvp->v_mount->mnt_flag & MNT_ACLS) {
 		error = ufs_do_posix1e_acl_inheritance_file(dvp, tvp, mode,
 		    cnp->cn_cred, cnp->cn_thread);
 		if (error)
 			goto bad;
 	} else if (dvp->v_mount->mnt_flag & MNT_NFS4ACLS) {
 		error = ufs_do_nfs4_acl_inheritance(dvp, tvp, mode,
 		    cnp->cn_cred, cnp->cn_thread);
 		if (error)
 			goto bad;
 	}
 #endif /* !UFS_ACL */
 	ufs_makedirentry(ip, cnp, &newdir);
 	error = ufs_direnter(dvp, tvp, &newdir, cnp, NULL, 0);
 	if (error)
 		goto bad;
 	*vpp = tvp;
 	return (0);
 
 bad:
 	/*
 	 * Write error occurred trying to update the inode
 	 * or the directory so must deallocate the inode.
 	 */
 	ip->i_effnlink = 0;
 	ip->i_nlink = 0;
 	DIP_SET(ip, i_nlink, 0);
 	ip->i_flag |= IN_CHANGE;
 	if (DOINGSOFTDEP(tvp))
 		softdep_revert_create(VTOI(dvp), ip);
 	vput(tvp);
 	return (error);
 }
 
 static int
 ufs_ioctl(struct vop_ioctl_args *ap)
 {
 
 	switch (ap->a_command) {
 	case FIOSEEKDATA:
 	case FIOSEEKHOLE:
 		return (vn_bmap_seekhole(ap->a_vp, ap->a_command,
 		    (off_t *)ap->a_data, ap->a_cred));
 	default:
 		return (ENOTTY);
 	}
 }
 
 /* Global vfs data structures for ufs. */
 struct vop_vector ufs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 	.vop_fsync =		VOP_PANIC,
 	.vop_read =		VOP_PANIC,
 	.vop_reallocblks =	VOP_PANIC,
 	.vop_write =		VOP_PANIC,
 	.vop_accessx =		ufs_accessx,
 	.vop_bmap =		ufs_bmap,
 	.vop_cachedlookup =	ufs_lookup,
 	.vop_close =		ufs_close,
 	.vop_create =		ufs_create,
 	.vop_getattr =		ufs_getattr,
 	.vop_inactive =		ufs_inactive,
 	.vop_ioctl =		ufs_ioctl,
 	.vop_link =		ufs_link,
 	.vop_lookup =		vfs_cache_lookup,
 	.vop_markatime =	ufs_markatime,
 	.vop_mkdir =		ufs_mkdir,
 	.vop_mknod =		ufs_mknod,
 	.vop_open =		ufs_open,
 	.vop_pathconf =		ufs_pathconf,
 	.vop_poll =		vop_stdpoll,
 	.vop_print =		ufs_print,
 	.vop_readdir =		ufs_readdir,
 	.vop_readlink =		ufs_readlink,
 	.vop_reclaim =		ufs_reclaim,
 	.vop_remove =		ufs_remove,
 	.vop_rename =		ufs_rename,
 	.vop_rmdir =		ufs_rmdir,
 	.vop_setattr =		ufs_setattr,
 #ifdef MAC
 	.vop_setlabel =		vop_stdsetlabel_ea,
 #endif
 	.vop_strategy =		ufs_strategy,
 	.vop_symlink =		ufs_symlink,
 	.vop_whiteout =		ufs_whiteout,
 #ifdef UFS_EXTATTR
 	.vop_getextattr =	ufs_getextattr,
 	.vop_deleteextattr =	ufs_deleteextattr,
 	.vop_setextattr =	ufs_setextattr,
 #endif
 #ifdef UFS_ACL
 	.vop_getacl =		ufs_getacl,
 	.vop_setacl =		ufs_setacl,
 	.vop_aclcheck =		ufs_aclcheck,
 #endif
 };
 
 struct vop_vector ufs_fifoops = {
 	.vop_default =		&fifo_specops,
 	.vop_fsync =		VOP_PANIC,
 	.vop_accessx =		ufs_accessx,
 	.vop_close =		ufsfifo_close,
 	.vop_getattr =		ufs_getattr,
 	.vop_inactive =		ufs_inactive,
 	.vop_kqfilter =		ufsfifo_kqfilter,
 	.vop_markatime =	ufs_markatime,
 	.vop_pathconf = 	ufsfifo_pathconf,
 	.vop_print =		ufs_print,
 	.vop_read =		VOP_PANIC,
 	.vop_reclaim =		ufs_reclaim,
 	.vop_setattr =		ufs_setattr,
 #ifdef MAC
 	.vop_setlabel =		vop_stdsetlabel_ea,
 #endif
 	.vop_write =		VOP_PANIC,
 #ifdef UFS_EXTATTR
 	.vop_getextattr =	ufs_getextattr,
 	.vop_deleteextattr =	ufs_deleteextattr,
 	.vop_setextattr =	ufs_setextattr,
 #endif
 #ifdef UFS_ACL
 	.vop_getacl =		ufs_getacl,
 	.vop_setacl =		ufs_setacl,
 	.vop_aclcheck =		ufs_aclcheck,
 #endif
 };
Index: head/tests/sys/kern/pipe/pipe_ino_test.c
===================================================================
--- head/tests/sys/kern/pipe/pipe_ino_test.c	(revision 311521)
+++ head/tests/sys/kern/pipe/pipe_ino_test.c	(revision 311522)
@@ -1,65 +1,67 @@
 /*-
  * Copyright (c) 2011 Giovanni Trematerra <giovanni.trematerra@gmail.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 /*
  * $FreeBSD$
  * Test conformance to stat(2) SUSv4 description:
  *  "For all other file types defined in this volume of POSIX.1-2008, the
  *  structure members st_mode, st_ino, st_dev, st_uid, st_gid, st_atim,
  *  st_ctim, and st_mtim shall have meaningful values ...".
  * Check that st_dev and st_ino are meaningful.
  */
 
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <err.h>
 #include <stdio.h>
 #include <unistd.h>
 
 int
 main(void)
 {
 	int pipefd[2];
 	struct stat st1, st2;
 
 	if (pipe(pipefd) == -1)
 		err(1, "FAIL: pipe");
 
 	if (fstat(pipefd[0], &st1) == -1)
 		err(1, "FAIL: fstat st1");
 	if (fstat(pipefd[1], &st2) == -1)
 		err(1, "FAIL: fstat st2");
 	if (st1.st_dev != st2.st_dev || st1.st_dev == 0 || st2.st_dev == 0)
-		errx(1, "FAIL: wrong dev number %d %d", st1.st_dev, st2.st_dev);
+		errx(1, "FAIL: wrong dev number %ju %ju",
+		    (uintmax_t)st1.st_dev, (uintmax_t)st2.st_dev);
 	if (st1.st_ino == st2.st_ino)
-		errx(1, "FAIL: inode numbers are equal: %d", st1.st_ino);
+		errx(1, "FAIL: inode numbers are equal: %ju",
+		    (uintmax_t)st1.st_ino);
 
 	close(pipefd[0]);
 	close(pipefd[1]);
 	printf("PASS\n");
 
 	return (0);
 }
Index: head/usr.bin/find/ls.c
===================================================================
--- head/usr.bin/find/ls.c	(revision 311521)
+++ head/usr.bin/find/ls.c	(revision 311522)
@@ -1,119 +1,120 @@
 /*
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #ifndef lint
 #if 0
 static char sccsid[] = "@(#)ls.c	8.1 (Berkeley) 6/6/93";
 #endif
 #endif /* not lint */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/stat.h>
 
 #include <err.h>
 #include <errno.h>
 #include <fts.h>
 #include <grp.h>
 #include <inttypes.h>
 #include <langinfo.h>
 #include <pwd.h>
 #include <stdio.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
 
 #include "find.h"
 
 /* Derived from the print routines in the ls(1) source code. */
 
 static void printlink(char *);
 static void printtime(time_t);
 
 void
 printlong(char *name, char *accpath, struct stat *sb)
 {
 	char modep[15];
 
 	(void)printf("%6ju %8"PRId64" ", (uintmax_t)sb->st_ino, sb->st_blocks);
 	(void)strmode(sb->st_mode, modep);
-	(void)printf("%s %3u %-*s %-*s ", modep, sb->st_nlink, MAXLOGNAME - 1,
+	(void)printf("%s %3ju %-*s %-*s ", modep, (uintmax_t)sb->st_nlink,
+	    MAXLOGNAME - 1,
 	    user_from_uid(sb->st_uid, 0), MAXLOGNAME - 1,
 	    group_from_gid(sb->st_gid, 0));
 
 	if (S_ISCHR(sb->st_mode) || S_ISBLK(sb->st_mode))
 		(void)printf("%#8jx ", (uintmax_t)sb->st_rdev);
 	else
 		(void)printf("%8"PRId64" ", sb->st_size);
 	printtime(sb->st_mtime);
 	(void)printf("%s", name);
 	if (S_ISLNK(sb->st_mode))
 		printlink(accpath);
 	(void)putchar('\n');
 }
 
 static void
 printtime(time_t ftime)
 {
 	char longstring[80];
 	static time_t lnow;
 	const char *format;
 	static int d_first = -1;
 
 	if (d_first < 0)
 		d_first = (*nl_langinfo(D_MD_ORDER) == 'd');
 	if (lnow == 0)
 		lnow = time(NULL);
 
 #define	SIXMONTHS	((365 / 2) * 86400)
 	if (ftime + SIXMONTHS > lnow && ftime < lnow + SIXMONTHS)
 		/* mmm dd hh:mm || dd mmm hh:mm */
 		format = d_first ? "%e %b %R " : "%b %e %R ";
 	else
 		/* mmm dd  yyyy || dd mmm  yyyy */
 		format = d_first ? "%e %b  %Y " : "%b %e  %Y ";
 	strftime(longstring, sizeof(longstring), format, localtime(&ftime));
 	fputs(longstring, stdout);
 }
 
 static void
 printlink(char *name)
 {
 	int lnklen;
 	char path[MAXPATHLEN];
 
 	if ((lnklen = readlink(name, path, MAXPATHLEN - 1)) == -1) {
 		warn("%s", name);
 		return;
 	}
 	path[lnklen] = '\0';
 	(void)printf(" -> %s", path);
 }
Index: head/usr.bin/gzip/gzip.c
===================================================================
--- head/usr.bin/gzip/gzip.c	(revision 311521)
+++ head/usr.bin/gzip/gzip.c	(revision 311522)
@@ -1,2174 +1,2174 @@
 /*	$NetBSD: gzip.c,v 1.109 2015/10/27 07:36:18 mrg Exp $	*/
 
 /*-
  * Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  */
 
 #include <sys/cdefs.h>
 #ifndef lint
 __COPYRIGHT("@(#) Copyright (c) 1997, 1998, 2003, 2004, 2006\
  Matthew R. Green.  All rights reserved.");
 __FBSDID("$FreeBSD$");
 #endif /* not lint */
 
 /*
  * gzip.c -- GPL free gzip using zlib.
  *
  * RFC 1950 covers the zlib format
  * RFC 1951 covers the deflate format
  * RFC 1952 covers the gzip format
  *
  * TODO:
  *	- use mmap where possible
  *	- make bzip2/compress -v/-t/-l support work as well as possible
  */
 
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 
 #include <inttypes.h>
 #include <unistd.h>
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <zlib.h>
 #include <fts.h>
 #include <libgen.h>
 #include <stdarg.h>
 #include <getopt.h>
 #include <time.h>
 
 /* what type of file are we dealing with */
 enum filetype {
 	FT_GZIP,
 #ifndef NO_BZIP2_SUPPORT
 	FT_BZIP2,
 #endif
 #ifndef NO_COMPRESS_SUPPORT
 	FT_Z,
 #endif
 #ifndef NO_PACK_SUPPORT
 	FT_PACK,
 #endif
 #ifndef NO_XZ_SUPPORT
 	FT_XZ,
 #endif
 	FT_LAST,
 	FT_UNKNOWN
 };
 
 #ifndef NO_BZIP2_SUPPORT
 #include <bzlib.h>
 
 #define BZ2_SUFFIX	".bz2"
 #define BZIP2_MAGIC	"BZh"
 #endif
 
 #ifndef NO_COMPRESS_SUPPORT
 #define Z_SUFFIX	".Z"
 #define Z_MAGIC		"\037\235"
 #endif
 
 #ifndef NO_PACK_SUPPORT
 #define PACK_MAGIC	"\037\036"
 #endif
 
 #ifndef NO_XZ_SUPPORT
 #include <lzma.h>
 #define XZ_SUFFIX	".xz"
 #define XZ_MAGIC	"\3757zXZ"
 #endif
 
 #define GZ_SUFFIX	".gz"
 
 #define BUFLEN		(64 * 1024)
 
 #define GZIP_MAGIC0	0x1F
 #define GZIP_MAGIC1	0x8B
 #define GZIP_OMAGIC1	0x9E
 
 #define GZIP_TIMESTAMP	(off_t)4
 #define GZIP_ORIGNAME	(off_t)10
 
 #define HEAD_CRC	0x02
 #define EXTRA_FIELD	0x04
 #define ORIG_NAME	0x08
 #define COMMENT		0x10
 
 #define OS_CODE		3	/* Unix */
 
 typedef struct {
     const char	*zipped;
     int		ziplen;
     const char	*normal;	/* for unzip - must not be longer than zipped */
 } suffixes_t;
 static suffixes_t suffixes[] = {
 #define	SUFFIX(Z, N) {Z, sizeof Z - 1, N}
 	SUFFIX(GZ_SUFFIX,	""),	/* Overwritten by -S .xxx */
 #ifndef SMALL
 	SUFFIX(GZ_SUFFIX,	""),
 	SUFFIX(".z",		""),
 	SUFFIX("-gz",		""),
 	SUFFIX("-z",		""),
 	SUFFIX("_z",		""),
 	SUFFIX(".taz",		".tar"),
 	SUFFIX(".tgz",		".tar"),
 #ifndef NO_BZIP2_SUPPORT
 	SUFFIX(BZ2_SUFFIX,	""),
 	SUFFIX(".tbz",		".tar"),
 	SUFFIX(".tbz2",		".tar"),
 #endif
 #ifndef NO_COMPRESS_SUPPORT
 	SUFFIX(Z_SUFFIX,	""),
 #endif
 #ifndef NO_XZ_SUPPORT
 	SUFFIX(XZ_SUFFIX,	""),
 #endif
 	SUFFIX(GZ_SUFFIX,	""),	/* Overwritten by -S "" */
 #endif /* SMALL */
 #undef SUFFIX
 };
 #define NUM_SUFFIXES (nitems(suffixes))
 #define SUFFIX_MAXLEN	30
 
 static	const char	gzip_version[] = "FreeBSD gzip 20150413";
 
 #ifndef SMALL
 static	const char	gzip_copyright[] = \
 "   Copyright (c) 1997, 1998, 2003, 2004, 2006 Matthew R. Green\n"
 "   All rights reserved.\n"
 "\n"
 "   Redistribution and use in source and binary forms, with or without\n"
 "   modification, are permitted provided that the following conditions\n"
 "   are met:\n"
 "   1. Redistributions of source code must retain the above copyright\n"
 "      notice, this list of conditions and the following disclaimer.\n"
 "   2. Redistributions in binary form must reproduce the above copyright\n"
 "      notice, this list of conditions and the following disclaimer in the\n"
 "      documentation and/or other materials provided with the distribution.\n"
 "\n"
 "   THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR\n"
 "   IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES\n"
 "   OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.\n"
 "   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,\n"
 "   INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,\n"
 "   BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;\n"
 "   LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED\n"
 "   AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n"
 "   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n"
 "   OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\n"
 "   SUCH DAMAGE.";
 #endif
 
 static	int	cflag;			/* stdout mode */
 static	int	dflag;			/* decompress mode */
 static	int	lflag;			/* list mode */
 static	int	numflag = 6;		/* gzip -1..-9 value */
 
 #ifndef SMALL
 static	int	fflag;			/* force mode */
 static	int	kflag;			/* don't delete input files */
 static	int	nflag;			/* don't save name/timestamp */
 static	int	Nflag;			/* don't restore name/timestamp */
 static	int	qflag;			/* quiet mode */
 static	int	rflag;			/* recursive mode */
 static	int	tflag;			/* test */
 static	int	vflag;			/* verbose mode */
 static	const char *remove_file = NULL;	/* file to be removed upon SIGINT */
 #else
 #define		qflag	0
 #define		tflag	0
 #endif
 
 static	int	exit_value = 0;		/* exit value */
 
 static	char	*infile;		/* name of file coming in */
 
 static	void	maybe_err(const char *fmt, ...) __printflike(1, 2) __dead2;
 #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) ||	\
     !defined(NO_XZ_SUPPORT)
 static	void	maybe_errx(const char *fmt, ...) __printflike(1, 2) __dead2;
 #endif
 static	void	maybe_warn(const char *fmt, ...) __printflike(1, 2);
 static	void	maybe_warnx(const char *fmt, ...) __printflike(1, 2);
 static	enum filetype file_gettype(u_char *);
 #ifdef SMALL
 #define gz_compress(if, of, sz, fn, tm) gz_compress(if, of, sz)
 #endif
 static	off_t	gz_compress(int, int, off_t *, const char *, uint32_t);
 static	off_t	gz_uncompress(int, int, char *, size_t, off_t *, const char *);
 static	off_t	file_compress(char *, char *, size_t);
 static	off_t	file_uncompress(char *, char *, size_t);
 static	void	handle_pathname(char *);
 static	void	handle_file(char *, struct stat *);
 static	void	handle_stdin(void);
 static	void	handle_stdout(void);
 static	void	print_ratio(off_t, off_t, FILE *);
 static	void	print_list(int fd, off_t, const char *, time_t);
 static	void	usage(void) __dead2;
 static	void	display_version(void) __dead2;
 #ifndef SMALL
 static	void	display_license(void);
 static	void	sigint_handler(int);
 #endif
 static	const suffixes_t *check_suffix(char *, int);
 static	ssize_t	read_retry(int, void *, size_t);
 
 #ifdef SMALL
 #define unlink_input(f, sb) unlink(f)
 #else
 static	off_t	cat_fd(unsigned char *, size_t, off_t *, int fd);
 static	void	prepend_gzip(char *, int *, char ***);
 static	void	handle_dir(char *);
 static	void	print_verbage(const char *, const char *, off_t, off_t);
 static	void	print_test(const char *, int);
 static	void	copymodes(int fd, const struct stat *, const char *file);
 static	int	check_outfile(const char *outfile);
 #endif
 
 #ifndef NO_BZIP2_SUPPORT
 static	off_t	unbzip2(int, int, char *, size_t, off_t *);
 #endif
 
 #ifndef NO_COMPRESS_SUPPORT
 static	FILE 	*zdopen(int);
 static	off_t	zuncompress(FILE *, FILE *, char *, size_t, off_t *);
 #endif
 
 #ifndef NO_PACK_SUPPORT
 static	off_t	unpack(int, int, char *, size_t, off_t *);
 #endif
 
 #ifndef NO_XZ_SUPPORT
 static	off_t	unxz(int, int, char *, size_t, off_t *);
 #endif
 
 #ifdef SMALL
 #define getopt_long(a,b,c,d,e) getopt(a,b,c)
 #else
 static const struct option longopts[] = {
 	{ "stdout",		no_argument,		0,	'c' },
 	{ "to-stdout",		no_argument,		0,	'c' },
 	{ "decompress",		no_argument,		0,	'd' },
 	{ "uncompress",		no_argument,		0,	'd' },
 	{ "force",		no_argument,		0,	'f' },
 	{ "help",		no_argument,		0,	'h' },
 	{ "keep",		no_argument,		0,	'k' },
 	{ "list",		no_argument,		0,	'l' },
 	{ "no-name",		no_argument,		0,	'n' },
 	{ "name",		no_argument,		0,	'N' },
 	{ "quiet",		no_argument,		0,	'q' },
 	{ "recursive",		no_argument,		0,	'r' },
 	{ "suffix",		required_argument,	0,	'S' },
 	{ "test",		no_argument,		0,	't' },
 	{ "verbose",		no_argument,		0,	'v' },
 	{ "version",		no_argument,		0,	'V' },
 	{ "fast",		no_argument,		0,	'1' },
 	{ "best",		no_argument,		0,	'9' },
 	{ "ascii",		no_argument,		0,	'a' },
 	{ "license",		no_argument,		0,	'L' },
 	{ NULL,			no_argument,		0,	0 },
 };
 #endif
 
 int
 main(int argc, char **argv)
 {
 	const char *progname = getprogname();
 #ifndef SMALL
 	char *gzip;
 	int len;
 #endif
 	int ch;
 
 #ifndef SMALL
 	if ((gzip = getenv("GZIP")) != NULL)
 		prepend_gzip(gzip, &argc, &argv);
 	signal(SIGINT, sigint_handler);
 #endif
 
 	/*
 	 * XXX
 	 * handle being called `gunzip', `zcat' and `gzcat'
 	 */
 	if (strcmp(progname, "gunzip") == 0)
 		dflag = 1;
 	else if (strcmp(progname, "zcat") == 0 ||
 		 strcmp(progname, "gzcat") == 0)
 		dflag = cflag = 1;
 
 #ifdef SMALL
 #define OPT_LIST "123456789cdhlV"
 #else
 #define OPT_LIST "123456789acdfhklLNnqrS:tVv"
 #endif
 
 	while ((ch = getopt_long(argc, argv, OPT_LIST, longopts, NULL)) != -1) {
 		switch (ch) {
 		case '1': case '2': case '3':
 		case '4': case '5': case '6':
 		case '7': case '8': case '9':
 			numflag = ch - '0';
 			break;
 		case 'c':
 			cflag = 1;
 			break;
 		case 'd':
 			dflag = 1;
 			break;
 		case 'l':
 			lflag = 1;
 			dflag = 1;
 			break;
 		case 'V':
 			display_version();
 			/* NOTREACHED */
 #ifndef SMALL
 		case 'a':
 			fprintf(stderr, "%s: option --ascii ignored on this system\n", progname);
 			break;
 		case 'f':
 			fflag = 1;
 			break;
 		case 'k':
 			kflag = 1;
 			break;
 		case 'L':
 			display_license();
 			/* NOT REACHED */
 		case 'N':
 			nflag = 0;
 			Nflag = 1;
 			break;
 		case 'n':
 			nflag = 1;
 			Nflag = 0;
 			break;
 		case 'q':
 			qflag = 1;
 			break;
 		case 'r':
 			rflag = 1;
 			break;
 		case 'S':
 			len = strlen(optarg);
 			if (len != 0) {
 				if (len > SUFFIX_MAXLEN)
 					errx(1, "incorrect suffix: '%s': too long", optarg);
 				suffixes[0].zipped = optarg;
 				suffixes[0].ziplen = len;
 			} else {
 				suffixes[NUM_SUFFIXES - 1].zipped = "";
 				suffixes[NUM_SUFFIXES - 1].ziplen = 0;
 			}
 			break;
 		case 't':
 			cflag = 1;
 			tflag = 1;
 			dflag = 1;
 			break;
 		case 'v':
 			vflag = 1;
 			break;
 #endif
 		default:
 			usage();
 			/* NOTREACHED */
 		}
 	}
 	argv += optind;
 	argc -= optind;
 
 	if (argc == 0) {
 		if (dflag)	/* stdin mode */
 			handle_stdin();
 		else		/* stdout mode */
 			handle_stdout();
 	} else {
 		do {
 			handle_pathname(argv[0]);
 		} while (*++argv);
 	}
 #ifndef SMALL
 	if (qflag == 0 && lflag && argc > 1)
 		print_list(-1, 0, "(totals)", 0);
 #endif
 	exit(exit_value);
 }
 
 /* maybe print a warning */
 void
 maybe_warn(const char *fmt, ...)
 {
 	va_list ap;
 
 	if (qflag == 0) {
 		va_start(ap, fmt);
 		vwarn(fmt, ap);
 		va_end(ap);
 	}
 	if (exit_value == 0)
 		exit_value = 1;
 }
 
 /* ... without an errno. */
 void
 maybe_warnx(const char *fmt, ...)
 {
 	va_list ap;
 
 	if (qflag == 0) {
 		va_start(ap, fmt);
 		vwarnx(fmt, ap);
 		va_end(ap);
 	}
 	if (exit_value == 0)
 		exit_value = 1;
 }
 
 /* maybe print an error */
 void
 maybe_err(const char *fmt, ...)
 {
 	va_list ap;
 
 	if (qflag == 0) {
 		va_start(ap, fmt);
 		vwarn(fmt, ap);
 		va_end(ap);
 	}
 	exit(2);
 }
 
 #if !defined(NO_BZIP2_SUPPORT) || !defined(NO_PACK_SUPPORT) ||	\
     !defined(NO_XZ_SUPPORT)
 /* ... without an errno. */
 void
 maybe_errx(const char *fmt, ...)
 {
 	va_list ap;
 
 	if (qflag == 0) {
 		va_start(ap, fmt);
 		vwarnx(fmt, ap);
 		va_end(ap);
 	}
 	exit(2);
 }
 #endif
 
 #ifndef SMALL
 /* split up $GZIP and prepend it to the argument list */
 static void
 prepend_gzip(char *gzip, int *argc, char ***argv)
 {
 	char *s, **nargv, **ac;
 	int nenvarg = 0, i;
 
 	/* scan how many arguments there are */
 	for (s = gzip;;) {
 		while (*s == ' ' || *s == '\t')
 			s++;
 		if (*s == 0)
 			goto count_done;
 		nenvarg++;
 		while (*s != ' ' && *s != '\t')
 			if (*s++ == 0)
 				goto count_done;
 	}
 count_done:
 	/* punt early */
 	if (nenvarg == 0)
 		return;
 
 	*argc += nenvarg;
 	ac = *argv;
 
 	nargv = (char **)malloc((*argc + 1) * sizeof(char *));
 	if (nargv == NULL)
 		maybe_err("malloc");
 
 	/* stash this away */
 	*argv = nargv;
 
 	/* copy the program name first */
 	i = 0;
 	nargv[i++] = *(ac++);
 
 	/* take a copy of $GZIP and add it to the array */
 	s = strdup(gzip);
 	if (s == NULL)
 		maybe_err("strdup");
 	for (;;) {
 		/* Skip whitespaces. */
 		while (*s == ' ' || *s == '\t')
 			s++;
 		if (*s == 0)
 			goto copy_done;
 		nargv[i++] = s;
 		/* Find the end of this argument. */
 		while (*s != ' ' && *s != '\t')
 			if (*s++ == 0)
 				/* Argument followed by NUL. */
 				goto copy_done;
 		/* Terminate by overwriting ' ' or '\t' with NUL. */
 		*s++ = 0;
 	}
 copy_done:
 
 	/* copy the original arguments and a NULL */
 	while (*ac)
 		nargv[i++] = *(ac++);
 	nargv[i] = NULL;
 }
 #endif
 
 /* compress input to output. Return bytes read, -1 on error */
 static off_t
 gz_compress(int in, int out, off_t *gsizep, const char *origname, uint32_t mtime)
 {
 	z_stream z;
 	char *outbufp, *inbufp;
 	off_t in_tot = 0, out_tot = 0;
 	ssize_t in_size;
 	int i, error;
 	uLong crc;
 #ifdef SMALL
 	static char header[] = { GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED, 0,
 				 0, 0, 0, 0,
 				 0, OS_CODE };
 #endif
 
 	outbufp = malloc(BUFLEN);
 	inbufp = malloc(BUFLEN);
 	if (outbufp == NULL || inbufp == NULL) {
 		maybe_err("malloc failed");
 		goto out;
 	}
 
 	memset(&z, 0, sizeof z);
 	z.zalloc = Z_NULL;
 	z.zfree = Z_NULL;
 	z.opaque = 0;
 
 #ifdef SMALL
 	memcpy(outbufp, header, sizeof header);
 	i = sizeof header;
 #else
 	if (nflag != 0) {
 		mtime = 0;
 		origname = "";
 	}
 
 	i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c%c%c%s", 
 		     GZIP_MAGIC0, GZIP_MAGIC1, Z_DEFLATED,
 		     *origname ? ORIG_NAME : 0,
 		     mtime & 0xff,
 		     (mtime >> 8) & 0xff,
 		     (mtime >> 16) & 0xff,
 		     (mtime >> 24) & 0xff,
 		     numflag == 1 ? 4 : numflag == 9 ? 2 : 0,
 		     OS_CODE, origname);
 	if (i >= BUFLEN)     
 		/* this need PATH_MAX > BUFLEN ... */
 		maybe_err("snprintf");
 	if (*origname)
 		i++;
 #endif
 
 	z.next_out = (unsigned char *)outbufp + i;
 	z.avail_out = BUFLEN - i;
 
 	error = deflateInit2(&z, numflag, Z_DEFLATED,
 			     (-MAX_WBITS), 8, Z_DEFAULT_STRATEGY);
 	if (error != Z_OK) {
 		maybe_warnx("deflateInit2 failed");
 		in_tot = -1;
 		goto out;
 	}
 
 	crc = crc32(0L, Z_NULL, 0);
 	for (;;) {
 		if (z.avail_out == 0) {
 			if (write(out, outbufp, BUFLEN) != BUFLEN) {
 				maybe_warn("write");
 				out_tot = -1;
 				goto out;
 			}
 
 			out_tot += BUFLEN;
 			z.next_out = (unsigned char *)outbufp;
 			z.avail_out = BUFLEN;
 		}
 
 		if (z.avail_in == 0) {
 			in_size = read(in, inbufp, BUFLEN);
 			if (in_size < 0) {
 				maybe_warn("read");
 				in_tot = -1;
 				goto out;
 			}
 			if (in_size == 0)
 				break;
 
 			crc = crc32(crc, (const Bytef *)inbufp, (unsigned)in_size);
 			in_tot += in_size;
 			z.next_in = (unsigned char *)inbufp;
 			z.avail_in = in_size;
 		}
 
 		error = deflate(&z, Z_NO_FLUSH);
 		if (error != Z_OK && error != Z_STREAM_END) {
 			maybe_warnx("deflate failed");
 			in_tot = -1;
 			goto out;
 		}
 	}
 
 	/* clean up */
 	for (;;) {
 		size_t len;
 		ssize_t w;
 
 		error = deflate(&z, Z_FINISH);
 		if (error != Z_OK && error != Z_STREAM_END) {
 			maybe_warnx("deflate failed");
 			in_tot = -1;
 			goto out;
 		}
 
 		len = (char *)z.next_out - outbufp;
 
 		w = write(out, outbufp, len);
 		if (w == -1 || (size_t)w != len) {
 			maybe_warn("write");
 			out_tot = -1;
 			goto out;
 		}
 		out_tot += len;
 		z.next_out = (unsigned char *)outbufp;
 		z.avail_out = BUFLEN;
 
 		if (error == Z_STREAM_END)
 			break;
 	}
 
 	if (deflateEnd(&z) != Z_OK) {
 		maybe_warnx("deflateEnd failed");
 		in_tot = -1;
 		goto out;
 	}
 
 	i = snprintf(outbufp, BUFLEN, "%c%c%c%c%c%c%c%c", 
 		 (int)crc & 0xff,
 		 (int)(crc >> 8) & 0xff,
 		 (int)(crc >> 16) & 0xff,
 		 (int)(crc >> 24) & 0xff,
 		 (int)in_tot & 0xff,
 		 (int)(in_tot >> 8) & 0xff,
 		 (int)(in_tot >> 16) & 0xff,
 		 (int)(in_tot >> 24) & 0xff);
 	if (i != 8)
 		maybe_err("snprintf");
 	if (write(out, outbufp, i) != i) {
 		maybe_warn("write");
 		in_tot = -1;
 	} else
 		out_tot += i;
 
 out:
 	if (inbufp != NULL)
 		free(inbufp);
 	if (outbufp != NULL)
 		free(outbufp);
 	if (gsizep)
 		*gsizep = out_tot;
 	return in_tot;
 }
 
 /*
  * uncompress input to output then close the input.  return the
  * uncompressed size written, and put the compressed sized read
  * into `*gsizep'.
  */
 static off_t
 gz_uncompress(int in, int out, char *pre, size_t prelen, off_t *gsizep,
 	      const char *filename)
 {
 	z_stream z;
 	char *outbufp, *inbufp;
 	off_t out_tot = -1, in_tot = 0;
 	uint32_t out_sub_tot = 0;
 	enum {
 		GZSTATE_MAGIC0,
 		GZSTATE_MAGIC1,
 		GZSTATE_METHOD,
 		GZSTATE_FLAGS,
 		GZSTATE_SKIPPING,
 		GZSTATE_EXTRA,
 		GZSTATE_EXTRA2,
 		GZSTATE_EXTRA3,
 		GZSTATE_ORIGNAME,
 		GZSTATE_COMMENT,
 		GZSTATE_HEAD_CRC1,
 		GZSTATE_HEAD_CRC2,
 		GZSTATE_INIT,
 		GZSTATE_READ,
 		GZSTATE_CRC,
 		GZSTATE_LEN,
 	} state = GZSTATE_MAGIC0;
 	int flags = 0, skip_count = 0;
 	int error = Z_STREAM_ERROR, done_reading = 0;
 	uLong crc = 0;
 	ssize_t wr;
 	int needmore = 0;
 
 #define ADVANCE()       { z.next_in++; z.avail_in--; }
 
 	if ((outbufp = malloc(BUFLEN)) == NULL) {
 		maybe_err("malloc failed");
 		goto out2;
 	}
 	if ((inbufp = malloc(BUFLEN)) == NULL) {
 		maybe_err("malloc failed");
 		goto out1;
 	}
 
 	memset(&z, 0, sizeof z);
 	z.avail_in = prelen;
 	z.next_in = (unsigned char *)pre;
 	z.avail_out = BUFLEN;
 	z.next_out = (unsigned char *)outbufp;
 	z.zalloc = NULL;
 	z.zfree = NULL;
 	z.opaque = 0;
 
 	in_tot = prelen;
 	out_tot = 0;
 
 	for (;;) {
 		if ((z.avail_in == 0 || needmore) && done_reading == 0) {
 			ssize_t in_size;
 
 			if (z.avail_in > 0) {
 				memmove(inbufp, z.next_in, z.avail_in);
 			}
 			z.next_in = (unsigned char *)inbufp;
 			in_size = read(in, z.next_in + z.avail_in,
 			    BUFLEN - z.avail_in);
 
 			if (in_size == -1) {
 				maybe_warn("failed to read stdin");
 				goto stop_and_fail;
 			} else if (in_size == 0) {
 				done_reading = 1;
 			}
 
 			z.avail_in += in_size;
 			needmore = 0;
 
 			in_tot += in_size;
 		}
 		if (z.avail_in == 0) {
 			if (done_reading && state != GZSTATE_MAGIC0) {
 				maybe_warnx("%s: unexpected end of file",
 					    filename);
 				goto stop_and_fail;
 			}
 			goto stop;
 		}
 		switch (state) {
 		case GZSTATE_MAGIC0:
 			if (*z.next_in != GZIP_MAGIC0) {
 				if (in_tot > 0) {
 					maybe_warnx("%s: trailing garbage "
 						    "ignored", filename);
 					exit_value = 2;
 					goto stop;
 				}
 				maybe_warnx("input not gziped (MAGIC0)");
 				goto stop_and_fail;
 			}
 			ADVANCE();
 			state++;
 			out_sub_tot = 0;
 			crc = crc32(0L, Z_NULL, 0);
 			break;
 
 		case GZSTATE_MAGIC1:
 			if (*z.next_in != GZIP_MAGIC1 &&
 			    *z.next_in != GZIP_OMAGIC1) {
 				maybe_warnx("input not gziped (MAGIC1)");
 				goto stop_and_fail;
 			}
 			ADVANCE();
 			state++;
 			break;
 
 		case GZSTATE_METHOD:
 			if (*z.next_in != Z_DEFLATED) {
 				maybe_warnx("unknown compression method");
 				goto stop_and_fail;
 			}
 			ADVANCE();
 			state++;
 			break;
 
 		case GZSTATE_FLAGS:
 			flags = *z.next_in;
 			ADVANCE();
 			skip_count = 6;
 			state++;
 			break;
 
 		case GZSTATE_SKIPPING:
 			if (skip_count > 0) {
 				skip_count--;
 				ADVANCE();
 			} else
 				state++;
 			break;
 
 		case GZSTATE_EXTRA:
 			if ((flags & EXTRA_FIELD) == 0) {
 				state = GZSTATE_ORIGNAME;
 				break;
 			}
 			skip_count = *z.next_in;
 			ADVANCE();
 			state++;
 			break;
 
 		case GZSTATE_EXTRA2:
 			skip_count |= ((*z.next_in) << 8);
 			ADVANCE();
 			state++;
 			break;
 
 		case GZSTATE_EXTRA3:
 			if (skip_count > 0) {
 				skip_count--;
 				ADVANCE();
 			} else
 				state++;
 			break;
 
 		case GZSTATE_ORIGNAME:
 			if ((flags & ORIG_NAME) == 0) {
 				state++;
 				break;
 			}
 			if (*z.next_in == 0)
 				state++;
 			ADVANCE();
 			break;
 
 		case GZSTATE_COMMENT:
 			if ((flags & COMMENT) == 0) {
 				state++;
 				break;
 			}
 			if (*z.next_in == 0)
 				state++;
 			ADVANCE();
 			break;
 
 		case GZSTATE_HEAD_CRC1:
 			if (flags & HEAD_CRC)
 				skip_count = 2;
 			else
 				skip_count = 0;
 			state++;
 			break;
 
 		case GZSTATE_HEAD_CRC2:
 			if (skip_count > 0) {
 				skip_count--;
 				ADVANCE();
 			} else
 				state++;
 			break;
 
 		case GZSTATE_INIT:
 			if (inflateInit2(&z, -MAX_WBITS) != Z_OK) {
 				maybe_warnx("failed to inflateInit");
 				goto stop_and_fail;
 			}
 			state++;
 			break;
 
 		case GZSTATE_READ:
 			error = inflate(&z, Z_FINISH);
 			switch (error) {
 			/* Z_BUF_ERROR goes with Z_FINISH... */
 			case Z_BUF_ERROR:
 				if (z.avail_out > 0 && !done_reading)
 					continue;
 
 			case Z_STREAM_END:
 			case Z_OK:
 				break;
 
 			case Z_NEED_DICT:
 				maybe_warnx("Z_NEED_DICT error");
 				goto stop_and_fail;
 			case Z_DATA_ERROR:
 				maybe_warnx("data stream error");
 				goto stop_and_fail;
 			case Z_STREAM_ERROR:
 				maybe_warnx("internal stream error");
 				goto stop_and_fail;
 			case Z_MEM_ERROR:
 				maybe_warnx("memory allocation error");
 				goto stop_and_fail;
 
 			default:
 				maybe_warn("unknown error from inflate(): %d",
 				    error);
 			}
 			wr = BUFLEN - z.avail_out;
 
 			if (wr != 0) {
 				crc = crc32(crc, (const Bytef *)outbufp, (unsigned)wr);
 				if (
 #ifndef SMALL
 				    /* don't write anything with -t */
 				    tflag == 0 &&
 #endif
 				    write(out, outbufp, wr) != wr) {
 					maybe_warn("error writing to output");
 					goto stop_and_fail;
 				}
 
 				out_tot += wr;
 				out_sub_tot += wr;
 			}
 
 			if (error == Z_STREAM_END) {
 				inflateEnd(&z);
 				state++;
 			}
 
 			z.next_out = (unsigned char *)outbufp;
 			z.avail_out = BUFLEN;
 
 			break;
 		case GZSTATE_CRC:
 			{
 				uLong origcrc;
 
 				if (z.avail_in < 4) {
 					if (!done_reading) {
 						needmore = 1;
 						continue;
 					}
 					maybe_warnx("truncated input");
 					goto stop_and_fail;
 				}
 				origcrc = ((unsigned)z.next_in[0] & 0xff) |
 					((unsigned)z.next_in[1] & 0xff) << 8 |
 					((unsigned)z.next_in[2] & 0xff) << 16 |
 					((unsigned)z.next_in[3] & 0xff) << 24;
 				if (origcrc != crc) {
 					maybe_warnx("invalid compressed"
 					     " data--crc error");
 					goto stop_and_fail;
 				}
 			}
 
 			z.avail_in -= 4;
 			z.next_in += 4;
 
 			if (!z.avail_in && done_reading) {
 				goto stop;
 			}
 			state++;
 			break;
 		case GZSTATE_LEN:
 			{
 				uLong origlen;
 
 				if (z.avail_in < 4) {
 					if (!done_reading) {
 						needmore = 1;
 						continue;
 					}
 					maybe_warnx("truncated input");
 					goto stop_and_fail;
 				}
 				origlen = ((unsigned)z.next_in[0] & 0xff) |
 					((unsigned)z.next_in[1] & 0xff) << 8 |
 					((unsigned)z.next_in[2] & 0xff) << 16 |
 					((unsigned)z.next_in[3] & 0xff) << 24;
 
 				if (origlen != out_sub_tot) {
 					maybe_warnx("invalid compressed"
 					     " data--length error");
 					goto stop_and_fail;
 				}
 			}
 				
 			z.avail_in -= 4;
 			z.next_in += 4;
 
 			if (error < 0) {
 				maybe_warnx("decompression error");
 				goto stop_and_fail;
 			}
 			state = GZSTATE_MAGIC0;
 			break;
 		}
 		continue;
 stop_and_fail:
 		out_tot = -1;
 stop:
 		break;
 	}
 	if (state > GZSTATE_INIT)
 		inflateEnd(&z);
 
 	free(inbufp);
 out1:
 	free(outbufp);
 out2:
 	if (gsizep)
 		*gsizep = in_tot;
 	return (out_tot);
 }
 
 #ifndef SMALL
 /*
  * set the owner, mode, flags & utimes using the given file descriptor.
  * file is only used in possible warning messages.
  */
 static void
 copymodes(int fd, const struct stat *sbp, const char *file)
 {
 	struct timespec times[2];
 	struct stat sb;
 
 	/*
 	 * If we have no info on the input, give this file some
 	 * default values and return..
 	 */
 	if (sbp == NULL) {
 		mode_t mask = umask(022);
 
 		(void)fchmod(fd, DEFFILEMODE & ~mask);
 		(void)umask(mask);
 		return; 
 	}
 	sb = *sbp;
 
 	/* if the chown fails, remove set-id bits as-per compress(1) */
 	if (fchown(fd, sb.st_uid, sb.st_gid) < 0) {
 		if (errno != EPERM)
 			maybe_warn("couldn't fchown: %s", file);
 		sb.st_mode &= ~(S_ISUID|S_ISGID);
 	}
 
 	/* we only allow set-id and the 9 normal permission bits */
 	sb.st_mode &= S_ISUID | S_ISGID | S_IRWXU | S_IRWXG | S_IRWXO;
 	if (fchmod(fd, sb.st_mode) < 0)
 		maybe_warn("couldn't fchmod: %s", file);
 
 	times[0] = sb.st_atim;
 	times[1] = sb.st_mtim;
 	if (futimens(fd, times) < 0)
 		maybe_warn("couldn't futimens: %s", file);
 
 	/* only try flags if they exist already */
         if (sb.st_flags != 0 && fchflags(fd, sb.st_flags) < 0)
 		maybe_warn("couldn't fchflags: %s", file);
 }
 #endif
 
 /* what sort of file is this? */
 static enum filetype
 file_gettype(u_char *buf)
 {
 
 	if (buf[0] == GZIP_MAGIC0 &&
 	    (buf[1] == GZIP_MAGIC1 || buf[1] == GZIP_OMAGIC1))
 		return FT_GZIP;
 	else
 #ifndef NO_BZIP2_SUPPORT
 	if (memcmp(buf, BZIP2_MAGIC, 3) == 0 &&
 	    buf[3] >= '0' && buf[3] <= '9')
 		return FT_BZIP2;
 	else
 #endif
 #ifndef NO_COMPRESS_SUPPORT
 	if (memcmp(buf, Z_MAGIC, 2) == 0)
 		return FT_Z;
 	else
 #endif
 #ifndef NO_PACK_SUPPORT
 	if (memcmp(buf, PACK_MAGIC, 2) == 0)
 		return FT_PACK;
 	else
 #endif
 #ifndef NO_XZ_SUPPORT
 	if (memcmp(buf, XZ_MAGIC, 4) == 0)	/* XXX: We only have 4 bytes */
 		return FT_XZ;
 	else
 #endif
 		return FT_UNKNOWN;
 }
 
 #ifndef SMALL
 /* check the outfile is OK. */
 static int
 check_outfile(const char *outfile)
 {
 	struct stat sb;
 	int ok = 1;
 
 	if (lflag == 0 && stat(outfile, &sb) == 0) {
 		if (fflag)
 			unlink(outfile);
 		else if (isatty(STDIN_FILENO)) {
 			char ans[10] = { 'n', '\0' };	/* default */
 
 			fprintf(stderr, "%s already exists -- do you wish to "
 					"overwrite (y or n)? " , outfile);
 			(void)fgets(ans, sizeof(ans) - 1, stdin);
 			if (ans[0] != 'y' && ans[0] != 'Y') {
 				fprintf(stderr, "\tnot overwriting\n");
 				ok = 0;
 			} else
 				unlink(outfile);
 		} else {
 			maybe_warnx("%s already exists -- skipping", outfile);
 			ok = 0;
 		}
 	}
 	return ok;
 }
 
 static void
 unlink_input(const char *file, const struct stat *sb)
 {
 	struct stat nsb;
 
 	if (kflag)
 		return;
 	if (stat(file, &nsb) != 0)
 		/* Must be gone already */
 		return;
 	if (nsb.st_dev != sb->st_dev || nsb.st_ino != sb->st_ino)
 		/* Definitely a different file */
 		return;
 	unlink(file);
 }
 
 static void
 sigint_handler(int signo __unused)
 {
 
 	if (remove_file != NULL)
 		unlink(remove_file);
 	_exit(2);
 }
 #endif
 
 static const suffixes_t *
 check_suffix(char *file, int xlate)
 {
 	const suffixes_t *s;
 	int len = strlen(file);
 	char *sp;
 
 	for (s = suffixes; s != suffixes + NUM_SUFFIXES; s++) {
 		/* if it doesn't fit in "a.suf", don't bother */
 		if (s->ziplen >= len)
 			continue;
 		sp = file + len - s->ziplen;
 		if (strcmp(s->zipped, sp) != 0)
 			continue;
 		if (xlate)
 			strcpy(sp, s->normal);
 		return s;
 	}
 	return NULL;
 }
 
 /*
  * compress the given file: create a corresponding .gz file and remove the
  * original.
  */
 static off_t
 file_compress(char *file, char *outfile, size_t outsize)
 {
 	int in;
 	int out;
 	off_t size, insize;
 #ifndef SMALL
 	struct stat isb, osb;
 	const suffixes_t *suff;
 #endif
 
 	in = open(file, O_RDONLY);
 	if (in == -1) {
 		maybe_warn("can't open %s", file);
 		return (-1);
 	}
 
 #ifndef SMALL
 	if (fstat(in, &isb) != 0) {
 		maybe_warn("couldn't stat: %s", file);
 		close(in);
 		return (-1);
 	}
 #endif
 
 	if (cflag == 0) {
 #ifndef SMALL
 		if (isb.st_nlink > 1 && fflag == 0) {
-			maybe_warnx("%s has %d other link%s -- skipping",
-			    file, isb.st_nlink - 1,
+			maybe_warnx("%s has %ju other link%s -- skipping",
+			    file, (uintmax_t)isb.st_nlink - 1,
 			    (isb.st_nlink - 1) == 1 ? "" : "s");
 			close(in);
 			return (-1);
 		}
 
 		if (fflag == 0 && (suff = check_suffix(file, 0)) &&
 		    suff->zipped[0] != 0) {
 			maybe_warnx("%s already has %s suffix -- unchanged",
 			    file, suff->zipped);
 			close(in);
 			return (-1);
 		}
 #endif
 
 		/* Add (usually) .gz to filename */
 		if ((size_t)snprintf(outfile, outsize, "%s%s",
 		    file, suffixes[0].zipped) >= outsize)
 			memcpy(outfile + outsize - suffixes[0].ziplen - 1,
 			    suffixes[0].zipped, suffixes[0].ziplen + 1);
 
 #ifndef SMALL
 		if (check_outfile(outfile) == 0) {
 			close(in);
 			return (-1);
 		}
 #endif
 	}
 
 	if (cflag == 0) {
 		out = open(outfile, O_WRONLY | O_CREAT | O_EXCL, 0600);
 		if (out == -1) {
 			maybe_warn("could not create output: %s", outfile);
 			fclose(stdin);
 			return (-1);
 		}
 #ifndef SMALL
 		remove_file = outfile;
 #endif
 	} else
 		out = STDOUT_FILENO;
 
 	insize = gz_compress(in, out, &size, basename(file), (uint32_t)isb.st_mtime);
 
 	(void)close(in);
 
 	/*
 	 * If there was an error, insize will be -1.
 	 * If we compressed to stdout, just return the size.
 	 * Otherwise stat the file and check it is the correct size.
 	 * We only blow away the file if we can stat the output and it
 	 * has the expected size.
 	 */
 	if (cflag != 0)
 		return (insize == -1 ? -1 : size);
 
 #ifndef SMALL
 	if (fstat(out, &osb) != 0) {
 		maybe_warn("couldn't stat: %s", outfile);
 		goto bad_outfile;
 	}
 
 	if (osb.st_size != size) {
 		maybe_warnx("output file: %s wrong size (%ju != %ju), deleting",
 		    outfile, (uintmax_t)osb.st_size, (uintmax_t)size);
 		goto bad_outfile;
 	}
 
 	copymodes(out, &isb, outfile);
 	remove_file = NULL;
 #endif
 	if (close(out) == -1)
 		maybe_warn("couldn't close output");
 
 	/* output is good, ok to delete input */
 	unlink_input(file, &isb);
 	return (size);
 
 #ifndef SMALL
     bad_outfile:
 	if (close(out) == -1)
 		maybe_warn("couldn't close output");
 
 	maybe_warnx("leaving original %s", file);
 	unlink(outfile);
 	return (size);
 #endif
 }
 
 /* uncompress the given file and remove the original */
 static off_t
 file_uncompress(char *file, char *outfile, size_t outsize)
 {
 	struct stat isb, osb;
 	off_t size;
 	ssize_t rbytes;
 	unsigned char header1[4];
 	enum filetype method;
 	int fd, ofd, zfd = -1;
 #ifndef SMALL
 	ssize_t rv;
 	time_t timestamp = 0;
 	char name[PATH_MAX + 1];
 #endif
 
 	/* gather the old name info */
 
 	fd = open(file, O_RDONLY);
 	if (fd < 0) {
 		maybe_warn("can't open %s", file);
 		goto lose;
 	}
 
 	strlcpy(outfile, file, outsize);
 	if (check_suffix(outfile, 1) == NULL && !(cflag || lflag)) {
 		maybe_warnx("%s: unknown suffix -- ignored", file);
 		goto lose;
 	}
 
 	rbytes = read(fd, header1, sizeof header1);
 	if (rbytes != sizeof header1) {
 		/* we don't want to fail here. */
 #ifndef SMALL
 		if (fflag)
 			goto lose;
 #endif
 		if (rbytes == -1)
 			maybe_warn("can't read %s", file);
 		else
 			goto unexpected_EOF;
 		goto lose;
 	}
 
 	method = file_gettype(header1);
 #ifndef SMALL
 	if (fflag == 0 && method == FT_UNKNOWN) {
 		maybe_warnx("%s: not in gzip format", file);
 		goto lose;
 	}
 
 #endif
 
 #ifndef SMALL
 	if (method == FT_GZIP && Nflag) {
 		unsigned char ts[4];	/* timestamp */
 
 		rv = pread(fd, ts, sizeof ts, GZIP_TIMESTAMP);
 		if (rv >= 0 && rv < (ssize_t)(sizeof ts))
 			goto unexpected_EOF;
 		if (rv == -1) {
 			if (!fflag)
 				maybe_warn("can't read %s", file);
 			goto lose;
 		}
 		timestamp = ts[3] << 24 | ts[2] << 16 | ts[1] << 8 | ts[0];
 
 		if (header1[3] & ORIG_NAME) {
 			rbytes = pread(fd, name, sizeof(name) - 1, GZIP_ORIGNAME);
 			if (rbytes < 0) {
 				maybe_warn("can't read %s", file);
 				goto lose;
 			}
 			if (name[0] != '\0') {
 				char *dp, *nf;
 
 				/* Make sure that name is NUL-terminated */
 				name[rbytes] = '\0';
 
 				/* strip saved directory name */
 				nf = strrchr(name, '/');
 				if (nf == NULL)
 					nf = name;
 				else
 					nf++;
 
 				/* preserve original directory name */
 				dp = strrchr(file, '/');
 				if (dp == NULL)
 					dp = file;
 				else
 					dp++;
 				snprintf(outfile, outsize, "%.*s%.*s",
 						(int) (dp - file), 
 						file, (int) rbytes, nf);
 			}
 		}
 	}
 #endif
 	lseek(fd, 0, SEEK_SET);
 
 	if (cflag == 0 || lflag) {
 		if (fstat(fd, &isb) != 0)
 			goto lose;
 #ifndef SMALL
 		if (isb.st_nlink > 1 && lflag == 0 && fflag == 0) {
-			maybe_warnx("%s has %d other links -- skipping",
-			    file, isb.st_nlink - 1);
+			maybe_warnx("%s has %ju other links -- skipping",
+			    file, (uintmax_t)isb.st_nlink - 1);
 			goto lose;
 		}
 		if (nflag == 0 && timestamp)
 			isb.st_mtime = timestamp;
 		if (check_outfile(outfile) == 0)
 			goto lose;
 #endif
 	}
 
 	if (cflag == 0 && lflag == 0) {
 		zfd = open(outfile, O_WRONLY|O_CREAT|O_EXCL, 0600);
 		if (zfd == STDOUT_FILENO) {
 			/* We won't close STDOUT_FILENO later... */
 			zfd = dup(zfd);
 			close(STDOUT_FILENO);
 		}
 		if (zfd == -1) {
 			maybe_warn("can't open %s", outfile);
 			goto lose;
 		}
 #ifndef SMALL
 		remove_file = outfile;
 #endif
 	} else
 		zfd = STDOUT_FILENO;
 
 	switch (method) {
 #ifndef NO_BZIP2_SUPPORT
 	case FT_BZIP2:
 		/* XXX */
 		if (lflag) {
 			maybe_warnx("no -l with bzip2 files");
 			goto lose;
 		}
 
 		size = unbzip2(fd, zfd, NULL, 0, NULL);
 		break;
 #endif
 
 #ifndef NO_COMPRESS_SUPPORT
 	case FT_Z: {
 		FILE *in, *out;
 
 		/* XXX */
 		if (lflag) {
 			maybe_warnx("no -l with Lempel-Ziv files");
 			goto lose;
 		}
 
 		if ((in = zdopen(fd)) == NULL) {
 			maybe_warn("zdopen for read: %s", file);
 			goto lose;
 		}
 
 		out = fdopen(dup(zfd), "w");
 		if (out == NULL) {
 			maybe_warn("fdopen for write: %s", outfile);
 			fclose(in);
 			goto lose;
 		}
 
 		size = zuncompress(in, out, NULL, 0, NULL);
 		/* need to fclose() if ferror() is true... */
 		if (ferror(in) | fclose(in)) {
 			maybe_warn("failed infile fclose");
 			unlink(outfile);
 			(void)fclose(out);
 		}
 		if (fclose(out) != 0) {
 			maybe_warn("failed outfile fclose");
 			unlink(outfile);
 			goto lose;
 		}
 		break;
 	}
 #endif
 
 #ifndef NO_PACK_SUPPORT
 	case FT_PACK:
 		if (lflag) {
 			maybe_warnx("no -l with packed files");
 			goto lose;
 		}
 
 		size = unpack(fd, zfd, NULL, 0, NULL);
 		break;
 #endif
 
 #ifndef NO_XZ_SUPPORT
 	case FT_XZ:
 		if (lflag) {
 			maybe_warnx("no -l with xz files");
 			goto lose;
 		}
 
 		size = unxz(fd, zfd, NULL, 0, NULL);
 		break;
 #endif
 
 #ifndef SMALL
 	case FT_UNKNOWN:
 		if (lflag) {
 			maybe_warnx("no -l for unknown filetypes");
 			goto lose;
 		}
 		size = cat_fd(NULL, 0, NULL, fd);
 		break;
 #endif
 	default:
 		if (lflag) {
 			print_list(fd, isb.st_size, outfile, isb.st_mtime);
 			close(fd);
 			return -1;	/* XXX */
 		}
 
 		size = gz_uncompress(fd, zfd, NULL, 0, NULL, file);
 		break;
 	}
 
 	if (close(fd) != 0)
 		maybe_warn("couldn't close input");
 	if (zfd != STDOUT_FILENO && close(zfd) != 0)
 		maybe_warn("couldn't close output");
 
 	if (size == -1) {
 		if (cflag == 0)
 			unlink(outfile);
 		maybe_warnx("%s: uncompress failed", file);
 		return -1;
 	}
 
 	/* if testing, or we uncompressed to stdout, this is all we need */
 #ifndef SMALL
 	if (tflag)
 		return size;
 #endif
 	/* if we are uncompressing to stdin, don't remove the file. */
 	if (cflag)
 		return size;
 
 	/*
 	 * if we create a file...
 	 */
 	/*
 	 * if we can't stat the file don't remove the file.
 	 */
 
 	ofd = open(outfile, O_RDWR, 0);
 	if (ofd == -1) {
 		maybe_warn("couldn't open (leaving original): %s",
 			   outfile);
 		return -1;
 	}
 	if (fstat(ofd, &osb) != 0) {
 		maybe_warn("couldn't stat (leaving original): %s",
 			   outfile);
 		close(ofd);
 		return -1;
 	}
 	if (osb.st_size != size) {
 		maybe_warnx("stat gave different size: %ju != %ju (leaving original)",
 		    (uintmax_t)size, (uintmax_t)osb.st_size);
 		close(ofd);
 		unlink(outfile);
 		return -1;
 	}
 #ifndef SMALL
 	copymodes(ofd, &isb, outfile);
 	remove_file = NULL;
 #endif
 	close(ofd);
 	unlink_input(file, &isb);
 	return size;
 
     unexpected_EOF:
 	maybe_warnx("%s: unexpected end of file", file);
     lose:
 	if (fd != -1)
 		close(fd);
 	if (zfd != -1 && zfd != STDOUT_FILENO)
 		close(fd);
 	return -1;
 }
 
 #ifndef SMALL
 static off_t
 cat_fd(unsigned char * prepend, size_t count, off_t *gsizep, int fd)
 {
 	char buf[BUFLEN];
 	off_t in_tot;
 	ssize_t w;
 
 	in_tot = count;
 	w = write(STDOUT_FILENO, prepend, count);
 	if (w == -1 || (size_t)w != count) {
 		maybe_warn("write to stdout");
 		return -1;
 	}
 	for (;;) {
 		ssize_t rv;
 
 		rv = read(fd, buf, sizeof buf);
 		if (rv == 0)
 			break;
 		if (rv < 0) {
 			maybe_warn("read from fd %d", fd);
 			break;
 		}
 
 		if (write(STDOUT_FILENO, buf, rv) != rv) {
 			maybe_warn("write to stdout");
 			break;
 		}
 		in_tot += rv;
 	}
 
 	if (gsizep)
 		*gsizep = in_tot;
 	return (in_tot);
 }
 #endif
 
 static void
 handle_stdin(void)
 {
 	unsigned char header1[4];
 	off_t usize, gsize;
 	enum filetype method;
 	ssize_t bytes_read;
 #ifndef NO_COMPRESS_SUPPORT
 	FILE *in;
 #endif
 
 #ifndef SMALL
 	if (fflag == 0 && lflag == 0 && isatty(STDIN_FILENO)) {
 		maybe_warnx("standard input is a terminal -- ignoring");
 		return;
 	}
 #endif
 
 	if (lflag) {
 		struct stat isb;
 
 		/* XXX could read the whole file, etc. */
 		if (fstat(STDIN_FILENO, &isb) < 0) {
 			maybe_warn("fstat");
 			return;
 		}
 		print_list(STDIN_FILENO, isb.st_size, "stdout", isb.st_mtime);
 		return;
 	}
 
 	bytes_read = read_retry(STDIN_FILENO, header1, sizeof header1);
 	if (bytes_read == -1) {
 		maybe_warn("can't read stdin");
 		return;
 	} else if (bytes_read != sizeof(header1)) {
 		maybe_warnx("(stdin): unexpected end of file");
 		return;
 	}
 
 	method = file_gettype(header1);
 	switch (method) {
 	default:
 #ifndef SMALL
 		if (fflag == 0) {
 			maybe_warnx("unknown compression format");
 			return;
 		}
 		usize = cat_fd(header1, sizeof header1, &gsize, STDIN_FILENO);
 		break;
 #endif
 	case FT_GZIP:
 		usize = gz_uncompress(STDIN_FILENO, STDOUT_FILENO, 
 			      (char *)header1, sizeof header1, &gsize, "(stdin)");
 		break;
 #ifndef NO_BZIP2_SUPPORT
 	case FT_BZIP2:
 		usize = unbzip2(STDIN_FILENO, STDOUT_FILENO,
 				(char *)header1, sizeof header1, &gsize);
 		break;
 #endif
 #ifndef NO_COMPRESS_SUPPORT
 	case FT_Z:
 		if ((in = zdopen(STDIN_FILENO)) == NULL) {
 			maybe_warnx("zopen of stdin");
 			return;
 		}
 
 		usize = zuncompress(in, stdout, (char *)header1,
 		    sizeof header1, &gsize);
 		fclose(in);
 		break;
 #endif
 #ifndef NO_PACK_SUPPORT
 	case FT_PACK:
 		usize = unpack(STDIN_FILENO, STDOUT_FILENO,
 			       (char *)header1, sizeof header1, &gsize);
 		break;
 #endif
 #ifndef NO_XZ_SUPPORT
 	case FT_XZ:
 		usize = unxz(STDIN_FILENO, STDOUT_FILENO,
 			     (char *)header1, sizeof header1, &gsize);
 		break;
 #endif
 	}
 
 #ifndef SMALL
         if (vflag && !tflag && usize != -1 && gsize != -1)
 		print_verbage(NULL, NULL, usize, gsize);
 	if (vflag && tflag)
 		print_test("(stdin)", usize != -1);
 #endif 
 
 }
 
 static void
 handle_stdout(void)
 {
 	off_t gsize, usize;
 	struct stat sb;
 	time_t systime;
 	uint32_t mtime;
 	int ret;
 
 #ifndef SMALL
 	if (fflag == 0 && isatty(STDOUT_FILENO)) {
 		maybe_warnx("standard output is a terminal -- ignoring");
 		return;
 	}
 #endif
 	/* If stdin is a file use its mtime, otherwise use current time */
 	ret = fstat(STDIN_FILENO, &sb);
 
 #ifndef SMALL
 	if (ret < 0) {
 		maybe_warn("Can't stat stdin");
 		return;
 	}
 #endif
 
 	if (S_ISREG(sb.st_mode))
 		mtime = (uint32_t)sb.st_mtime;
 	else {
 		systime = time(NULL);
 #ifndef SMALL
 		if (systime == -1) {
 			maybe_warn("time");
 			return;
 		} 
 #endif
 		mtime = (uint32_t)systime;
 	}
 	 		
 	usize = gz_compress(STDIN_FILENO, STDOUT_FILENO, &gsize, "", mtime);
 #ifndef SMALL
         if (vflag && !tflag && usize != -1 && gsize != -1)
 		print_verbage(NULL, NULL, usize, gsize);
 #endif 
 }
 
 /* do what is asked for, for the path name */
 static void
 handle_pathname(char *path)
 {
 	char *opath = path, *s = NULL;
 	ssize_t len;
 	int slen;
 	struct stat sb;
 
 	/* check for stdout/stdin */
 	if (path[0] == '-' && path[1] == '\0') {
 		if (dflag)
 			handle_stdin();
 		else
 			handle_stdout();
 		return;
 	}
 
 retry:
 	if (stat(path, &sb) != 0 || (fflag == 0 && cflag == 0 &&
 	    lstat(path, &sb) != 0)) {
 		/* lets try <path>.gz if we're decompressing */
 		if (dflag && s == NULL && errno == ENOENT) {
 			len = strlen(path);
 			slen = suffixes[0].ziplen;
 			s = malloc(len + slen + 1);
 			if (s == NULL)
 				maybe_err("malloc");
 			memcpy(s, path, len);
 			memcpy(s + len, suffixes[0].zipped, slen + 1);
 			path = s;
 			goto retry;
 		}
 		maybe_warn("can't stat: %s", opath);
 		goto out;
 	}
 
 	if (S_ISDIR(sb.st_mode)) {
 #ifndef SMALL
 		if (rflag)
 			handle_dir(path);
 		else
 #endif
 			maybe_warnx("%s is a directory", path);
 		goto out;
 	}
 
 	if (S_ISREG(sb.st_mode))
 		handle_file(path, &sb);
 	else
 		maybe_warnx("%s is not a regular file", path);
 
 out:
 	if (s)
 		free(s);
 }
 
 /* compress/decompress a file */
 static void
 handle_file(char *file, struct stat *sbp)
 {
 	off_t usize, gsize;
 	char	outfile[PATH_MAX];
 
 	infile = file;
 	if (dflag) {
 		usize = file_uncompress(file, outfile, sizeof(outfile));
 #ifndef SMALL
 		if (vflag && tflag)
 			print_test(file, usize != -1);
 #endif
 		if (usize == -1)
 			return;
 		gsize = sbp->st_size;
 	} else {
 		gsize = file_compress(file, outfile, sizeof(outfile));
 		if (gsize == -1)
 			return;
 		usize = sbp->st_size;
 	}
 
 
 #ifndef SMALL
 	if (vflag && !tflag)
 		print_verbage(file, (cflag) ? NULL : outfile, usize, gsize);
 #endif
 }
 
 #ifndef SMALL
 /* this is used with -r to recursively descend directories */
 static void
 handle_dir(char *dir)
 {
 	char *path_argv[2];
 	FTS *fts;
 	FTSENT *entry;
 
 	path_argv[0] = dir;
 	path_argv[1] = 0;
 	fts = fts_open(path_argv, FTS_PHYSICAL | FTS_NOCHDIR, NULL);
 	if (fts == NULL) {
 		warn("couldn't fts_open %s", dir);
 		return;
 	}
 
 	while ((entry = fts_read(fts))) {
 		switch(entry->fts_info) {
 		case FTS_D:
 		case FTS_DP:
 			continue;
 
 		case FTS_DNR:
 		case FTS_ERR:
 		case FTS_NS:
 			maybe_warn("%s", entry->fts_path);
 			continue;
 		case FTS_F:
 			handle_file(entry->fts_path, entry->fts_statp);
 		}
 	}
 	(void)fts_close(fts);
 }
 #endif
 
 /* print a ratio - size reduction as a fraction of uncompressed size */
 static void
 print_ratio(off_t in, off_t out, FILE *where)
 {
 	int percent10;	/* 10 * percent */
 	off_t diff;
 	char buff[8];
 	int len;
 
 	diff = in - out/2;
 	if (diff <= 0)
 		/*
 		 * Output is more than double size of input! print -99.9%
 		 * Quite possibly we've failed to get the original size.
 		 */
 		percent10 = -999;
 	else {
 		/*
 		 * We only need 12 bits of result from the final division,
 		 * so reduce the values until a 32bit division will suffice.
 		 */
 		while (in > 0x100000) {
 			diff >>= 1;
 			in >>= 1;
 		}
 		if (in != 0)
 			percent10 = ((u_int)diff * 2000) / (u_int)in - 1000;
 		else
 			percent10 = 0;
 	}
 
 	len = snprintf(buff, sizeof buff, "%2.2d.", percent10);
 	/* Move the '.' to before the last digit */
 	buff[len - 1] = buff[len - 2];
 	buff[len - 2] = '.';
 	fprintf(where, "%5s%%", buff);
 }
 
 #ifndef SMALL
 /* print compression statistics, and the new name (if there is one!) */
 static void
 print_verbage(const char *file, const char *nfile, off_t usize, off_t gsize)
 {
 	if (file)
 		fprintf(stderr, "%s:%s  ", file,
 		    strlen(file) < 7 ? "\t\t" : "\t");
 	print_ratio(usize, gsize, stderr);
 	if (nfile)
 		fprintf(stderr, " -- replaced with %s", nfile);
 	fprintf(stderr, "\n");
 	fflush(stderr);
 }
 
 /* print test results */
 static void
 print_test(const char *file, int ok)
 {
 
 	if (exit_value == 0 && ok == 0)
 		exit_value = 1;
 	fprintf(stderr, "%s:%s  %s\n", file,
 	    strlen(file) < 7 ? "\t\t" : "\t", ok ? "OK" : "NOT OK");
 	fflush(stderr);
 }
 #endif
 
 /* print a file's info ala --list */
 /* eg:
   compressed uncompressed  ratio uncompressed_name
       354841      1679360  78.8% /usr/pkgsrc/distfiles/libglade-2.0.1.tar
 */
 static void
 print_list(int fd, off_t out, const char *outfile, time_t ts)
 {
 	static int first = 1;
 #ifndef SMALL
 	static off_t in_tot, out_tot;
 	uint32_t crc = 0;
 #endif
 	off_t in = 0, rv;
 
 	if (first) {
 #ifndef SMALL
 		if (vflag)
 			printf("method  crc     date  time  ");
 #endif
 		if (qflag == 0)
 			printf("  compressed uncompressed  "
 			       "ratio uncompressed_name\n");
 	}
 	first = 0;
 
 	/* print totals? */
 #ifndef SMALL
 	if (fd == -1) {
 		in = in_tot;
 		out = out_tot;
 	} else
 #endif
 	{
 		/* read the last 4 bytes - this is the uncompressed size */
 		rv = lseek(fd, (off_t)(-8), SEEK_END);
 		if (rv != -1) {
 			unsigned char buf[8];
 			uint32_t usize;
 
 			rv = read(fd, (char *)buf, sizeof(buf));
 			if (rv == -1)
 				maybe_warn("read of uncompressed size");
 			else if (rv != sizeof(buf))
 				maybe_warnx("read of uncompressed size");
 
 			else {
 				usize = buf[4] | buf[5] << 8 |
 					buf[6] << 16 | buf[7] << 24;
 				in = (off_t)usize;
 #ifndef SMALL
 				crc = buf[0] | buf[1] << 8 |
 				      buf[2] << 16 | buf[3] << 24;
 #endif
 			}
 		}
 	}
 
 #ifndef SMALL
 	if (vflag && fd == -1)
 		printf("                            ");
 	else if (vflag) {
 		char *date = ctime(&ts);
 
 		/* skip the day, 1/100th second, and year */
 		date += 4;
 		date[12] = 0;
 		printf("%5s %08x %11s ", "defla"/*XXX*/, crc, date);
 	}
 	in_tot += in;
 	out_tot += out;
 #else
 	(void)&ts;	/* XXX */
 #endif
 	printf("%12llu %12llu ", (unsigned long long)out, (unsigned long long)in);
 	print_ratio(in, out, stdout);
 	printf(" %s\n", outfile);
 }
 
 /* display the usage of NetBSD gzip */
 static void
 usage(void)
 {
 
 	fprintf(stderr, "%s\n", gzip_version);
 	fprintf(stderr,
 #ifdef SMALL
     "usage: %s [-" OPT_LIST "] [<file> [<file> ...]]\n",
 #else
     "usage: %s [-123456789acdfhklLNnqrtVv] [-S .suffix] [<file> [<file> ...]]\n"
     " -1 --fast            fastest (worst) compression\n"
     " -2 .. -8             set compression level\n"
     " -9 --best            best (slowest) compression\n"
     " -c --stdout          write to stdout, keep original files\n"
     "    --to-stdout\n"
     " -d --decompress      uncompress files\n"
     "    --uncompress\n"
     " -f --force           force overwriting & compress links\n"
     " -h --help            display this help\n"
     " -k --keep            don't delete input files during operation\n"
     " -l --list            list compressed file contents\n"
     " -N --name            save or restore original file name and time stamp\n"
     " -n --no-name         don't save original file name or time stamp\n"
     " -q --quiet           output no warnings\n"
     " -r --recursive       recursively compress files in directories\n"
     " -S .suf              use suffix .suf instead of .gz\n"
     "    --suffix .suf\n"
     " -t --test            test compressed file\n"
     " -V --version         display program version\n"
     " -v --verbose         print extra statistics\n",
 #endif
 	    getprogname());
 	exit(0);
 }
 
 #ifndef SMALL
 /* display the license information of FreeBSD gzip */
 static void
 display_license(void)
 {
 
 	fprintf(stderr, "%s (based on NetBSD gzip 20150113)\n", gzip_version);
 	fprintf(stderr, "%s\n", gzip_copyright);
 	exit(0);
 }
 #endif
 
 /* display the version of NetBSD gzip */
 static void
 display_version(void)
 {
 
 	fprintf(stderr, "%s\n", gzip_version);
 	exit(0);
 }
 
 #ifndef NO_BZIP2_SUPPORT
 #include "unbzip2.c"
 #endif
 #ifndef NO_COMPRESS_SUPPORT
 #include "zuncompress.c"
 #endif
 #ifndef NO_PACK_SUPPORT
 #include "unpack.c"
 #endif
 #ifndef NO_XZ_SUPPORT
 #include "unxz.c"
 #endif
 
 static ssize_t
 read_retry(int fd, void *buf, size_t sz)
 {
 	char *cp = buf;
 	size_t left = MIN(sz, (size_t) SSIZE_MAX);
 
 	while (left > 0) {
 		ssize_t ret;
 
 		ret = read(fd, cp, left);
 		if (ret == -1) {
 			return ret;
 		} else if (ret == 0) {
 			break; /* EOF */
 		}
 		cp += ret;
 		left -= ret;
 	}
 
 	return sz - left;
 }
Index: head/usr.sbin/fmtree/compare.c
===================================================================
--- head/usr.sbin/fmtree/compare.c	(revision 311521)
+++ head/usr.sbin/fmtree/compare.c	(revision 311522)
@@ -1,388 +1,389 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)compare.c	8.1 (Berkeley) 6/6/93";
 #endif /* not lint */
 #endif
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <sys/time.h>
 
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <fts.h>
 #ifdef MD5
 #include <md5.h>
 #endif
 #ifdef RMD160
 #include <ripemd.h>
 #endif
 #ifdef SHA1
 #include <sha.h>
 #endif
 #ifdef SHA256
 #include <sha256.h>
 #endif
 #include <stdint.h>
 #include <stdio.h>
 #include <time.h>
 #include <unistd.h>
 #include <vis.h>
 
 #include "mtree.h"
 #include "extern.h"
 
 #define	INDENTNAMELEN	8
 #define	LABEL \
 	if (!label++) { \
 		len = printf("%s changed\n", RP(p)); \
 		tab = "\t"; \
 	}
 
 int
 compare(char *name __unused, NODE *s, FTSENT *p)
 {
 	struct timeval tv[2];
 	uint32_t val;
 	int fd, label;
 	off_t len;
 	char *cp;
 	const char *tab = "";
 	char *fflags;
 
 	label = 0;
 	switch(s->type) {
 	case F_BLOCK:
 		if (!S_ISBLK(p->fts_statp->st_mode))
 			goto typeerr;
 		break;
 	case F_CHAR:
 		if (!S_ISCHR(p->fts_statp->st_mode))
 			goto typeerr;
 		break;
 	case F_DIR:
 		if (!S_ISDIR(p->fts_statp->st_mode))
 			goto typeerr;
 		break;
 	case F_FIFO:
 		if (!S_ISFIFO(p->fts_statp->st_mode))
 			goto typeerr;
 		break;
 	case F_FILE:
 		if (!S_ISREG(p->fts_statp->st_mode))
 			goto typeerr;
 		break;
 	case F_LINK:
 		if (!S_ISLNK(p->fts_statp->st_mode))
 			goto typeerr;
 		break;
 	case F_SOCK:
 		if (!S_ISSOCK(p->fts_statp->st_mode)) {
 typeerr:		LABEL;
 			(void)printf("\ttype expected %s found %s\n",
 			    ftype(s->type), inotype(p->fts_statp->st_mode));
 			return (label);
 		}
 		break;
 	}
 	/* Set the uid/gid first, then set the mode. */
 	if (s->flags & (F_UID | F_UNAME) && s->st_uid != p->fts_statp->st_uid) {
 		LABEL;
 		(void)printf("%suser expected %lu found %lu",
 		    tab, (u_long)s->st_uid, (u_long)p->fts_statp->st_uid);
 		if (uflag)
 			if (chown(p->fts_accpath, s->st_uid, -1))
 				(void)printf(" not modified: %s\n",
 				    strerror(errno));
 			else
 				(void)printf(" modified\n");
 		else
 			(void)printf("\n");
 		tab = "\t";
 	}
 	if (s->flags & (F_GID | F_GNAME) && s->st_gid != p->fts_statp->st_gid) {
 		LABEL;
 		(void)printf("%sgid expected %lu found %lu",
 		    tab, (u_long)s->st_gid, (u_long)p->fts_statp->st_gid);
 		if (uflag)
 			if (chown(p->fts_accpath, -1, s->st_gid))
 				(void)printf(" not modified: %s\n",
 				    strerror(errno));
 			else
 				(void)printf(" modified\n");
 		else
 			(void)printf("\n");
 		tab = "\t";
 	}
 	if (s->flags & F_MODE &&
 	    !S_ISLNK(p->fts_statp->st_mode) &&
 	    s->st_mode != (p->fts_statp->st_mode & MBITS)) {
 		LABEL;
 		(void)printf("%spermissions expected %#o found %#o",
 		    tab, s->st_mode, p->fts_statp->st_mode & MBITS);
 		if (uflag)
 			if (chmod(p->fts_accpath, s->st_mode))
 				(void)printf(" not modified: %s\n",
 				    strerror(errno));
 			else
 				(void)printf(" modified\n");
 		else
 			(void)printf("\n");
 		tab = "\t";
 	}
 	if (s->flags & F_NLINK && s->type != F_DIR &&
 	    s->st_nlink != p->fts_statp->st_nlink) {
 		LABEL;
-		(void)printf("%slink_count expected %u found %u\n",
-		    tab, s->st_nlink, p->fts_statp->st_nlink);
+		(void)printf("%slink_count expected %ju found %ju\n",
+		    tab, (uintmax_t)s->st_nlink,
+		    (uintmax_t)p->fts_statp->st_nlink);
 		tab = "\t";
 	}
 	if (s->flags & F_SIZE && s->st_size != p->fts_statp->st_size &&
 		!S_ISDIR(p->fts_statp->st_mode)) {
 		LABEL;
 		(void)printf("%ssize expected %jd found %jd\n", tab,
 		    (intmax_t)s->st_size, (intmax_t)p->fts_statp->st_size);
 		tab = "\t";
 	}
 	/*
 	 * XXX
 	 * Catches nano-second differences, but doesn't display them.
 	 */
 	if ((s->flags & F_TIME) &&
 	     ((s->st_mtimespec.tv_sec != p->fts_statp->st_mtim.tv_sec) ||
 	     (s->st_mtimespec.tv_nsec != p->fts_statp->st_mtim.tv_nsec))) {
 		LABEL;
 		(void)printf("%smodification time expected %.24s ",
 		    tab, ctime(&s->st_mtimespec.tv_sec));
 		(void)printf("found %.24s",
 		    ctime(&p->fts_statp->st_mtim.tv_sec));
 		if (uflag) {
 			tv[0].tv_sec = s->st_mtimespec.tv_sec;
 			tv[0].tv_usec = s->st_mtimespec.tv_nsec / 1000;
 			tv[1] = tv[0];
 			if (utimes(p->fts_accpath, tv))
 				(void)printf(" not modified: %s\n",
 				    strerror(errno));
 			else
 				(void)printf(" modified\n");
 		} else
 			(void)printf("\n");
 		tab = "\t";
 	}
 	if (s->flags & F_CKSUM) {
 		if ((fd = open(p->fts_accpath, O_RDONLY, 0)) < 0) {
 			LABEL;
 			(void)printf("%scksum: %s: %s\n",
 			    tab, p->fts_accpath, strerror(errno));
 			tab = "\t";
 		} else if (crc(fd, &val, &len)) {
 			(void)close(fd);
 			LABEL;
 			(void)printf("%scksum: %s: %s\n",
 			    tab, p->fts_accpath, strerror(errno));
 			tab = "\t";
 		} else {
 			(void)close(fd);
 			if (s->cksum != val) {
 				LABEL;
 				(void)printf("%scksum expected %lu found %lu\n",
 				    tab, s->cksum, (unsigned long)val);
 				tab = "\t";
 			}
 		}
 	}
 	if ((s->flags & F_FLAGS) && s->st_flags != p->fts_statp->st_flags) {
 		LABEL;
 		fflags = flags_to_string(s->st_flags);
 		(void)printf("%sflags expected \"%s\"", tab, fflags);
 		free(fflags);
 
 		fflags = flags_to_string(p->fts_statp->st_flags);
 		(void)printf(" found \"%s\"", fflags);
 		free(fflags);
 
 		if (uflag)
 			if (chflags(p->fts_accpath, s->st_flags))
 				(void)printf(" not modified: %s\n",
 				    strerror(errno));
 			else
 				(void)printf(" modified\n");
 		else
 			(void)printf("\n");
 		tab = "\t";
 	}
 #ifdef MD5
 	if (s->flags & F_MD5) {
 		char *new_digest, buf[33];
 
 		new_digest = MD5File(p->fts_accpath, buf);
 		if (!new_digest) {
 			LABEL;
 			printf("%sMD5: %s: %s\n", tab, p->fts_accpath,
 			       strerror(errno));
 			tab = "\t";
 		} else if (strcmp(new_digest, s->md5digest)) {
 			LABEL;
 			printf("%sMD5 expected %s found %s\n", tab, s->md5digest,
 			       new_digest);
 			tab = "\t";
 		}
 	}
 #endif /* MD5 */
 #ifdef SHA1
 	if (s->flags & F_SHA1) {
 		char *new_digest, buf[41];
 
 		new_digest = SHA1_File(p->fts_accpath, buf);
 		if (!new_digest) {
 			LABEL;
 			printf("%sSHA-1: %s: %s\n", tab, p->fts_accpath,
 			       strerror(errno));
 			tab = "\t";
 		} else if (strcmp(new_digest, s->sha1digest)) {
 			LABEL;
 			printf("%sSHA-1 expected %s found %s\n",
 			       tab, s->sha1digest, new_digest);
 			tab = "\t";
 		}
 	}
 #endif /* SHA1 */
 #ifdef RMD160
 	if (s->flags & F_RMD160) {
 		char *new_digest, buf[41];
 
 		new_digest = RIPEMD160_File(p->fts_accpath, buf);
 		if (!new_digest) {
 			LABEL;
 			printf("%sRIPEMD160: %s: %s\n", tab,
 			       p->fts_accpath, strerror(errno));
 			tab = "\t";
 		} else if (strcmp(new_digest, s->rmd160digest)) {
 			LABEL;
 			printf("%sRIPEMD160 expected %s found %s\n",
 			       tab, s->rmd160digest, new_digest);
 			tab = "\t";
 		}
 	}
 #endif /* RMD160 */
 #ifdef SHA256
 	if (s->flags & F_SHA256) {
 		char *new_digest, buf[65];
 
 		new_digest = SHA256_File(p->fts_accpath, buf);
 		if (!new_digest) {
 			LABEL;
 			printf("%sSHA-256: %s: %s\n", tab, p->fts_accpath,
 			       strerror(errno));
 			tab = "\t";
 		} else if (strcmp(new_digest, s->sha256digest)) {
 			LABEL;
 			printf("%sSHA-256 expected %s found %s\n",
 			       tab, s->sha256digest, new_digest);
 			tab = "\t";
 		}
 	}
 #endif /* SHA256 */
 
 	if (s->flags & F_SLINK &&
 	    strcmp(cp = rlink(p->fts_accpath), s->slink)) {
 		LABEL;
 		(void)printf("%slink_ref expected %s found %s\n",
 		      tab, s->slink, cp);
 	}
 	return (label);
 }
 
 const char *
 inotype(u_int type)
 {
 	switch(type & S_IFMT) {
 	case S_IFBLK:
 		return ("block");
 	case S_IFCHR:
 		return ("char");
 	case S_IFDIR:
 		return ("dir");
 	case S_IFIFO:
 		return ("fifo");
 	case S_IFREG:
 		return ("file");
 	case S_IFLNK:
 		return ("link");
 	case S_IFSOCK:
 		return ("socket");
 	default:
 		return ("unknown");
 	}
 	/* NOTREACHED */
 }
 
 const char *
 ftype(u_int type)
 {
 	switch(type) {
 	case F_BLOCK:
 		return ("block");
 	case F_CHAR:
 		return ("char");
 	case F_DIR:
 		return ("dir");
 	case F_FIFO:
 		return ("fifo");
 	case F_FILE:
 		return ("file");
 	case F_LINK:
 		return ("link");
 	case F_SOCK:
 		return ("socket");
 	default:
 		return ("unknown");
 	}
 	/* NOTREACHED */
 }
 
 char *
 rlink(char *name)
 {
 	static char lbuf[MAXPATHLEN * 4];
 	int len;
 	char tbuf[MAXPATHLEN];
 
 	if ((len = readlink(name, tbuf, sizeof(tbuf) - 1)) == -1)
 		err(1, "line %d: %s", lineno, name);
 	tbuf[len] = '\0';
 	strvis(lbuf, tbuf, VIS_WHITE | VIS_OCTAL);
 	return (lbuf);
 }
Index: head/usr.sbin/fmtree/create.c
===================================================================
--- head/usr.sbin/fmtree/create.c	(revision 311521)
+++ head/usr.sbin/fmtree/create.c	(revision 311522)
@@ -1,428 +1,429 @@
 /*-
  * Copyright (c) 1989, 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #if 0
 #ifndef lint
 static char sccsid[] = "@(#)create.c	8.1 (Berkeley) 6/6/93";
 #endif /* not lint */
 #endif
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <dirent.h>
 #include <err.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <fts.h>
 #include <grp.h>
 #ifdef MD5
 #include <md5.h>
 #endif
 #ifdef SHA1
 #include <sha.h>
 #endif
 #ifdef RMD160
 #include <ripemd.h>
 #endif
 #ifdef SHA256
 #include <sha256.h>
 #endif
 #include <pwd.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <time.h>
 #include <unistd.h>
 #include <vis.h>
 #include "mtree.h"
 #include "extern.h"
 
 #define	INDENTNAMELEN	15
 #define	MAXLINELEN	80
 
 static gid_t gid;
 static uid_t uid;
 static mode_t mode;
 static u_long flags = 0xffffffff;
 
 static int	dsort(const FTSENT * const *, const FTSENT * const *);
 static void	output(int, int *, const char *, ...) __printflike(3, 4);
 static int	statd(FTS *, FTSENT *, uid_t *, gid_t *, mode_t *, u_long *);
 static void	statf(int, FTSENT *);
 
 void
 cwalk(void)
 {
 	FTS *t;
 	FTSENT *p;
 	time_t cl;
 	char *argv[2], host[MAXHOSTNAMELEN];
 	char dot[] = ".";
 	int indent = 0;
 
 	if (!nflag) {
 		(void)time(&cl);
 		(void)gethostname(host, sizeof(host));
 		(void)printf(
 		    "#\t   user: %s\n#\tmachine: %s\n",
 		    getlogin(), host);
 		(void)printf(
 		    "#\t   tree: %s\n#\t   date: %s",
 		    fullpath, ctime(&cl));
 	}
 
 	argv[0] = dot;
 	argv[1] = NULL;
 	if ((t = fts_open(argv, ftsoptions, dsort)) == NULL)
 		err(1, "fts_open()");
 	while ((p = fts_read(t))) {
 		if (iflag)
 			indent = p->fts_level * 4;
 		if (check_excludes(p->fts_name, p->fts_path)) {
 			fts_set(t, p, FTS_SKIP);
 			continue;
 		}
 		switch(p->fts_info) {
 		case FTS_D:
 			if (!dflag)
 				(void)printf("\n");
 			if (!nflag)
 				(void)printf("# %s\n", p->fts_path);
 			statd(t, p, &uid, &gid, &mode, &flags);
 			statf(indent, p);
 			break;
 		case FTS_DP:
 			if (!nflag && (p->fts_level > 0))
 				(void)printf("%*s# %s\n", indent, "", p->fts_path);
 			(void)printf("%*s..\n", indent, "");
 			if (!dflag)
 				(void)printf("\n");
 			break;
 		case FTS_DNR:
 		case FTS_ERR:
 		case FTS_NS:
 			warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
 			break;
 		default:
 			if (!dflag)
 				statf(indent, p);
 			break;
 
 		}
 	}
 	(void)fts_close(t);
 	if (sflag && keys & F_CKSUM)
 		warnx("%s checksum: %lu", fullpath, (unsigned long)crc_total);
 }
 
 static void
 statf(int indent, FTSENT *p)
 {
 	struct group *gr;
 	struct passwd *pw;
 	uint32_t val;
 	off_t len;
 	int fd, offset;
 	char *fflags;
 	char *escaped_name;
 
 	escaped_name = calloc(1, p->fts_namelen * 4  +  1);
 	if (escaped_name == NULL)
 		errx(1, "statf(): calloc() failed");
 	strvis(escaped_name, p->fts_name, VIS_WHITE | VIS_OCTAL | VIS_GLOB);
 
 	if (iflag || S_ISDIR(p->fts_statp->st_mode))
 		offset = printf("%*s%s", indent, "", escaped_name);
 	else
 		offset = printf("%*s    %s", indent, "", escaped_name);
 
 	free(escaped_name);
 
 	if (offset > (INDENTNAMELEN + indent))
 		offset = MAXLINELEN;
 	else
 		offset += printf("%*s", (INDENTNAMELEN + indent) - offset, "");
 
 	if (!S_ISREG(p->fts_statp->st_mode) && !dflag)
 		output(indent, &offset, "type=%s", inotype(p->fts_statp->st_mode));
 	if (p->fts_statp->st_uid != uid) {
 		if (keys & F_UNAME) {
 			pw = getpwuid(p->fts_statp->st_uid);
 			if (pw != NULL)
 				output(indent, &offset, "uname=%s", pw->pw_name);
 			else if (wflag)
 				warnx("Could not get uname for uid=%u",
 				    p->fts_statp->st_uid);
 			else
 				errx(1,
 				    "Could not get uname for uid=%u",
 				    p->fts_statp->st_uid);
 		}
 		if (keys & F_UID)
 			output(indent, &offset, "uid=%u", p->fts_statp->st_uid);
 	}
 	if (p->fts_statp->st_gid != gid) {
 		if (keys & F_GNAME) {
 			gr = getgrgid(p->fts_statp->st_gid);
 			if (gr != NULL)
 				output(indent, &offset, "gname=%s", gr->gr_name);
 			else if (wflag)
 				warnx("Could not get gname for gid=%u",
 				    p->fts_statp->st_gid);
 			else
 				errx(1,
 				    "Could not get gname for gid=%u",
 				    p->fts_statp->st_gid);
 		}
 		if (keys & F_GID)
 			output(indent, &offset, "gid=%u", p->fts_statp->st_gid);
 	}
 	if (keys & F_MODE && (p->fts_statp->st_mode & MBITS) != mode)
 		output(indent, &offset, "mode=%#o", p->fts_statp->st_mode & MBITS);
 	if (keys & F_NLINK && p->fts_statp->st_nlink != 1)
-		output(indent, &offset, "nlink=%u", p->fts_statp->st_nlink);
+		output(indent, &offset, "nlink=%ju",
+		    (uintmax_t)p->fts_statp->st_nlink);
 	if (keys & F_SIZE && S_ISREG(p->fts_statp->st_mode))
 		output(indent, &offset, "size=%jd",
 		    (intmax_t)p->fts_statp->st_size);
 	if (keys & F_TIME)
 		output(indent, &offset, "time=%ld.%09ld",
 		    (long)p->fts_statp->st_mtim.tv_sec,
 		    p->fts_statp->st_mtim.tv_nsec);
 	if (keys & F_CKSUM && S_ISREG(p->fts_statp->st_mode)) {
 		if ((fd = open(p->fts_accpath, O_RDONLY, 0)) < 0 ||
 		    crc(fd, &val, &len))
 			err(1, "%s", p->fts_accpath);
 		(void)close(fd);
 		output(indent, &offset, "cksum=%lu", (unsigned long)val);
 	}
 #ifdef MD5
 	if (keys & F_MD5 && S_ISREG(p->fts_statp->st_mode)) {
 		char *digest, buf[33];
 
 		digest = MD5File(p->fts_accpath, buf);
 		if (!digest)
 			err(1, "%s", p->fts_accpath);
 		output(indent, &offset, "md5digest=%s", digest);
 	}
 #endif /* MD5 */
 #ifdef SHA1
 	if (keys & F_SHA1 && S_ISREG(p->fts_statp->st_mode)) {
 		char *digest, buf[41];
 
 		digest = SHA1_File(p->fts_accpath, buf);
 		if (!digest)
 			err(1, "%s", p->fts_accpath);
 		output(indent, &offset, "sha1digest=%s", digest);
 	}
 #endif /* SHA1 */
 #ifdef RMD160
 	if (keys & F_RMD160 && S_ISREG(p->fts_statp->st_mode)) {
 		char *digest, buf[41];
 
 		digest = RIPEMD160_File(p->fts_accpath, buf);
 		if (!digest)
 			err(1, "%s", p->fts_accpath);
 		output(indent, &offset, "ripemd160digest=%s", digest);
 	}
 #endif /* RMD160 */
 #ifdef SHA256
 	if (keys & F_SHA256 && S_ISREG(p->fts_statp->st_mode)) {
 		char *digest, buf[65];
 
 		digest = SHA256_File(p->fts_accpath, buf);
 		if (!digest)
 			err(1, "%s", p->fts_accpath);
 		output(indent, &offset, "sha256digest=%s", digest);
 	}
 #endif /* SHA256 */
 	if (keys & F_SLINK &&
 	    (p->fts_info == FTS_SL || p->fts_info == FTS_SLNONE))
 		output(indent, &offset, "link=%s", rlink(p->fts_accpath));
 	if (keys & F_FLAGS && p->fts_statp->st_flags != flags) {
 		fflags = flags_to_string(p->fts_statp->st_flags);
 		output(indent, &offset, "flags=%s", fflags);
 		free(fflags);
 	}
 	(void)putchar('\n');
 }
 
 #define	MAXGID	5000
 #define	MAXUID	5000
 #define	MAXMODE	MBITS + 1
 #define	MAXFLAGS 256
 #define	MAXS 16
 
 static int
 statd(FTS *t, FTSENT *parent, uid_t *puid, gid_t *pgid, mode_t *pmode, u_long *pflags)
 {
 	FTSENT *p;
 	gid_t sgid;
 	uid_t suid;
 	mode_t smode;
 	u_long sflags;
 	struct group *gr;
 	struct passwd *pw;
 	gid_t savegid = *pgid;
 	uid_t saveuid = *puid;
 	mode_t savemode = *pmode;
 	u_long saveflags = *pflags;
 	u_short maxgid, maxuid, maxmode, maxflags;
 	u_short g[MAXGID], u[MAXUID], m[MAXMODE], f[MAXFLAGS];
 	char *fflags;
 	static int first = 1;
 
 	if ((p = fts_children(t, 0)) == NULL) {
 		if (errno)
 			err(1, "%s", RP(parent));
 		return (1);
 	}
 
 	bzero(g, sizeof(g));
 	bzero(u, sizeof(u));
 	bzero(m, sizeof(m));
 	bzero(f, sizeof(f));
 
 	maxuid = maxgid = maxmode = maxflags = 0;
 	for (; p; p = p->fts_link) {
 		if (!dflag || (dflag && S_ISDIR(p->fts_statp->st_mode))) {
 			smode = p->fts_statp->st_mode & MBITS;
 			if (smode < MAXMODE && ++m[smode] > maxmode) {
 				savemode = smode;
 				maxmode = m[smode];
 			}
 			sgid = p->fts_statp->st_gid;
 			if (sgid < MAXGID && ++g[sgid] > maxgid) {
 				savegid = sgid;
 				maxgid = g[sgid];
 			}
 			suid = p->fts_statp->st_uid;
 			if (suid < MAXUID && ++u[suid] > maxuid) {
 				saveuid = suid;
 				maxuid = u[suid];
 			}
 
 			/*
 			 * XXX
 			 * note that the below will break when file flags
 			 * are extended beyond the first 4 bytes of each
 			 * half word of the flags
 			 */
 #define FLAGS2IDX(f) ((f & 0xf) | ((f >> 12) & 0xf0))
 			sflags = p->fts_statp->st_flags;
 			if (FLAGS2IDX(sflags) < MAXFLAGS &&
 			    ++f[FLAGS2IDX(sflags)] > maxflags) {
 				saveflags = sflags;
 				maxflags = f[FLAGS2IDX(sflags)];
 			}
 		}
 	}
 	/*
 	 * If the /set record is the same as the last one we do not need to output
 	 * a new one.  So first we check to see if anything changed.  Note that we
 	 * always output a /set record for the first directory.
 	 */
 	if ((((keys & F_UNAME) | (keys & F_UID)) && (*puid != saveuid)) ||
 	    (((keys & F_GNAME) | (keys & F_GID)) && (*pgid != savegid)) ||
 	    ((keys & F_MODE) && (*pmode != savemode)) ||
 	    ((keys & F_FLAGS) && (*pflags != saveflags)) ||
 	    (first)) {
 		first = 0;
 		if (dflag)
 			(void)printf("/set type=dir");
 		else
 			(void)printf("/set type=file");
 		if (keys & F_UNAME) {
 			pw = getpwuid(saveuid);
 			if (pw != NULL)
 				(void)printf(" uname=%s", pw->pw_name);
 			else if (wflag)
 				warnx( "Could not get uname for uid=%u", saveuid);
 			else
 				errx(1, "Could not get uname for uid=%u", saveuid);
 		}
 		if (keys & F_UID)
 			(void)printf(" uid=%lu", (u_long)saveuid);
 		if (keys & F_GNAME) {
 			gr = getgrgid(savegid);
 			if (gr != NULL)
 				(void)printf(" gname=%s", gr->gr_name);
 			else if (wflag)
 				warnx("Could not get gname for gid=%u", savegid);
 			else
 				errx(1, "Could not get gname for gid=%u", savegid);
 		}
 		if (keys & F_GID)
 			(void)printf(" gid=%lu", (u_long)savegid);
 		if (keys & F_MODE)
 			(void)printf(" mode=%#o", savemode);
 		if (keys & F_NLINK)
 			(void)printf(" nlink=1");
 		if (keys & F_FLAGS) {
 			fflags = flags_to_string(saveflags);
 			(void)printf(" flags=%s", fflags);
 			free(fflags);
 		}
 		(void)printf("\n");
 		*puid = saveuid;
 		*pgid = savegid;
 		*pmode = savemode;
 		*pflags = saveflags;
 	}
 	return (0);
 }
 
 static int
 dsort(const FTSENT * const *a, const FTSENT * const *b)
 {
 	if (S_ISDIR((*a)->fts_statp->st_mode)) {
 		if (!S_ISDIR((*b)->fts_statp->st_mode))
 			return (1);
 	} else if (S_ISDIR((*b)->fts_statp->st_mode))
 		return (-1);
 	return (strcmp((*a)->fts_name, (*b)->fts_name));
 }
 
 #include <stdarg.h>
 
 void
 output(int indent, int *offset, const char *fmt, ...)
 {
 	va_list ap;
 	char buf[1024];
 	va_start(ap, fmt);
 	(void)vsnprintf(buf, sizeof(buf), fmt, ap);
 	va_end(ap);
 
 	if (*offset + strlen(buf) > MAXLINELEN - 3) {
 		(void)printf(" \\\n%*s", INDENTNAMELEN + indent, "");
 		*offset = INDENTNAMELEN + indent;
 	}
 	*offset += printf(" %s", buf) + 1;
 }
Index: head/usr.sbin/fmtree/specspec.c
===================================================================
--- head/usr.sbin/fmtree/specspec.c	(revision 311521)
+++ head/usr.sbin/fmtree/specspec.c	(revision 311522)
@@ -1,256 +1,256 @@
 /*-
  * Copyright (c) 2003 Poul-Henning Kamp
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <err.h>
 #include <grp.h>
 #include <pwd.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <unistd.h>
 #include "mtree.h"
 #include "extern.h"
 
 #define FF(a, b, c, d) \
 	(((a)->flags & (c)) && ((b)->flags & (c)) && ((a)->d) != ((b)->d))
 #define FS(a, b, c, d) \
 	(((a)->flags & (c)) && ((b)->flags & (c)) && strcmp((a)->d,(b)->d))
 #define FM(a, b, c, d) \
 	(((a)->flags & (c)) && ((b)->flags & (c)) && memcmp(&(a)->d,&(b)->d, sizeof (a)->d))
 
 static void
 shownode(NODE *n, int f, char const *path)
 {
 	struct group *gr;
 	struct passwd *pw;
 
 	printf("%s%s %s", path, n->name, ftype(n->type));
 	if (f & F_CKSUM)
 		printf(" cksum=%lu", n->cksum);
 	if (f & F_GID)
 		printf(" gid=%d", n->st_gid);
 	if (f & F_GNAME) {
 		gr = getgrgid(n->st_gid);
 		if (gr == NULL)
 			printf(" gid=%d", n->st_gid);
 		else
 			printf(" gname=%s", gr->gr_name);
 	}
 	if (f & F_MODE)
 		printf(" mode=%o", n->st_mode);
 	if (f & F_NLINK)
-		printf(" nlink=%d", n->st_nlink);
+		printf(" nlink=%ju", (uintmax_t)n->st_nlink);
 	if (f & F_SIZE)
 		printf(" size=%jd", (intmax_t)n->st_size);
 	if (f & F_UID)
 		printf(" uid=%d", n->st_uid);
 	if (f & F_UNAME) {
 		pw = getpwuid(n->st_uid);
 		if (pw == NULL)
 			printf(" uid=%d", n->st_uid);
 		else
 			printf(" uname=%s", pw->pw_name);
 	}
 	if (f & F_MD5)
 		printf(" md5digest=%s", n->md5digest);
 	if (f & F_SHA1)
 		printf(" sha1digest=%s", n->sha1digest);
 	if (f & F_RMD160)
 		printf(" rmd160digest=%s", n->rmd160digest);
 	if (f & F_SHA256)
 		printf(" sha256digest=%s", n->sha256digest);
 	if (f & F_FLAGS)
 		printf(" flags=%s", flags_to_string(n->st_flags));
 	printf("\n");
 }
 
 static int
 mismatch(NODE *n1, NODE *n2, int differ, char const *path)
 {
 
 	if (n2 == NULL) {
 		shownode(n1, differ, path);
 		return (1);
 	}
 	if (n1 == NULL) {
 		printf("\t");
 		shownode(n2, differ, path);
 		return (1);
 	}
 	if (!(differ & keys))
 		return(0);
 	printf("\t\t");
 	shownode(n1, differ, path);
 	printf("\t\t");
 	shownode(n2, differ, path);
 	return (1);
 }
 
 static int
 compare_nodes(NODE *n1, NODE *n2, char const *path)
 {
 	int differs;
 	
 	if (n1 != NULL && n1->type == F_LINK)
 		n1->flags &= ~F_MODE;
 	if (n2 != NULL && n2->type == F_LINK)
 		n2->flags &= ~F_MODE;
 	differs = 0;
 	if (n1 == NULL && n2 != NULL) {
 		differs = n2->flags;
 		mismatch(n1, n2, differs, path);
 		return (1);
 	}
 	if (n1 != NULL && n2 == NULL) {
 		differs = n1->flags;
 		mismatch(n1, n2, differs, path);
 		return (1);
 	}
 	if (n1->type != n2->type) {
 		differs = 0;
 		mismatch(n1, n2, differs, path);
 		return (1);
 	}
 	if (FF(n1, n2, F_CKSUM, cksum))
 		differs |= F_CKSUM;
 	if (FF(n1, n2, F_GID, st_gid))
 		differs |= F_GID;
 	if (FF(n1, n2, F_GNAME, st_gid))
 		differs |= F_GNAME;
 	if (FF(n1, n2, F_MODE, st_mode))
 		differs |= F_MODE;
 	if (FF(n1, n2, F_NLINK, st_nlink))
 		differs |= F_NLINK;
 	if (FF(n1, n2, F_SIZE, st_size))
 		differs |= F_SIZE;
 	if (FS(n1, n2, F_SLINK, slink))
 		differs |= F_SLINK;
 	if (FM(n1, n2, F_TIME, st_mtimespec))
 		differs |= F_TIME;
 	if (FF(n1, n2, F_UID, st_uid))
 		differs |= F_UID;
 	if (FF(n1, n2, F_UNAME, st_uid))
 		differs |= F_UNAME;
 	if (FS(n1, n2, F_MD5, md5digest))
 		differs |= F_MD5;
 	if (FS(n1, n2, F_SHA1, sha1digest))
 		differs |= F_SHA1;
 	if (FS(n1, n2, F_RMD160, rmd160digest))
 		differs |= F_RMD160;
 	if (FS(n1, n2, F_SHA256, sha256digest))
 		differs |= F_SHA256;
 	if (FF(n1, n2, F_FLAGS, st_flags))
 		differs |= F_FLAGS;
 	if (differs) {
 		mismatch(n1, n2, differs, path);
 		return (1);
 	}
 	return (0);	
 }
 static int
 walk_in_the_forest(NODE *t1, NODE *t2, char const *path)
 {
 	int r, i;
 	NODE *c1, *c2, *n1, *n2;
 	char *np;
 
 	r = 0;
 
 	if (t1 != NULL)
 		c1 = t1->child;
 	else
 		c1 = NULL;
 	if (t2 != NULL)
 		c2 = t2->child;
 	else
 		c2 = NULL;
 	while (c1 != NULL || c2 != NULL) {
 		n1 = n2 = NULL;
 		if (c1 != NULL)
 			n1 = c1->next;
 		if (c2 != NULL)
 			n2 = c2->next;
 		if (c1 != NULL && c2 != NULL) {
 			if (c1->type != F_DIR && c2->type == F_DIR) {
 				n2 = c2;
 				c2 = NULL;
 			} else if (c1->type == F_DIR && c2->type != F_DIR) {
 				n1 = c1;
 				c1 = NULL;
 			} else {
 				i = strcmp(c1->name, c2->name);
 				if (i > 0) {
 					n1 = c1;
 					c1 = NULL;
 				} else if (i < 0) {
 					n2 = c2;
 					c2 = NULL;
 				}
 			}
 		}
 		if (c1 == NULL && c2->type == F_DIR) {
 			asprintf(&np, "%s%s/", path, c2->name);
 			i = walk_in_the_forest(c1, c2, np);
 			free(np);
 			i += compare_nodes(c1, c2, path);
 		} else if (c2 == NULL && c1->type == F_DIR) {
 			asprintf(&np, "%s%s/", path, c1->name);
 			i = walk_in_the_forest(c1, c2, np);
 			free(np);
 			i += compare_nodes(c1, c2, path);
 		} else if (c1 == NULL || c2 == NULL) {
 			i = compare_nodes(c1, c2, path);
 		} else if (c1->type == F_DIR && c2->type == F_DIR) {
 			asprintf(&np, "%s%s/", path, c1->name);
 			i = walk_in_the_forest(c1, c2, np);
 			free(np);
 			i += compare_nodes(c1, c2, path);
 		} else {
 			i = compare_nodes(c1, c2, path);
 		}
 		r += i;
 		c1 = n1;
 		c2 = n2;
 	}
 	return (r);	
 }
 
 int
 mtree_specspec(FILE *fi, FILE *fj)
 {
 	int rval;
 	NODE *root1, *root2;
 
 	root1 = mtree_readspec(fi);
 	root2 = mtree_readspec(fj);
 	rval = walk_in_the_forest(root1, root2, "");
 	rval += compare_nodes(root1, root2, "");
 	if (rval > 0)
 		return (MISMATCHEXIT);
 	return (0);
 }